Message ID | 20221018045533.2396670-3-senozhatsky@chromium.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | zram: Support multiple compression streams | expand |
On Tue, Oct 18, 2022 at 01:55:26PM +0900, Sergey Senozhatsky wrote: > Introduce recomp_algorithm sysfs knob that controls > secondary algorithm selection used for recompression. > This device attribute works in a similar way with > comp_algorithm attribute. > > Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> > --- > drivers/block/zram/zram_drv.c | 111 +++++++++++++++++++++++++++------- > 1 file changed, 90 insertions(+), 21 deletions(-) > > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c > index 770ea3489eb6..a8ef3c0c3dae 100644 > --- a/drivers/block/zram/zram_drv.c > +++ b/drivers/block/zram/zram_drv.c > @@ -41,7 +41,12 @@ static DEFINE_IDR(zram_index_idr); > static DEFINE_MUTEX(zram_index_mutex); > > static int zram_major; > -static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; > +static const char *default_comp_algs[ZRAM_MAX_ZCOMPS] = { > + CONFIG_ZRAM_DEF_COMP, > +#ifdef CONFIG_ZRAM_MULTI_COMP > + "zstd", > +#endif > +}; > > /* Module params (documentation at end) */ > static unsigned int num_devices = 1; > @@ -1000,31 +1005,37 @@ static ssize_t max_comp_streams_store(struct device *dev, > return len; > } > > -static ssize_t comp_algorithm_show(struct device *dev, > - struct device_attribute *attr, char *buf) Do you have any reason to change show and set placement? Otherwise, please keep the function order to reduce unnecesssary churns. > +static void comp_algorithm_set(struct zram *zram, u32 idx, const char *alg) > { > - size_t sz; > - struct zram *zram = dev_to_zram(dev); > + bool default_alg = false; > + int i; > > - down_read(&zram->init_lock); > - sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_ZCOMP], buf); > - up_read(&zram->init_lock); > + /* Do not kfree() algs that we didn't allocate, IOW the default ones */ > + for (i = 0; i < ZRAM_MAX_ZCOMPS; i++) { > + if (zram->comp_algs[idx] == default_comp_algs[i]) { > + default_alg = true; > + break; > + } > + } > > - return sz; > + if (!default_alg) > + kfree(zram->comp_algs[idx]); > + zram->comp_algs[idx] = alg; > } > > -static void comp_algorithm_set(struct zram *zram, u32 idx, const char *alg) > +static ssize_t __comp_algorithm_show(struct zram *zram, u32 idx, char *buf) > { > - /* Do not kfree() algs that we didn't allocate, IOW the default ones */ > - if (zram->comp_algs[idx] != default_compressor) > - kfree(zram->comp_algs[idx]); > - zram->comp_algs[idx] = alg; > + ssize_t sz; > + > + down_read(&zram->init_lock); > + sz = zcomp_available_show(zram->comp_algs[idx], buf); > + up_read(&zram->init_lock); > + > + return sz; > } > > -static ssize_t comp_algorithm_store(struct device *dev, > - struct device_attribute *attr, const char *buf, size_t len) > +static int __comp_algorithm_store(struct zram *zram, u32 idx, const char *buf) > { > - struct zram *zram = dev_to_zram(dev); > char *compressor; > size_t sz; > > @@ -1053,11 +1064,55 @@ static ssize_t comp_algorithm_store(struct device *dev, > return -EBUSY; > } > > - comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, compressor); > + comp_algorithm_set(zram, idx, compressor); > up_write(&zram->init_lock); > - return len; > + return 0; > +} > + > +static ssize_t comp_algorithm_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct zram *zram = dev_to_zram(dev); > + > + return __comp_algorithm_show(zram, ZRAM_PRIMARY_ZCOMP, buf); > +} > + > +static ssize_t comp_algorithm_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, > + size_t len) > +{ > + struct zram *zram = dev_to_zram(dev); > + int ret; > + > + ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_ZCOMP, buf); > + return ret ? ret : len; > } > > +#ifdef CONFIG_ZRAM_MULTI_COMP > +static ssize_t recomp_algorithm_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct zram *zram = dev_to_zram(dev); > + > + return __comp_algorithm_show(zram, ZRAM_SECONDARY_ZCOMP, buf); > +} Just open question(I might be too paranoid?) I am thinking someone want to add third comp algorithm in future to balance decompression and memory efficiency. If it's not too crazy idea, let's think about the interface. Maybe, could we make the recomp knobs works like list? # A primary comp echo "A" > /zram/comp_algo # Multiple secondary comps echo "B threshold" > /zram/add_recomp_algo echo "C threshold" > /zram/add_recomp_algo echo "D threshold" > /zram/add_recomp_algo "cat /zram/recomp_algo" shows the list echo "C" > /zram/remove_recomp_algo will remove the C algorithm in stack. My point is that we don't need to implement it atm but makes the interface to open the possibility for future extension. What do you think? > + > +static ssize_t recomp_algorithm_store(struct device *dev, > + struct device_attribute *attr, > + const char *buf, > + size_t len) > +{ > + struct zram *zram = dev_to_zram(dev); > + int ret; > + > + ret = __comp_algorithm_store(zram, ZRAM_SECONDARY_ZCOMP, buf); > + return ret ? ret : len; > +} > +#endif > + > static ssize_t compact_store(struct device *dev, > struct device_attribute *attr, const char *buf, size_t len) > { > @@ -1762,7 +1817,11 @@ static void zram_reset_device(struct zram *zram) > memset(&zram->stats, 0, sizeof(zram->stats)); > reset_bdev(zram); > > - comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, default_compressor); > + comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, > + default_comp_algs[ZRAM_PRIMARY_ZCOMP]); > + if (IS_ENABLED(CONFIG_ZRAM_MULTI_COMP)) Dumb question: Why do you use IS_ENABLED instead of ifdef? > + comp_algorithm_set(zram, ZRAM_SECONDARY_ZCOMP, > + default_comp_algs[ZRAM_SECONDARY_ZCOMP]); > up_write(&zram->init_lock); > } > > @@ -1895,6 +1954,9 @@ static DEVICE_ATTR_WO(writeback); > static DEVICE_ATTR_RW(writeback_limit); > static DEVICE_ATTR_RW(writeback_limit_enable); > #endif > +#ifdef CONFIG_ZRAM_MULTI_COMP > +static DEVICE_ATTR_RW(recomp_algorithm); > +#endif > > static struct attribute *zram_disk_attrs[] = { > &dev_attr_disksize.attr, > @@ -1918,6 +1980,9 @@ static struct attribute *zram_disk_attrs[] = { > &dev_attr_bd_stat.attr, > #endif > &dev_attr_debug_stat.attr, > +#ifdef CONFIG_ZRAM_MULTI_COMP > + &dev_attr_recomp_algorithm.attr, > +#endif > NULL, > }; > > @@ -1997,7 +2062,11 @@ static int zram_add(void) > if (ret) > goto out_cleanup_disk; > > - zram->comp_algs[ZRAM_PRIMARY_ZCOMP] = default_compressor; > + zram->comp_algs[ZRAM_PRIMARY_ZCOMP] = > + default_comp_algs[ZRAM_PRIMARY_ZCOMP]; > + if (IS_ENABLED(CONFIG_ZRAM_MULTI_COMP)) > + zram->comp_algs[ZRAM_SECONDARY_ZCOMP] = > + default_comp_algs[ZRAM_SECONDARY_ZCOMP]; > > zram_debugfs_register(zram); > pr_info("Added device: %s\n", zram->disk->disk_name); > -- > 2.38.0.413.g74048e4d9e-goog >
On (22/11/02 13:15), Minchan Kim wrote: [..] > > /* Module params (documentation at end) */ > > static unsigned int num_devices = 1; > > @@ -1000,31 +1005,37 @@ static ssize_t max_comp_streams_store(struct device *dev, > > return len; > > } > > > > -static ssize_t comp_algorithm_show(struct device *dev, > > - struct device_attribute *attr, char *buf) > > Do you have any reason to change show and set placement? Otherwise, > please keep the function order to reduce unnecesssary churns. I don't change their placement. It's just show and store for primary and secondary algorithms use the same __store and __show functions, which are static and are placed ahead of store and show. [..] > Just open question(I might be too paranoid?) > > I am thinking someone want to add third comp algorithm in future > to balance decompression and memory efficiency. > > If it's not too crazy idea, let's think about the interface. > Maybe, could we make the recomp knobs works like list? > > # A primary comp > echo "A" > /zram/comp_algo > > # Multiple secondary comps > echo "B threshold" > /zram/add_recomp_algo > echo "C threshold" > /zram/add_recomp_algo > echo "D threshold" > /zram/add_recomp_algo What is the threshold here? My design approach is that ZRAM doesn't do recompression on its own, so no magic is happening automatically. It's the user-space that triggers recompression for selected pages when user-space thinks it's time to. This allows us to have various flexible policies and consider things that ZRAM is not even aware of: battery level, free memory, CPU load average, etc. E.g. no recompression when all CPUs are busy rendering video game, or when we are draining battery too fast, etc. > "cat /zram/recomp_algo" shows the list > > echo "C" > /zram/remove_recomp_algo > will remove the C algorithm in stack. What is the use case for removal of a secondary algorithm? > My point is that we don't need to implement it atm but makes the > interface to open the possibility for future extension. > > What do you think? So, as far as I understand, we don't have reason to add remove_recomp_algo right now. And existing recomp_algo does not enforce any particular format, it can be extended. Right now we accept "$name" but can do something like "$name:$priority". The only thing that we probably need to do is rename recomp_algo to either add_recomp_algo or register_recomp_algo? > > +static ssize_t recomp_algorithm_store(struct device *dev, > > + struct device_attribute *attr, > > + const char *buf, > > + size_t len) > > +{ > > + struct zram *zram = dev_to_zram(dev); > > + int ret; > > + > > + ret = __comp_algorithm_store(zram, ZRAM_SECONDARY_ZCOMP, buf); > > + return ret ? ret : len; > > +} > > +#endif > > + > > static ssize_t compact_store(struct device *dev, > > struct device_attribute *attr, const char *buf, size_t len) > > { > > @@ -1762,7 +1817,11 @@ static void zram_reset_device(struct zram *zram) > > memset(&zram->stats, 0, sizeof(zram->stats)); > > reset_bdev(zram); > > > > - comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, default_compressor); > > + comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, > > + default_comp_algs[ZRAM_PRIMARY_ZCOMP]); > > + if (IS_ENABLED(CONFIG_ZRAM_MULTI_COMP)) > > Dumb question: > > Why do you use IS_ENABLED instead of ifdef? #ifdef-s are banned in the new C-code, as far as I know. IS_ENABLED is what we should use.
On (22/11/03 12:05), Sergey Senozhatsky wrote: > [..] > > Just open question(I might be too paranoid?) > > > > I am thinking someone want to add third comp algorithm in future > > to balance decompression and memory efficiency. > > > > If it's not too crazy idea, let's think about the interface. > > Maybe, could we make the recomp knobs works like list? > > > > # A primary comp > > echo "A" > /zram/comp_algo > > > > # Multiple secondary comps > > echo "B threshold" > /zram/add_recomp_algo > > echo "C threshold" > /zram/add_recomp_algo > > echo "D threshold" > /zram/add_recomp_algo As a side note: The way it's implemented currently is that comps is an array, so we can store more comps there. I sort of was thinking that we probably can have more than two algos at some point the in the future (hence the MULTI_COMPRESS config option).
On (22/11/03 12:05), Sergey Senozhatsky wrote: > What is the use case for removal of a secondary algorithm? > > > My point is that we don't need to implement it atm but makes the > > interface to open the possibility for future extension. > > > > What do you think? > > So, as far as I understand, we don't have reason to add remove_recomp_algo > right now. And existing recomp_algo does not enforce any particular format, > it can be extended. Right now we accept "$name" but can do something like > "$name:$priority". Or with keywords: name=STRING priority=INT and the only legal priority for now is 1.
On (22/11/03 13:09), Sergey Senozhatsky wrote: > On (22/11/03 12:05), Sergey Senozhatsky wrote: > > What is the use case for removal of a secondary algorithm? > > > > > My point is that we don't need to implement it atm but makes the > > > interface to open the possibility for future extension. > > > > > > What do you think? > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > right now. And existing recomp_algo does not enforce any particular format, > > it can be extended. Right now we accept "$name" but can do something like > > "$name:$priority". > > Or with keywords: > > name=STRING priority=INT > > and the only legal priority for now is 1. E.g. recomp_algorithm support for algorithms name= and optional integer priority=. I sort of like the recomp_algorithm name so far, but we can change it. --- diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cf9d3474b80c..9a614253de07 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1102,9 +1102,38 @@ static ssize_t recomp_algorithm_store(struct device *dev, size_t len) { struct zram *zram = dev_to_zram(dev); + int prio = ZRAM_SECONDARY_ZCOMP; + char *args, *param, *val; + char *alg = NULL; int ret; - ret = __comp_algorithm_store(zram, ZRAM_SECONDARY_ZCOMP, buf); + args = skip_spaces(buf); + while (*args) { + args = next_arg(args, ¶m, &val); + + if (!*val) + return -EINVAL; + + if (!strcmp(param, "name")) { + alg = val; + continue; + } + + if (!strcmp(param, "priority")) { + ret = kstrtoint(val, 10, &prio); + if (ret) + return ret; + continue; + } + } + + if (!alg) + return -EINVAL; + + if (prio < ZRAM_SECONDARY_ZCOMP || prio >= ZRAM_MAX_ZCOMPS) + return -EINVAL; + + ret = __comp_algorithm_store(zram, prio, alg); return ret ? ret : len; } #endif
On Thu, Nov 03, 2022 at 12:05:06PM +0900, Sergey Senozhatsky wrote: < snip > > > Just open question(I might be too paranoid?) > > > > I am thinking someone want to add third comp algorithm in future > > to balance decompression and memory efficiency. > > > > If it's not too crazy idea, let's think about the interface. > > Maybe, could we make the recomp knobs works like list? > > > > # A primary comp > > echo "A" > /zram/comp_algo > > > > # Multiple secondary comps > > echo "B threshold" > /zram/add_recomp_algo > > echo "C threshold" > /zram/add_recomp_algo > > echo "D threshold" > /zram/add_recomp_algo > > What is the threshold here? My design approach is that ZRAM doesn't do As your term, watermark but yeah, priority you suggested would be good for me. > recompression on its own, so no magic is happening automatically. It's > the user-space that triggers recompression for selected pages when > user-space thinks it's time to. This allows us to have various flexible > policies and consider things that ZRAM is not even aware of: battery level, > free memory, CPU load average, etc. E.g. no recompression when all CPUs > are busy rendering video game, or when we are draining battery too fast, > etc. > > > "cat /zram/recomp_algo" shows the list > > > > echo "C" > /zram/remove_recomp_algo > > will remove the C algorithm in stack. > > What is the use case for removal of a secondary algorithm? Without the interface, How can we modify the selection if admin want to change the order of second algorithms? > > > My point is that we don't need to implement it atm but makes the > > interface to open the possibility for future extension. > > > > What do you think? > > So, as far as I understand, we don't have reason to add remove_recomp_algo > right now. And existing recomp_algo does not enforce any particular format, > it can be extended. Right now we accept "$name" but can do something like > "$name:$priority". The only thing that we probably need to do is rename > recomp_algo to either add_recomp_algo or register_recomp_algo? Yeah, I like the name and priority format. Only question is how we could support algorithm selection change under considering multiple secondary algorithms.
On Thu, Nov 03, 2022 at 12:54:43PM +0900, Sergey Senozhatsky wrote: > On (22/11/03 12:05), Sergey Senozhatsky wrote: > > [..] > > > Just open question(I might be too paranoid?) > > > > > > I am thinking someone want to add third comp algorithm in future > > > to balance decompression and memory efficiency. > > > > > > If it's not too crazy idea, let's think about the interface. > > > Maybe, could we make the recomp knobs works like list? > > > > > > # A primary comp > > > echo "A" > /zram/comp_algo > > > > > > # Multiple secondary comps > > > echo "B threshold" > /zram/add_recomp_algo > > > echo "C threshold" > /zram/add_recomp_algo > > > echo "D threshold" > /zram/add_recomp_algo > > As a side note: > The way it's implemented currently is that comps is an array, so we > can store more comps there. I sort of was thinking that we probably > can have more than two algos at some point the in the future (hence > the MULTI_COMPRESS config option). Sure.
On Thu, Nov 03, 2022 at 01:09:49PM +0900, Sergey Senozhatsky wrote: > On (22/11/03 12:05), Sergey Senozhatsky wrote: > > What is the use case for removal of a secondary algorithm? > > > > > My point is that we don't need to implement it atm but makes the > > > interface to open the possibility for future extension. > > > > > > What do you think? > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > right now. And existing recomp_algo does not enforce any particular format, > > it can be extended. Right now we accept "$name" but can do something like > > "$name:$priority". > > Or with keywords: > > name=STRING priority=INT > > and the only legal priority for now is 1. I like it.
On (22/11/03 09:34), Minchan Kim wrote: > > > My point is that we don't need to implement it atm but makes the > > > interface to open the possibility for future extension. > > > > > > What do you think? > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > right now. And existing recomp_algo does not enforce any particular format, > > it can be extended. Right now we accept "$name" but can do something like > > "$name:$priority". The only thing that we probably need to do is rename > > recomp_algo to either add_recomp_algo or register_recomp_algo? > > Yeah, I like the name and priority format. > > Only question is how we could support algorithm selection change > under considering multiple secondary algorithms. So what I was thinking about, and I'm still in the mental model that re-compression is a user-space event, just like writeback, extension of recompress sysfs knob with "algo_index" (or something similar) which will mirror algorithm priority. Example: Configure 2 alternative algos, with priority 1 and 2 echo "name=lz4 priority=1" > recomp_algo echo "name=lz5 priority=2" > recomp_algo Recompress pages using algo 1 and algo 2 echo "type=huge threshold=3000 algo_idx=1" > recompress echo "type=idle threshold=2000 algo_idx=2" > recompress Maybe we can even pass algo name instead of idx.
On (22/11/04 12:18), Sergey Senozhatsky wrote: > On (22/11/03 09:34), Minchan Kim wrote: > > Yeah, I like the name and priority format. > > > > Only question is how we could support algorithm selection change > > under considering multiple secondary algorithms. > > So what I was thinking about, and I'm still in the mental model that > re-compression is a user-space event, just like writeback, extension > of recompress sysfs knob with "algo_index" (or something similar) which > will mirror algorithm priority. > > Example: > > Configure 2 alternative algos, with priority 1 and 2 > > echo "name=lz4 priority=1" > recomp_algo > echo "name=lz5 priority=2" > recomp_algo > > Recompress pages using algo 1 and algo 2 > > echo "type=huge threshold=3000 algo_idx=1" > recompress > echo "type=idle threshold=2000 algo_idx=2" > recompress > > Maybe we can even pass algo name instead of idx. Or pass priority= so that interface that uses algorithms has the same keyword that the interface that configures those algorithms. I still don't see many use-cases for "delete algorithm", to be honest. ZRAM is configured by scripts in 99.99999% of cases and it is quite static once it has been configured. So we probably can use the "don't setup algorithms that you don't need" approach, to keep things simpler.
On Fri, Nov 04, 2022 at 12:18:43PM +0900, Sergey Senozhatsky wrote: > On (22/11/03 09:34), Minchan Kim wrote: > > > > My point is that we don't need to implement it atm but makes the > > > > interface to open the possibility for future extension. > > > > > > > > What do you think? > > > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > > right now. And existing recomp_algo does not enforce any particular format, > > > it can be extended. Right now we accept "$name" but can do something like > > > "$name:$priority". The only thing that we probably need to do is rename > > > recomp_algo to either add_recomp_algo or register_recomp_algo? > > > > Yeah, I like the name and priority format. > > > > Only question is how we could support algorithm selection change > > under considering multiple secondary algorithms. > > So what I was thinking about, and I'm still in the mental model that > re-compression is a user-space event, just like writeback, extension > of recompress sysfs knob with "algo_index" (or something similar) which > will mirror algorithm priority. > > Example: > > Configure 2 alternative algos, with priority 1 and 2 > > echo "name=lz4 priority=1" > recomp_algo > echo "name=lz5 priority=2" > recomp_algo > > Recompress pages using algo 1 and algo 2 > > echo "type=huge threshold=3000 algo_idx=1" > recompress > echo "type=idle threshold=2000 algo_idx=2" > recompress > > Maybe we can even pass algo name instead of idx. Let's use name rather than index.
On Fri, Nov 04, 2022 at 01:53:11PM +0900, Sergey Senozhatsky wrote: > On (22/11/04 12:18), Sergey Senozhatsky wrote: > > On (22/11/03 09:34), Minchan Kim wrote: > > > Yeah, I like the name and priority format. > > > > > > Only question is how we could support algorithm selection change > > > under considering multiple secondary algorithms. > > > > So what I was thinking about, and I'm still in the mental model that > > re-compression is a user-space event, just like writeback, extension > > of recompress sysfs knob with "algo_index" (or something similar) which > > will mirror algorithm priority. > > > > Example: > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > echo "name=lz4 priority=1" > recomp_algo > > echo "name=lz5 priority=2" > recomp_algo > > > > Recompress pages using algo 1 and algo 2 > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > Maybe we can even pass algo name instead of idx. > > Or pass priority= so that interface that uses algorithms has the > same keyword that the interface that configures those algorithms. Hmm, why do we need algo_idx here if we already set up every fields at algorithm setup time? My understaind(assuming default(i.e., primary) algo is lzo) is echo "name=lz4 priority=1" > recomp_algo echo "name=lz5 priority=2" > recomp_algo echo "type=huge threshold=3000" > recompress It will try compress every objects which greater than 3000B with lz4 first and then lz5 if it's stillgreater or equal than 3000(or same size class). > > I still don't see many use-cases for "delete algorithm", to be honest. > ZRAM is configured by scripts in 99.99999% of cases and it is For the development time in the local side, people usually type in until they will have solid script version. If we asks resetting to zram to modify it, it's not good and consistent with other sysfs knobs we could overwrite it to change it. How about supporting overwritting to chage it over priority? echo "name=lz4 priority=1" > recomp_algo echo "name=lz5 priority=2" > recomp_algo # or I realized to change lz5 to lz7 so echo "name=lz6 priority=2" > recomp_algo > quite static once it has been configured. So we probably can use > the "don't setup algorithms that you don't need" approach, to keep > things simpler.
On (22/11/04 09:34), Minchan Kim wrote: > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > > > right now. And existing recomp_algo does not enforce any particular format, > > > > it can be extended. Right now we accept "$name" but can do something like > > > > "$name:$priority". The only thing that we probably need to do is rename > > > > recomp_algo to either add_recomp_algo or register_recomp_algo? > > > > > > Yeah, I like the name and priority format. > > > > > > Only question is how we could support algorithm selection change > > > under considering multiple secondary algorithms. > > > > So what I was thinking about, and I'm still in the mental model that > > re-compression is a user-space event, just like writeback, extension > > of recompress sysfs knob with "algo_index" (or something similar) which > > will mirror algorithm priority. > > > > Example: > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > echo "name=lz4 priority=1" > recomp_algo > > echo "name=lz5 priority=2" > recomp_algo > > > > Recompress pages using algo 1 and algo 2 > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > Maybe we can even pass algo name instead of idx. > > Let's use name rather than index. OK. Any preference on the keyword? "name="? "algo="? "algorithm="? "compressor="? "comp="? I want use the same keyword for recomp_algo. I sort of like "algo=", but not sure.
On Sat, Nov 05, 2022 at 08:25:44AM +0900, Sergey Senozhatsky wrote: > On (22/11/04 09:34), Minchan Kim wrote: > > > > > So, as far as I understand, we don't have reason to add remove_recomp_algo > > > > > right now. And existing recomp_algo does not enforce any particular format, > > > > > it can be extended. Right now we accept "$name" but can do something like > > > > > "$name:$priority". The only thing that we probably need to do is rename > > > > > recomp_algo to either add_recomp_algo or register_recomp_algo? > > > > > > > > Yeah, I like the name and priority format. > > > > > > > > Only question is how we could support algorithm selection change > > > > under considering multiple secondary algorithms. > > > > > > So what I was thinking about, and I'm still in the mental model that > > > re-compression is a user-space event, just like writeback, extension > > > of recompress sysfs knob with "algo_index" (or something similar) which > > > will mirror algorithm priority. > > > > > > Example: > > > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > > > echo "name=lz4 priority=1" > recomp_algo > > > echo "name=lz5 priority=2" > recomp_algo > > > > > > Recompress pages using algo 1 and algo 2 > > > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > > > Maybe we can even pass algo name instead of idx. > > > > Let's use name rather than index. > > OK. Any preference on the keyword? "name="? "algo="? "algorithm="? > "compressor="? "comp="? > > I want use the same keyword for recomp_algo. I sort of like "algo=", > but not sure. +1 with algo
On (22/11/04 10:43), Minchan Kim wrote: > > > Configure 2 alternative algos, with priority 1 and 2 > > > > > > echo "name=lz4 priority=1" > recomp_algo > > > echo "name=lz5 priority=2" > recomp_algo > > > > > > Recompress pages using algo 1 and algo 2 > > > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > > > Maybe we can even pass algo name instead of idx. > > > > Or pass priority= so that interface that uses algorithms has the > > same keyword that the interface that configures those algorithms. > > Hmm, why do we need algo_idx here if we already set up every > fields at algorithm setup time? > > My understaind(assuming default(i.e., primary) algo is lzo) is > > echo "name=lz4 priority=1" > recomp_algo > echo "name=lz5 priority=2" > recomp_algo > > echo "type=huge threshold=3000" > recompress > > It will try compress every objects which greater than 3000B with lz4 first > and then lz5 if it's stillgreater or equal than 3000(or same size class). One can be SW one can be HW. So I thought about having flexibility here. Instead of doing for (idx = 1; idx < MAX_IDX; idx++) { len = zcomp_compress(zram->comps[idx]); if (len <= threshold) break; } We would just directly use the suggested algo. But we probably don't need that param at all and can use the loop instead? [..] > echo "name=lz4 priority=1" > recomp_algo > echo "name=lz5 priority=2" > recomp_algo > > # or I realized to change lz5 to lz7 so > echo "name=lz6 priority=2" > recomp_algo So the latter should delete lz5 at idx 2 and put lz6 there? I can add that.
On (22/11/04 16:40), Minchan Kim wrote: > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > > > > > echo "name=lz4 priority=1" > recomp_algo > > > > echo "name=lz5 priority=2" > recomp_algo > > > > > > > > Recompress pages using algo 1 and algo 2 > > > > > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > > > > > Maybe we can even pass algo name instead of idx. > > > > > > Let's use name rather than index. > > > > OK. Any preference on the keyword? "name="? "algo="? "algorithm="? > > "compressor="? "comp="? > > > > I want use the same keyword for recomp_algo. I sort of like "algo=", > > but not sure. > > +1 with algo Minchan, I'm sorry I'm getting a bit confused (didn't sleep last night). I just saw your other email and you suggested there that we don't need any idx or name in recompress. Or did I misunderstand it?
On (22/11/05 08:41), Sergey Senozhatsky wrote: > One can be SW one can be HW. So I thought about having flexibility here. > Instead of doing > > for (idx = 1; idx < MAX_IDX; idx++) { > len = zcomp_compress(zram->comps[idx]); > if (len <= threshold) > break; > } > > We would just directly use the suggested algo. > > But we probably don't need that param at all and can use > the loop instead? My idea was that recompress does not loop through the algos (what on the fly recompression can do for instance), but instead recompress only does one thing. Because we can have, for instance, something like this algo=zstd priority=1 algo=deflate priority=2 And we recompress with algo=zstd only first. Without going into the slowest, most CPU and power consuming one. If the memory pressure keeps increasing then we might do algo=deflate recompress, as the last resort before we do writeback. But we may skip algo=deflate altogether and go straight to writeback, for instance because we have less than 30% of battery left. So the reason I suggested algo= in recompress was, basically, for that for having exact control of what recompression does.
On Sat, Nov 05, 2022 at 08:41:37AM +0900, Sergey Senozhatsky wrote: > On (22/11/04 10:43), Minchan Kim wrote: > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > > > > > echo "name=lz4 priority=1" > recomp_algo > > > > echo "name=lz5 priority=2" > recomp_algo > > > > > > > > Recompress pages using algo 1 and algo 2 > > > > > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > > > > > Maybe we can even pass algo name instead of idx. > > > > > > Or pass priority= so that interface that uses algorithms has the > > > same keyword that the interface that configures those algorithms. > > > > Hmm, why do we need algo_idx here if we already set up every > > fields at algorithm setup time? > > > > My understaind(assuming default(i.e., primary) algo is lzo) is > > > > echo "name=lz4 priority=1" > recomp_algo > > echo "name=lz5 priority=2" > recomp_algo > > > > echo "type=huge threshold=3000" > recompress > > > > It will try compress every objects which greater than 3000B with lz4 first > > and then lz5 if it's stillgreater or equal than 3000(or same size class). > > One can be SW one can be HW. So I thought about having flexibility here. > Instead of doing > > for (idx = 1; idx < MAX_IDX; idx++) { > len = zcomp_compress(zram->comps[idx]); > if (len <= threshold) > break; > } > > We would just directly use the suggested algo. > > But we probably don't need that param at all and can use > the loop instead? I don't understand what param you are saying. I expected the zram->comps array already has sorted algoritm based on the priority so the loop will try compression as expected so loop is fine. Are we on same page? > > [..] > > echo "name=lz4 priority=1" > recomp_algo > > echo "name=lz5 priority=2" > recomp_algo > > > > # or I realized to change lz5 to lz7 so > > echo "name=lz6 priority=2" > recomp_algo > > So the latter should delete lz5 at idx 2 and put lz6 there? > I can add that. Yub.
On Sat, Nov 05, 2022 at 08:44:46AM +0900, Sergey Senozhatsky wrote: > On (22/11/04 16:40), Minchan Kim wrote: > > > > > Configure 2 alternative algos, with priority 1 and 2 > > > > > > > > > > echo "name=lz4 priority=1" > recomp_algo > > > > > echo "name=lz5 priority=2" > recomp_algo > > > > > > > > > > Recompress pages using algo 1 and algo 2 > > > > > > > > > > echo "type=huge threshold=3000 algo_idx=1" > recompress > > > > > echo "type=idle threshold=2000 algo_idx=2" > recompress > > > > > > > > > > Maybe we can even pass algo name instead of idx. > > > > > > > > Let's use name rather than index. > > > > > > OK. Any preference on the keyword? "name="? "algo="? "algorithm="? > > > "compressor="? "comp="? > > > > > > I want use the same keyword for recomp_algo. I sort of like "algo=", > > > but not sure. > > > > +1 with algo > > Minchan, I'm sorry I'm getting a bit confused (didn't sleep last night). > I just saw your other email and you suggested there that we don't need > any idx or name in recompress. Or did I misunderstand it? > I should have more clear. Sorry for that. I meant if you need some reason, I prefer "algo=' to make review proceed. If you agree we don't need it, then, yeah, we are all good.
On (22/11/04 17:01), Minchan Kim wrote: > > One can be SW one can be HW. So I thought about having flexibility here. > > Instead of doing > > > > for (idx = 1; idx < MAX_IDX; idx++) { > > len = zcomp_compress(zram->comps[idx]); > > if (len <= threshold) > > break; > > } > > > > We would just directly use the suggested algo. > > > > But we probably don't need that param at all and can use > > the loop instead? > > I don't understand what param you are saying. I expected > the zram->comps array already has sorted algoritm based on the > priority so the loop will try compression as expected so loop is fine. > Are we on same page? I'll try to implement both loop and a specific algorithm selection.
On Sat, Nov 05, 2022 at 09:00:33AM +0900, Sergey Senozhatsky wrote: > On (22/11/05 08:41), Sergey Senozhatsky wrote: > > One can be SW one can be HW. So I thought about having flexibility here. > > Instead of doing > > > > for (idx = 1; idx < MAX_IDX; idx++) { > > len = zcomp_compress(zram->comps[idx]); > > if (len <= threshold) > > break; > > } > > > > We would just directly use the suggested algo. > > > > But we probably don't need that param at all and can use > > the loop instead? > > My idea was that recompress does not loop through the algos (what > on the fly recompression can do for instance), but instead > recompress only does one thing. > > Because we can have, for instance, something like this > > algo=zstd priority=1 > algo=deflate priority=2 > > And we recompress with algo=zstd only first. Without going > into the slowest, most CPU and power consuming one. If the > memory pressure keeps increasing then we might do algo=deflate > recompress, as the last resort before we do writeback. But we > may skip algo=deflate altogether and go straight to writeback, > for instance because we have less than 30% of battery left. > > So the reason I suggested algo= in recompress was, basically, > for that for having exact control of what recompression does. I am thinking like this: * Without recomp_algo setup, user can do whatever they want on the fly echo "type=idle threshold=3000 algo=zstd" > recompress Later they could do echo "type=idle threshold=3000 algo=deflate" > recompress or writeback to backing device * With recomp_algo setup like this, echo "algo=zstd priority=1" > recomp_algo echo "algo=deflate priority=2" > recomp_algo .. .. echo "type=idle threshold=3000" > recompress If zstd fails, it will continue to work with deflate transparently. IOW, "algo=" in *recompress* interface will override the global policy in the *recomp_algo* setup. Otherwise, the recomp_algo's set up will work.
On (22/11/07 11:08), Minchan Kim wrote: [..] > I am thinking like this: > > * Without recomp_algo setup, user can do whatever they want on the fly > > > echo "type=idle threshold=3000 algo=zstd" > recompress > > Later they could do > > echo "type=idle threshold=3000 algo=deflate" > recompress By "without recomp_algo setup" you mean that user doesn't configure anything before `echo XG > zramX/disksize`? Currently algorithm and recomp algorithm need to be selected at the same time - before zram device is initialised, because we use the same code and same approaches: we need to have zcomp back-ends in per-CPU data in zram read/write/recompress. Creating per-CPU zcomps on the fly is probably going to be a little bit intrusive to zram. What I currently have is as follows. A copy paste from my test script: - init device echo "algo=lz4 priority=1" > /sys/block/zram0/recomp_algorithm echo "algo=zstd priority=2" > /sys/block/zram0/recomp_algorithm echo "algo=deflate priority=3" > /sys/block/zram0/recomp_algorithm echo 5G > /sys/block/zram0/disksize Various recompression use cases: - recompress huge pages using all secondary algos in order of priority echo "type=huge" > /sys/block/zram0/recompress - recompress huge pages using zstd only echo "type=huge algo=zstd" > /sys/block/zram0/recompress - recompress all pages using lz4 echo "algo=lz4" > /sys/block/zram0/recompress - recompress idle pages, use all algos in priority order, with threshold echo "type=idle threshold=3000" > /sys/block/zram0/recompress - recompress idle pages, using zstd only, with threshold echo "algo=zstd type=idle threshold=2000" > /sys/block/zram0/recompress
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 770ea3489eb6..a8ef3c0c3dae 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -41,7 +41,12 @@ static DEFINE_IDR(zram_index_idr); static DEFINE_MUTEX(zram_index_mutex); static int zram_major; -static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; +static const char *default_comp_algs[ZRAM_MAX_ZCOMPS] = { + CONFIG_ZRAM_DEF_COMP, +#ifdef CONFIG_ZRAM_MULTI_COMP + "zstd", +#endif +}; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -1000,31 +1005,37 @@ static ssize_t max_comp_streams_store(struct device *dev, return len; } -static ssize_t comp_algorithm_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void comp_algorithm_set(struct zram *zram, u32 idx, const char *alg) { - size_t sz; - struct zram *zram = dev_to_zram(dev); + bool default_alg = false; + int i; - down_read(&zram->init_lock); - sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_ZCOMP], buf); - up_read(&zram->init_lock); + /* Do not kfree() algs that we didn't allocate, IOW the default ones */ + for (i = 0; i < ZRAM_MAX_ZCOMPS; i++) { + if (zram->comp_algs[idx] == default_comp_algs[i]) { + default_alg = true; + break; + } + } - return sz; + if (!default_alg) + kfree(zram->comp_algs[idx]); + zram->comp_algs[idx] = alg; } -static void comp_algorithm_set(struct zram *zram, u32 idx, const char *alg) +static ssize_t __comp_algorithm_show(struct zram *zram, u32 idx, char *buf) { - /* Do not kfree() algs that we didn't allocate, IOW the default ones */ - if (zram->comp_algs[idx] != default_compressor) - kfree(zram->comp_algs[idx]); - zram->comp_algs[idx] = alg; + ssize_t sz; + + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->comp_algs[idx], buf); + up_read(&zram->init_lock); + + return sz; } -static ssize_t comp_algorithm_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static int __comp_algorithm_store(struct zram *zram, u32 idx, const char *buf) { - struct zram *zram = dev_to_zram(dev); char *compressor; size_t sz; @@ -1053,11 +1064,55 @@ static ssize_t comp_algorithm_store(struct device *dev, return -EBUSY; } - comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, compressor); + comp_algorithm_set(zram, idx, compressor); up_write(&zram->init_lock); - return len; + return 0; +} + +static ssize_t comp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return __comp_algorithm_show(zram, ZRAM_PRIMARY_ZCOMP, buf); +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; + + ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_ZCOMP, buf); + return ret ? ret : len; } +#ifdef CONFIG_ZRAM_MULTI_COMP +static ssize_t recomp_algorithm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return __comp_algorithm_show(zram, ZRAM_SECONDARY_ZCOMP, buf); +} + +static ssize_t recomp_algorithm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + struct zram *zram = dev_to_zram(dev); + int ret; + + ret = __comp_algorithm_store(zram, ZRAM_SECONDARY_ZCOMP, buf); + return ret ? ret : len; +} +#endif + static ssize_t compact_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -1762,7 +1817,11 @@ static void zram_reset_device(struct zram *zram) memset(&zram->stats, 0, sizeof(zram->stats)); reset_bdev(zram); - comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, default_compressor); + comp_algorithm_set(zram, ZRAM_PRIMARY_ZCOMP, + default_comp_algs[ZRAM_PRIMARY_ZCOMP]); + if (IS_ENABLED(CONFIG_ZRAM_MULTI_COMP)) + comp_algorithm_set(zram, ZRAM_SECONDARY_ZCOMP, + default_comp_algs[ZRAM_SECONDARY_ZCOMP]); up_write(&zram->init_lock); } @@ -1895,6 +1954,9 @@ static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); static DEVICE_ATTR_RW(writeback_limit_enable); #endif +#ifdef CONFIG_ZRAM_MULTI_COMP +static DEVICE_ATTR_RW(recomp_algorithm); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1918,6 +1980,9 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_bd_stat.attr, #endif &dev_attr_debug_stat.attr, +#ifdef CONFIG_ZRAM_MULTI_COMP + &dev_attr_recomp_algorithm.attr, +#endif NULL, }; @@ -1997,7 +2062,11 @@ static int zram_add(void) if (ret) goto out_cleanup_disk; - zram->comp_algs[ZRAM_PRIMARY_ZCOMP] = default_compressor; + zram->comp_algs[ZRAM_PRIMARY_ZCOMP] = + default_comp_algs[ZRAM_PRIMARY_ZCOMP]; + if (IS_ENABLED(CONFIG_ZRAM_MULTI_COMP)) + zram->comp_algs[ZRAM_SECONDARY_ZCOMP] = + default_comp_algs[ZRAM_SECONDARY_ZCOMP]; zram_debugfs_register(zram); pr_info("Added device: %s\n", zram->disk->disk_name);
Introduce recomp_algorithm sysfs knob that controls secondary algorithm selection used for recompression. This device attribute works in a similar way with comp_algorithm attribute. Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org> --- drivers/block/zram/zram_drv.c | 111 +++++++++++++++++++++++++++------- 1 file changed, 90 insertions(+), 21 deletions(-)