Message ID | 20230302055103.60116-1-frank.li@vivo.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [f2fs-dev,v2] f2fs: introduce discard_cpuset mount opt | expand |
On 3/1/23 21:51, Yangtao Li wrote: > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst > index 2055e72871fe..dc005f3b784a 100644 > --- a/Documentation/filesystems/f2fs.rst > +++ b/Documentation/filesystems/f2fs.rst > @@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records > data block update frequency of the extent per inode, in > order to provide better temperature hints for data block > allocation. > +discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard > + process run faster on a more powerful CPU, or not. > ======================== ============================================================
This seems very counterintuitive. a. GC and discard operations are mostly I/O-bound. b. Both run when the storage is idle unless background conditions are not met and are forced to run synchronously (i.e., foreground). Setting the cpumask to run on the most efficient CPU core makes more sense in my books (especially considering Android's I/O characteristics). Please provide data/elaborations for: i. Why just the discard thread? ii. Does setting the discard thread to big cores provide meaningful and practical improvements? iii. Is it enough to justify an explicit mask over HMP scheduler's own heuristic? iv. Is the additional power consumption for setting the mask to more power hungry cores justified? f2fs mount options are already pretty convoluted, and unfortunately neither your commit message nor the code itself seem to justify its addition imho. Thanks, On Thu, Mar 2, 2023 at 2:52 PM Yangtao Li via Linux-f2fs-devel <linux-f2fs-devel@lists.sourceforge.net> wrote: > > It makes the discard process run faster on a more powerful CPU, or not. > And if bind it to a specific cpu, it is possible to have more cache > locality. > > Signed-off-by: Yangtao Li <frank.li@vivo.com> > --- > v2: > -fix kernel test robot warn > Documentation/filesystems/f2fs.rst | 2 ++ > fs/f2fs/f2fs.h | 1 + > fs/f2fs/segment.c | 8 ++++++- > fs/f2fs/super.c | 36 ++++++++++++++++++++++++++++++ > kernel/kthread.c | 1 + > 5 files changed, 47 insertions(+), 1 deletion(-) > > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst > index 2055e72871fe..dc005f3b784a 100644 > --- a/Documentation/filesystems/f2fs.rst > +++ b/Documentation/filesystems/f2fs.rst > @@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records > data block update frequency of the extent per inode, in > order to provide better temperature hints for data block > allocation. 
> +discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard > + process run faster on a more powerful CPU, or not. > ======================== ============================================================ > > Debugfs Entries > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index b0ab2062038a..62ce02a87d33 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -183,6 +183,7 @@ struct f2fs_mount_info { > int compress_mode; /* compression mode */ > unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ > unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ > + struct cpumask discard_cpumask; /* discard thread cpumask */ > }; > > #define F2FS_FEATURE_ENCRYPT 0x0001 > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 227e25836173..2648c564833e 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -2054,11 +2054,17 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) > if (!f2fs_realtime_discard_enable(sbi)) > return 0; > > - dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, > + dcc->f2fs_issue_discard = kthread_create(issue_discard_thread, sbi, > "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); > if (IS_ERR(dcc->f2fs_issue_discard)) { > err = PTR_ERR(dcc->f2fs_issue_discard); > dcc->f2fs_issue_discard = NULL; > + } else { > + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask)) { > + kthread_bind_mask(dcc->f2fs_issue_discard, > + &F2FS_OPTION(sbi).discard_cpumask); > + } > + wake_up_process(dcc->f2fs_issue_discard); > } > > return err; > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > index fbaaabbcd6de..8ecbe3595f34 100644 > --- a/fs/f2fs/super.c > +++ b/fs/f2fs/super.c > @@ -164,6 +164,7 @@ enum { > Opt_discard_unit, > Opt_memory_mode, > Opt_age_extent_cache, > + Opt_discard_cpuset, > Opt_err, > }; > > @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = { > {Opt_discard_unit, "discard_unit=%s"}, > {Opt_memory_mode, "memory=%s"}, > {Opt_age_extent_cache, 
"age_extent_cache"}, > + {Opt_discard_cpuset, "discard_cpuset=%u"}, > {Opt_err, NULL}, > }; > > @@ -1256,6 +1258,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > case Opt_age_extent_cache: > set_opt(sbi, AGE_EXTENT_CACHE); > break; > + case Opt_discard_cpuset: > + if (!f2fs_hw_support_discard(sbi)) { > + f2fs_warn(sbi, "device does not support discard"); > + break; > + } > + > + if (args->from && match_int(args, &arg)) > + return -EINVAL; > + > + if (!cpu_possible(arg)) { > + f2fs_err(sbi, "invalid cpu%d for discard_cpuset", arg); > + return -EINVAL; > + } > + > + cpumask_set_cpu(arg, &F2FS_OPTION(sbi).discard_cpumask); > + break; > default: > f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", > p); > @@ -1358,6 +1376,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) > f2fs_err(sbi, "Allow to mount readonly mode only"); > return -EROFS; > } > + > + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask) && > + !cpumask_intersects(cpu_online_mask, > + &F2FS_OPTION(sbi).discard_cpumask)) { > + f2fs_err(sbi, "Must include one online CPU for discard_cpuset"); > + return -EINVAL; > + } > + > return 0; > } > > @@ -1884,6 +1910,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, > static int f2fs_show_options(struct seq_file *seq, struct dentry *root) > { > struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); > + unsigned int cpu; > > if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) > seq_printf(seq, ",background_gc=%s", "sync"); > @@ -1909,6 +1936,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) > seq_printf(seq, ",discard_unit=%s", "segment"); > else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) > seq_printf(seq, ",discard_unit=%s", "section"); > + for_each_cpu(cpu, &F2FS_OPTION(sbi).discard_cpumask) > + seq_printf(seq, ",discard_cpuset=%u", cpu); > } else { > seq_puts(seq, ",nodiscard"); > } > @@ -2340,6 +2369,13 @@ static 
int f2fs_remount(struct super_block *sb, int *flags, char *data) > goto restore_opts; > } > > + if (!cpumask_equal(&org_mount_opt.discard_cpumask, > + &F2FS_OPTION(sbi).discard_cpumask)) { > + err = -EINVAL; > + f2fs_warn(sbi, "switch discard_cpuset option is not allowed"); > + goto restore_opts; > + } > + > if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { > err = -EINVAL; > f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); > diff --git a/kernel/kthread.c b/kernel/kthread.c > index 7e6751b29101..8ddc2cd1b27e 100644 > --- a/kernel/kthread.c > +++ b/kernel/kthread.c > @@ -541,6 +541,7 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask) > { > __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE); > } > +EXPORT_SYMBOL_GPL(kthread_bind_mask); > > /** > * kthread_bind - bind a just-created kthread to a cpu. This change to kernel/kthread.c should be made in a separate commit. > -- > 2.25.1 > > > > _______________________________________________ > Linux-f2fs-devel mailing list > Linux-f2fs-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 2055e72871fe..dc005f3b784a 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -351,6 +351,8 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records data block update frequency of the extent per inode, in order to provide better temperature hints for data block allocation. +discard_cpuset=%u Set the cpumask of dicard thread, it makes the discard + process run faster on a more powerful CPU, or not. ======================== ============================================================ Debugfs Entries diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b0ab2062038a..62ce02a87d33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -183,6 +183,7 @@ struct f2fs_mount_info { int compress_mode; /* compression mode */ unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ + struct cpumask discard_cpumask; /* discard thread cpumask */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 227e25836173..2648c564833e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2054,11 +2054,17 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) if (!f2fs_realtime_discard_enable(sbi)) return 0; - dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi, + dcc->f2fs_issue_discard = kthread_create(issue_discard_thread, sbi, "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(dcc->f2fs_issue_discard)) { err = PTR_ERR(dcc->f2fs_issue_discard); dcc->f2fs_issue_discard = NULL; + } else { + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask)) { + kthread_bind_mask(dcc->f2fs_issue_discard, + &F2FS_OPTION(sbi).discard_cpumask); + } + wake_up_process(dcc->f2fs_issue_discard); } return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fbaaabbcd6de..8ecbe3595f34 100644 
--- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -164,6 +164,7 @@ enum { Opt_discard_unit, Opt_memory_mode, Opt_age_extent_cache, + Opt_discard_cpuset, Opt_err, }; @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = { {Opt_discard_unit, "discard_unit=%s"}, {Opt_memory_mode, "memory=%s"}, {Opt_age_extent_cache, "age_extent_cache"}, + {Opt_discard_cpuset, "discard_cpuset=%u"}, {Opt_err, NULL}, }; @@ -1256,6 +1258,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_age_extent_cache: set_opt(sbi, AGE_EXTENT_CACHE); break; + case Opt_discard_cpuset: + if (!f2fs_hw_support_discard(sbi)) { + f2fs_warn(sbi, "device does not support discard"); + break; + } + + if (args->from && match_int(args, &arg)) + return -EINVAL; + + if (!cpu_possible(arg)) { + f2fs_err(sbi, "invalid cpu%d for discard_cpuset", arg); + return -EINVAL; + } + + cpumask_set_cpu(arg, &F2FS_OPTION(sbi).discard_cpumask); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1358,6 +1376,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; } + + if (!cpumask_empty(&F2FS_OPTION(sbi).discard_cpumask) && + !cpumask_intersects(cpu_online_mask, + &F2FS_OPTION(sbi).discard_cpumask)) { + f2fs_err(sbi, "Must include one online CPU for discard_cpuset"); + return -EINVAL; + } + return 0; } @@ -1884,6 +1910,7 @@ static inline void f2fs_show_compress_options(struct seq_file *seq, static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); + unsigned int cpu; if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) seq_printf(seq, ",background_gc=%s", "sync"); @@ -1909,6 +1936,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",discard_unit=%s", "segment"); else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) seq_printf(seq, 
",discard_unit=%s", "section"); + for_each_cpu(cpu, &F2FS_OPTION(sbi).discard_cpumask) + seq_printf(seq, ",discard_cpuset=%u", cpu); } else { seq_puts(seq, ",nodiscard"); } @@ -2340,6 +2369,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (!cpumask_equal(&org_mount_opt.discard_cpumask, + &F2FS_OPTION(sbi).discard_cpumask)) { + err = -EINVAL; + f2fs_warn(sbi, "switch discard_cpuset option is not allowed"); + goto restore_opts; + } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); diff --git a/kernel/kthread.c b/kernel/kthread.c index 7e6751b29101..8ddc2cd1b27e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -541,6 +541,7 @@ void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask) { __kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL_GPL(kthread_bind_mask); /** * kthread_bind - bind a just-created kthread to a cpu.
It makes the discard process run faster on a more powerful CPU, or not. And if it is bound to a specific CPU, it may gain better cache locality. Signed-off-by: Yangtao Li <frank.li@vivo.com> --- v2: -fix kernel test robot warn Documentation/filesystems/f2fs.rst | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 8 ++++++- fs/f2fs/super.c | 36 ++++++++++++++++++++++++++++++ kernel/kthread.c | 1 + 5 files changed, 47 insertions(+), 1 deletion(-)