Message ID | 20241021142343.3857891-4-yangerkun@huaweicloud.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | bugfix for c_show/e_show | expand |
On Mon, Oct 21, 2024 at 10:23:43PM +0800, Yang Erkun wrote: > From: Yang Erkun <yangerkun@huawei.com> > > The last reference for `cache_head` can be reduced to zero in `c_show` > and `e_show`(using `rcu_read_lock` and `rcu_read_unlock`). Consequently, > `svc_export_put` and `expkey_put` will be invoked, leading to two > issues: > > 1. The `svc_export_put` will directly free ex_uuid. However, > `e_show`/`c_show` will access `ex_uuid` after `cache_put`, which can > trigger a use-after-free issue, shown below. > > ================================================================== > BUG: KASAN: slab-use-after-free in svc_export_show+0x362/0x430 [nfsd] > Read of size 1 at addr ff11000010fdc120 by task cat/870 > > CPU: 1 UID: 0 PID: 870 Comm: cat Not tainted 6.12.0-rc3+ #1 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS > 1.16.1-2.fc37 04/01/2014 > Call Trace: > <TASK> > dump_stack_lvl+0x53/0x70 > print_address_description.constprop.0+0x2c/0x3a0 > print_report+0xb9/0x280 > kasan_report+0xae/0xe0 > svc_export_show+0x362/0x430 [nfsd] > c_show+0x161/0x390 [sunrpc] > seq_read_iter+0x589/0x770 > seq_read+0x1e5/0x270 > proc_reg_read+0xe1/0x140 > vfs_read+0x125/0x530 > ksys_read+0xc1/0x160 > do_syscall_64+0x5f/0x170 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > Allocated by task 830: > kasan_save_stack+0x20/0x40 > kasan_save_track+0x14/0x30 > __kasan_kmalloc+0x8f/0xa0 > __kmalloc_node_track_caller_noprof+0x1bc/0x400 > kmemdup_noprof+0x22/0x50 > svc_export_parse+0x8a9/0xb80 [nfsd] > cache_do_downcall+0x71/0xa0 [sunrpc] > cache_write_procfs+0x8e/0xd0 [sunrpc] > proc_reg_write+0xe1/0x140 > vfs_write+0x1a5/0x6d0 > ksys_write+0xc1/0x160 > do_syscall_64+0x5f/0x170 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > Freed by task 868: > kasan_save_stack+0x20/0x40 > kasan_save_track+0x14/0x30 > kasan_save_free_info+0x3b/0x60 > __kasan_slab_free+0x37/0x50 > kfree+0xf3/0x3e0 > svc_export_put+0x87/0xb0 [nfsd] > cache_purge+0x17f/0x1f0 [sunrpc] > nfsd_destroy_serv+0x226/0x2d0 
[nfsd] > nfsd_svc+0x125/0x1e0 [nfsd] > write_threads+0x16a/0x2a0 [nfsd] > nfsctl_transaction_write+0x74/0xa0 [nfsd] > vfs_write+0x1a5/0x6d0 > ksys_write+0xc1/0x160 > do_syscall_64+0x5f/0x170 > entry_SYSCALL_64_after_hwframe+0x76/0x7e > > 2. We cannot sleep while using `rcu_read_lock`/`rcu_read_unlock`. > However, `svc_export_put`/`expkey_put` will call path_put, which > subsequently triggers a sleeping operation due to the following > `dput`. > > ============================= > WARNING: suspicious RCU usage > 5.10.0-dirty #141 Not tainted > ----------------------------- > ... > Call Trace: > dump_stack+0x9a/0xd0 > ___might_sleep+0x231/0x240 > dput+0x39/0x600 > path_put+0x1b/0x30 > svc_export_put+0x17/0x80 > e_show+0x1c9/0x200 > seq_read_iter+0x63f/0x7c0 > seq_read+0x226/0x2d0 > vfs_read+0x113/0x2c0 > ksys_read+0xc9/0x170 > do_syscall_64+0x33/0x40 > entry_SYSCALL_64_after_hwframe+0x67/0xd1 > > Fix these issues by using `rcu_work` to help release > `svc_expkey`/`svc_export`. This approach allows for an asynchronous > context to invoke `path_put` and also facilitates the freeing of > `uuid/exp/key` after an RCU grace period. > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") I'd go with: Fixes: 9ceddd9da134 ("knfsd: Allow lockless lookups of the exports") I plan to apply these three to nfsd-next (for v6.13). 
> Signed-off-by: Yang Erkun <yangerkun@huawei.com> > --- > fs/nfsd/export.c | 31 +++++++++++++++++++++++++------ > fs/nfsd/export.h | 4 ++-- > 2 files changed, 27 insertions(+), 8 deletions(-) > > diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c > index 49aede376d86..6d0455973d64 100644 > --- a/fs/nfsd/export.c > +++ b/fs/nfsd/export.c > @@ -40,15 +40,24 @@ > #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) > #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) > > -static void expkey_put(struct kref *ref) > +static void expkey_put_work(struct work_struct *work) > { > - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); > + struct svc_expkey *key = > + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); > > if (test_bit(CACHE_VALID, &key->h.flags) && > !test_bit(CACHE_NEGATIVE, &key->h.flags)) > path_put(&key->ek_path); > auth_domain_put(key->ek_client); > - kfree_rcu(key, ek_rcu); > + kfree(key); > +} > + > +static void expkey_put(struct kref *ref) > +{ > + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); > + > + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); > + queue_rcu_work(system_wq, &key->ek_rcu_work); > } > > static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) > @@ -355,16 +364,26 @@ static void export_stats_destroy(struct export_stats *stats) > EXP_STATS_COUNTERS_NUM); > } > > -static void svc_export_put(struct kref *ref) > +static void svc_export_put_work(struct work_struct *work) > { > - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); > + struct svc_export *exp = > + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); > + > path_put(&exp->ex_path); > auth_domain_put(exp->ex_client); > nfsd4_fslocs_free(&exp->ex_fslocs); > export_stats_destroy(exp->ex_stats); > kfree(exp->ex_stats); > kfree(exp->ex_uuid); > - kfree_rcu(exp, ex_rcu); > + kfree(exp); > +} > + > +static void svc_export_put(struct kref *ref) > +{ > + struct svc_export *exp = container_of(ref, 
struct svc_export, h.ref); > + > + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); > + queue_rcu_work(system_wq, &exp->ex_rcu_work); > } > > static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) > diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h > index 3794ae253a70..081afb68681e 100644 > --- a/fs/nfsd/export.h > +++ b/fs/nfsd/export.h > @@ -75,7 +75,7 @@ struct svc_export { > u32 ex_layout_types; > struct nfsd4_deviceid_map *ex_devid_map; > struct cache_detail *cd; > - struct rcu_head ex_rcu; > + struct rcu_work ex_rcu_work; > unsigned long ex_xprtsec_modes; > struct export_stats *ex_stats; > }; > @@ -92,7 +92,7 @@ struct svc_expkey { > u32 ek_fsid[6]; > > struct path ek_path; > - struct rcu_head ek_rcu; > + struct rcu_work ek_rcu_work; > }; > > #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) > -- > 2.39.2 >
在 2024/10/22 1:12, Chuck Lever 写道: > On Mon, Oct 21, 2024 at 10:23:43PM +0800, Yang Erkun wrote: >> From: Yang Erkun <yangerkun@huawei.com> >> >> The last reference for `cache_head` can be reduced to zero in `c_show` >> and `e_show`(using `rcu_read_lock` and `rcu_read_unlock`). Consequently, >> `svc_export_put` and `expkey_put` will be invoked, leading to two >> issues: >> >> 1. The `svc_export_put` will directly free ex_uuid. However, >> `e_show`/`c_show` will access `ex_uuid` after `cache_put`, which can >> trigger a use-after-free issue, shown below. >> >> ================================================================== >> BUG: KASAN: slab-use-after-free in svc_export_show+0x362/0x430 [nfsd] >> Read of size 1 at addr ff11000010fdc120 by task cat/870 >> >> CPU: 1 UID: 0 PID: 870 Comm: cat Not tainted 6.12.0-rc3+ #1 >> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS >> 1.16.1-2.fc37 04/01/2014 >> Call Trace: >> <TASK> >> dump_stack_lvl+0x53/0x70 >> print_address_description.constprop.0+0x2c/0x3a0 >> print_report+0xb9/0x280 >> kasan_report+0xae/0xe0 >> svc_export_show+0x362/0x430 [nfsd] >> c_show+0x161/0x390 [sunrpc] >> seq_read_iter+0x589/0x770 >> seq_read+0x1e5/0x270 >> proc_reg_read+0xe1/0x140 >> vfs_read+0x125/0x530 >> ksys_read+0xc1/0x160 >> do_syscall_64+0x5f/0x170 >> entry_SYSCALL_64_after_hwframe+0x76/0x7e >> >> Allocated by task 830: >> kasan_save_stack+0x20/0x40 >> kasan_save_track+0x14/0x30 >> __kasan_kmalloc+0x8f/0xa0 >> __kmalloc_node_track_caller_noprof+0x1bc/0x400 >> kmemdup_noprof+0x22/0x50 >> svc_export_parse+0x8a9/0xb80 [nfsd] >> cache_do_downcall+0x71/0xa0 [sunrpc] >> cache_write_procfs+0x8e/0xd0 [sunrpc] >> proc_reg_write+0xe1/0x140 >> vfs_write+0x1a5/0x6d0 >> ksys_write+0xc1/0x160 >> do_syscall_64+0x5f/0x170 >> entry_SYSCALL_64_after_hwframe+0x76/0x7e >> >> Freed by task 868: >> kasan_save_stack+0x20/0x40 >> kasan_save_track+0x14/0x30 >> kasan_save_free_info+0x3b/0x60 >> __kasan_slab_free+0x37/0x50 >> kfree+0xf3/0x3e0 >> 
svc_export_put+0x87/0xb0 [nfsd] >> cache_purge+0x17f/0x1f0 [sunrpc] >> nfsd_destroy_serv+0x226/0x2d0 [nfsd] >> nfsd_svc+0x125/0x1e0 [nfsd] >> write_threads+0x16a/0x2a0 [nfsd] >> nfsctl_transaction_write+0x74/0xa0 [nfsd] >> vfs_write+0x1a5/0x6d0 >> ksys_write+0xc1/0x160 >> do_syscall_64+0x5f/0x170 >> entry_SYSCALL_64_after_hwframe+0x76/0x7e >> >> 2. We cannot sleep while using `rcu_read_lock`/`rcu_read_unlock`. >> However, `svc_export_put`/`expkey_put` will call path_put, which >> subsequently triggers a sleeping operation due to the following >> `dput`. >> >> ============================= >> WARNING: suspicious RCU usage >> 5.10.0-dirty #141 Not tainted >> ----------------------------- >> ... >> Call Trace: >> dump_stack+0x9a/0xd0 >> ___might_sleep+0x231/0x240 >> dput+0x39/0x600 >> path_put+0x1b/0x30 >> svc_export_put+0x17/0x80 >> e_show+0x1c9/0x200 >> seq_read_iter+0x63f/0x7c0 >> seq_read+0x226/0x2d0 >> vfs_read+0x113/0x2c0 >> ksys_read+0xc9/0x170 >> do_syscall_64+0x33/0x40 >> entry_SYSCALL_64_after_hwframe+0x67/0xd1 >> >> Fix these issues by using `rcu_work` to help release >> `svc_expkey`/`svc_export`. This approach allows for an asynchronous >> context to invoke `path_put` and also facilitates the freeing of >> `uuid/exp/key` after an RCU grace period. >> >> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") > > I'd go with: > > Fixes: 9ceddd9da134 ("knfsd: Allow lockless lookups of the exports") Hi! Your advice for these three patches looks good to me; thanks for your review! > > I plan to apply these three to nfsd-next (for v6.13). 
> > >> Signed-off-by: Yang Erkun <yangerkun@huawei.com> >> --- >> fs/nfsd/export.c | 31 +++++++++++++++++++++++++------ >> fs/nfsd/export.h | 4 ++-- >> 2 files changed, 27 insertions(+), 8 deletions(-) >> >> diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c >> index 49aede376d86..6d0455973d64 100644 >> --- a/fs/nfsd/export.c >> +++ b/fs/nfsd/export.c >> @@ -40,15 +40,24 @@ >> #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) >> #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) >> >> -static void expkey_put(struct kref *ref) >> +static void expkey_put_work(struct work_struct *work) >> { >> - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); >> + struct svc_expkey *key = >> + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); >> >> if (test_bit(CACHE_VALID, &key->h.flags) && >> !test_bit(CACHE_NEGATIVE, &key->h.flags)) >> path_put(&key->ek_path); >> auth_domain_put(key->ek_client); >> - kfree_rcu(key, ek_rcu); >> + kfree(key); >> +} >> + >> +static void expkey_put(struct kref *ref) >> +{ >> + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); >> + >> + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); >> + queue_rcu_work(system_wq, &key->ek_rcu_work); >> } >> >> static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) >> @@ -355,16 +364,26 @@ static void export_stats_destroy(struct export_stats *stats) >> EXP_STATS_COUNTERS_NUM); >> } >> >> -static void svc_export_put(struct kref *ref) >> +static void svc_export_put_work(struct work_struct *work) >> { >> - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); >> + struct svc_export *exp = >> + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); >> + >> path_put(&exp->ex_path); >> auth_domain_put(exp->ex_client); >> nfsd4_fslocs_free(&exp->ex_fslocs); >> export_stats_destroy(exp->ex_stats); >> kfree(exp->ex_stats); >> kfree(exp->ex_uuid); >> - kfree_rcu(exp, ex_rcu); >> + kfree(exp); >> +} >> + >> +static void svc_export_put(struct 
kref *ref) >> +{ >> + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); >> + >> + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); >> + queue_rcu_work(system_wq, &exp->ex_rcu_work); >> } >> >> static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) >> diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h >> index 3794ae253a70..081afb68681e 100644 >> --- a/fs/nfsd/export.h >> +++ b/fs/nfsd/export.h >> @@ -75,7 +75,7 @@ struct svc_export { >> u32 ex_layout_types; >> struct nfsd4_deviceid_map *ex_devid_map; >> struct cache_detail *cd; >> - struct rcu_head ex_rcu; >> + struct rcu_work ex_rcu_work; >> unsigned long ex_xprtsec_modes; >> struct export_stats *ex_stats; >> }; >> @@ -92,7 +92,7 @@ struct svc_expkey { >> u32 ek_fsid[6]; >> >> struct path ek_path; >> - struct rcu_head ek_rcu; >> + struct rcu_work ek_rcu_work; >> }; >> >> #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) >> -- >> 2.39.2 >> >
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 49aede376d86..6d0455973d64 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -40,15 +40,24 @@ #define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) #define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) -static void expkey_put(struct kref *ref) +static void expkey_put_work(struct work_struct *work) { - struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + struct svc_expkey *key = + container_of(to_rcu_work(work), struct svc_expkey, ek_rcu_work); if (test_bit(CACHE_VALID, &key->h.flags) && !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree_rcu(key, ek_rcu); + kfree(key); +} + +static void expkey_put(struct kref *ref) +{ + struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref); + + INIT_RCU_WORK(&key->ek_rcu_work, expkey_put_work); + queue_rcu_work(system_wq, &key->ek_rcu_work); } static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) @@ -355,16 +364,26 @@ static void export_stats_destroy(struct export_stats *stats) EXP_STATS_COUNTERS_NUM); } -static void svc_export_put(struct kref *ref) +static void svc_export_put_work(struct work_struct *work) { - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + struct svc_export *exp = + container_of(to_rcu_work(work), struct svc_export, ex_rcu_work); + path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree_rcu(exp, ex_rcu); + kfree(exp); +} + +static void svc_export_put(struct kref *ref) +{ + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + + INIT_RCU_WORK(&exp->ex_rcu_work, svc_export_put_work); + queue_rcu_work(system_wq, &exp->ex_rcu_work); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index 3794ae253a70..081afb68681e 100644 --- 
a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -75,7 +75,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; - struct rcu_head ex_rcu; + struct rcu_work ex_rcu_work; unsigned long ex_xprtsec_modes; struct export_stats *ex_stats; }; @@ -92,7 +92,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; - struct rcu_head ek_rcu; + struct rcu_work ek_rcu_work; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))