Message ID | 20230425075558.3450970-1-lilingfeng@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [-next] block: don't allow a disk link holder to its ancestor | expand |
Li, On 4/25/23 00:55, Li Lingfeng wrote: > From: Li Lingfeng <lilingfeng3@huawei.com> > > Previously commit 077a4033541f ("block: don't allow a disk link holder > to itself") prevent user from reloading dm with itself. However, user > can reload dm with its ancestor which will trigger dead loop and result > in oom. > > Test procedures: > 1) dmsetup create test --table "0 20971520 linear /dev/sdd 0" > 2) dmsetup create test1 --table "0 20971520 linear /dev/sdd 20971520" > 3) dmsetup suspend test > 4) dmsetup reload test --table "0 2048 linear /dev/mapper/test1 0" > 5) dmsetup resume test > 6) dmsetup suspend test1 > 7) dmsetup reload test1 --table "0 2048 linear /dev/mapper/test 0" > 8) dmsetup resume test1 > Can you please add blktest for this one ? -ck
Friendly ping ... Thanks 在 2023/4/25 15:55, Li Lingfeng 写道: > From: Li Lingfeng <lilingfeng3@huawei.com> > > Previously commit 077a4033541f ("block: don't allow a disk link holder > to itself") prevent user from reloading dm with itself. However, user > can reload dm with its ancestor which will trigger dead loop and result > in oom. > > Test procedures: > 1) dmsetup create test --table "0 20971520 linear /dev/sdd 0" > 2) dmsetup create test1 --table "0 20971520 linear /dev/sdd 20971520" > 3) dmsetup suspend test > 4) dmsetup reload test --table "0 2048 linear /dev/mapper/test1 0" > 5) dmsetup resume test > 6) dmsetup suspend test1 > 7) dmsetup reload test1 --table "0 2048 linear /dev/mapper/test 0" > 8) dmsetup resume test1 > > Dead loop: > [ 229.681231] Call Trace: > [ 229.681232] dm_dax_supported+0x5b/0xa0 > [ 229.681233] dax_supported+0x28/0x50 > [ 229.681234] device_not_dax_capable+0x45/0x70 > [ 229.681235] ? realloc_argv+0xa0/0xa0 > [ 229.681236] linear_iterate_devices+0x25/0x30 > [ 229.681237] dm_table_supports_dax+0x42/0xd0 > [ 229.681238] dm_dax_supported+0x5b/0xa0 > [ 229.681238] dax_supported+0x28/0x50 > [ 229.681239] device_not_dax_capable+0x45/0x70 > ......(a lot of same lines) > [ 229.681423] ? realloc_argv+0xa0/0xa0 > [ 229.681424] linear_iterate_devices+0x25/0x30 > [ 229.681425] dm_table_supports_dax+0x42/0xd0 > [ 229.681426] dm > [ 229.681428] Lost 437 message(s)! > [ 229.825588] ---[ end trace 0f2a9db839ed5b56 ]--- > > OOM: > [ 189.270011] Call Trace: > [ 189.270274] <TASK> > [ 189.270511] dump_stack_lvl+0xc1/0x170 > [ 189.270899] dump_stack+0x14/0x20 > [ 189.271222] dump_header+0x5a/0x710 > [ 189.271590] oom_kill_process+0x16b/0x500 > [ 189.272018] out_of_memory+0x333/0xad0 > [ 189.272453] __alloc_pages_slowpath.constprop.0+0x18b4/0x1c40 > [ 189.273130] ? find_held_lock+0x33/0xf0 > [ 189.273637] __alloc_pages+0x598/0x660 > [ 189.274106] alloc_pages+0x95/0x240 > [ 189.274482] folio_alloc+0x1f/0x60 > [ 189.274835] filemap_alloc_folio+0x223/0x350 > [ 189.275348] __filemap_get_folio+0x21e/0x770 > [ 189.275916] filemap_fault+0x72d/0xdc0 > [ 189.276454] __do_fault+0x41/0x360 > [ 189.276820] do_fault+0x263/0x8f0 > [ 189.277175] __handle_mm_fault+0x9af/0x1b20 > [ 189.277810] handle_mm_fault+0x128/0x570 > [ 189.278243] do_user_addr_fault+0x2af/0xea0 > [ 189.278733] exc_page_fault+0x73/0x340 > [ 189.279133] asm_exc_page_fault+0x22/0x30 > [ 189.279523] RIP: 0033:0x561e82ac67f0 > > Forbidding a disk to create link to its ancestor can solve the problem. > What's more, limit device depth to prevent recursive overflow. > > Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> > --- > block/holder.c | 42 ++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 42 insertions(+) > > diff --git a/block/holder.c b/block/holder.c > index 37d18c13d958..6a8571b7d9c5 100644 > --- a/block/holder.c > +++ b/block/holder.c > @@ -2,9 +2,13 @@ > #include <linux/blkdev.h> > #include <linux/slab.h> > > +#define DEVICE_DEPTH 5 > +static DEFINE_MUTEX(slave_bdevs_lock); > + > struct bd_holder_disk { > struct list_head list; > struct kobject *holder_dir; > + struct gendisk *slave_disk; > int refcnt; > }; > > @@ -29,6 +33,32 @@ static void del_symlink(struct kobject *from, struct kobject *to) > sysfs_remove_link(from, kobject_name(to)); > } > > +static struct gendisk *iterate_slave_disk(struct gendisk *disk, > + struct gendisk *target, int depth) > +{ > + struct bd_holder_disk *holder; > + struct gendisk *iter_slave; > + > + if (!depth) > + return target; > + > + if (list_empty_careful(&disk->slave_bdevs)) > + return NULL; > + > + depth--; > + list_for_each_entry(holder, &disk->slave_bdevs, list) { > + if (holder->slave_disk == target) > + return target; > + > + iter_slave = iterate_slave_disk(holder->slave_disk, target, depth); > + if (iter_slave) > + return iter_slave; > + > + cond_resched(); > + } > + return NULL; > +} > + > /** > * bd_link_disk_holder - create symlinks between holding disk and slave bdev > * @bdev: the claimed slave bdev > @@ -62,6 +92,13 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > struct bd_holder_disk *holder; > int ret = 0; > > + mutex_lock(&slave_bdevs_lock); > + if (iterate_slave_disk(bdev->bd_disk, disk, DEVICE_DEPTH)) { > + mutex_unlock(&slave_bdevs_lock); > + return -EINVAL; > + } > + mutex_unlock(&slave_bdevs_lock); > + > if (WARN_ON_ONCE(!disk->slave_dir)) > return -EINVAL; > > @@ -81,6 +118,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > mutex_unlock(&bdev->bd_disk->open_mutex); > > mutex_lock(&disk->open_mutex); > + mutex_lock(&slave_bdevs_lock); > WARN_ON_ONCE(!bdev->bd_holder); > > holder = bd_find_holder_disk(bdev, disk); > @@ -106,8 +144,10 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); > if (ret) > goto out_del_symlink; > + holder->slave_disk = bdev->bd_disk; > list_add(&holder->list, &disk->slave_bdevs); > > + mutex_unlock(&slave_bdevs_lock); > mutex_unlock(&disk->open_mutex); > return 0; > > @@ -141,6 +181,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > return; > > mutex_lock(&disk->open_mutex); > + mutex_lock(&slave_bdevs_lock); > holder = bd_find_holder_disk(bdev, disk); > if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { > del_symlink(disk->slave_dir, bdev_kobj(bdev)); > @@ -149,6 +190,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > list_del_init(&holder->list); > kfree(holder); > } > + mutex_unlock(&slave_bdevs_lock); > mutex_unlock(&disk->open_mutex); > } > EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
Ping again Thanks 在 2023/10/8 9:14, Li Lingfeng 写道: > Friendly ping ... > > Thanks > > 在 2023/4/25 15:55, Li Lingfeng 写道: >> From: Li Lingfeng <lilingfeng3@huawei.com> >> >> Previously commit 077a4033541f ("block: don't allow a disk link holder >> to itself") prevent user from reloading dm with itself. However, user >> can reload dm with its ancestor which will trigger dead loop and result >> in oom. >> >> Test procedures: >> 1) dmsetup create test --table "0 20971520 linear /dev/sdd 0" >> 2) dmsetup create test1 --table "0 20971520 linear /dev/sdd 20971520" >> 3) dmsetup suspend test >> 4) dmsetup reload test --table "0 2048 linear /dev/mapper/test1 0" >> 5) dmsetup resume test >> 6) dmsetup suspend test1 >> 7) dmsetup reload test1 --table "0 2048 linear /dev/mapper/test 0" >> 8) dmsetup resume test1 >> >> Dead loop: >> [ 229.681231] Call Trace: >> [ 229.681232] dm_dax_supported+0x5b/0xa0 >> [ 229.681233] dax_supported+0x28/0x50 >> [ 229.681234] device_not_dax_capable+0x45/0x70 >> [ 229.681235] ? realloc_argv+0xa0/0xa0 >> [ 229.681236] linear_iterate_devices+0x25/0x30 >> [ 229.681237] dm_table_supports_dax+0x42/0xd0 >> [ 229.681238] dm_dax_supported+0x5b/0xa0 >> [ 229.681238] dax_supported+0x28/0x50 >> [ 229.681239] device_not_dax_capable+0x45/0x70 >> ......(a lot of same lines) >> [ 229.681423] ? realloc_argv+0xa0/0xa0 >> [ 229.681424] linear_iterate_devices+0x25/0x30 >> [ 229.681425] dm_table_supports_dax+0x42/0xd0 >> [ 229.681426] dm >> [ 229.681428] Lost 437 message(s)! >> [ 229.825588] ---[ end trace 0f2a9db839ed5b56 ]--- >> >> OOM: >> [ 189.270011] Call Trace: >> [ 189.270274] <TASK> >> [ 189.270511] dump_stack_lvl+0xc1/0x170 >> [ 189.270899] dump_stack+0x14/0x20 >> [ 189.271222] dump_header+0x5a/0x710 >> [ 189.271590] oom_kill_process+0x16b/0x500 >> [ 189.272018] out_of_memory+0x333/0xad0 >> [ 189.272453] __alloc_pages_slowpath.constprop.0+0x18b4/0x1c40 >> [ 189.273130] ? find_held_lock+0x33/0xf0 >> [ 189.273637] __alloc_pages+0x598/0x660 >> [ 189.274106] alloc_pages+0x95/0x240 >> [ 189.274482] folio_alloc+0x1f/0x60 >> [ 189.274835] filemap_alloc_folio+0x223/0x350 >> [ 189.275348] __filemap_get_folio+0x21e/0x770 >> [ 189.275916] filemap_fault+0x72d/0xdc0 >> [ 189.276454] __do_fault+0x41/0x360 >> [ 189.276820] do_fault+0x263/0x8f0 >> [ 189.277175] __handle_mm_fault+0x9af/0x1b20 >> [ 189.277810] handle_mm_fault+0x128/0x570 >> [ 189.278243] do_user_addr_fault+0x2af/0xea0 >> [ 189.278733] exc_page_fault+0x73/0x340 >> [ 189.279133] asm_exc_page_fault+0x22/0x30 >> [ 189.279523] RIP: 0033:0x561e82ac67f0 >> >> Forbidding a disk to create link to its ancestor can solve the problem. >> What's more, limit device depth to prevent recursive overflow. >> >> Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> >> --- >> block/holder.c | 42 ++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 42 insertions(+) >> >> diff --git a/block/holder.c b/block/holder.c >> index 37d18c13d958..6a8571b7d9c5 100644 >> --- a/block/holder.c >> +++ b/block/holder.c >> @@ -2,9 +2,13 @@ >> #include <linux/blkdev.h> >> #include <linux/slab.h> >> +#define DEVICE_DEPTH 5 >> +static DEFINE_MUTEX(slave_bdevs_lock); >> + >> struct bd_holder_disk { >> struct list_head list; >> struct kobject *holder_dir; >> + struct gendisk *slave_disk; >> int refcnt; >> }; >> @@ -29,6 +33,32 @@ static void del_symlink(struct kobject *from, >> struct kobject *to) >> sysfs_remove_link(from, kobject_name(to)); >> } >> +static struct gendisk *iterate_slave_disk(struct gendisk *disk, >> + struct gendisk *target, int depth) >> +{ >> + struct bd_holder_disk *holder; >> + struct gendisk *iter_slave; >> + >> + if (!depth) >> + return target; >> + >> + if (list_empty_careful(&disk->slave_bdevs)) >> + return NULL; >> + >> + depth--; >> + list_for_each_entry(holder, &disk->slave_bdevs, list) { >> + if (holder->slave_disk == target) >> + return target; >> + >> + iter_slave = iterate_slave_disk(holder->slave_disk, target, >> depth); >> + if (iter_slave) >> + return iter_slave; >> + >> + cond_resched(); >> + } >> + return NULL; >> +} >> + >> /** >> * bd_link_disk_holder - create symlinks between holding disk and >> slave bdev >> * @bdev: the claimed slave bdev >> @@ -62,6 +92,13 @@ int bd_link_disk_holder(struct block_device *bdev, >> struct gendisk *disk) >> struct bd_holder_disk *holder; >> int ret = 0; >> + mutex_lock(&slave_bdevs_lock); >> + if (iterate_slave_disk(bdev->bd_disk, disk, DEVICE_DEPTH)) { >> + mutex_unlock(&slave_bdevs_lock); >> + return -EINVAL; >> + } >> + mutex_unlock(&slave_bdevs_lock); >> + >> if (WARN_ON_ONCE(!disk->slave_dir)) >> return -EINVAL; >> @@ -81,6 +118,7 @@ int bd_link_disk_holder(struct block_device >> *bdev, struct gendisk *disk) >> mutex_unlock(&bdev->bd_disk->open_mutex); >> mutex_lock(&disk->open_mutex); >> + mutex_lock(&slave_bdevs_lock); >> WARN_ON_ONCE(!bdev->bd_holder); >> holder = bd_find_holder_disk(bdev, disk); >> @@ -106,8 +144,10 @@ int bd_link_disk_holder(struct block_device >> *bdev, struct gendisk *disk) >> ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); >> if (ret) >> goto out_del_symlink; >> + holder->slave_disk = bdev->bd_disk; >> list_add(&holder->list, &disk->slave_bdevs); >> + mutex_unlock(&slave_bdevs_lock); >> mutex_unlock(&disk->open_mutex); >> return 0; >> @@ -141,6 +181,7 @@ void bd_unlink_disk_holder(struct block_device >> *bdev, struct gendisk *disk) >> return; >> mutex_lock(&disk->open_mutex); >> + mutex_lock(&slave_bdevs_lock); >> holder = bd_find_holder_disk(bdev, disk); >> if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { >> del_symlink(disk->slave_dir, bdev_kobj(bdev)); >> @@ -149,6 +190,7 @@ void bd_unlink_disk_holder(struct block_device >> *bdev, struct gendisk *disk) >> list_del_init(&holder->list); >> kfree(holder); >> } >> + mutex_unlock(&slave_bdevs_lock); >> mutex_unlock(&disk->open_mutex); >> } >> EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
diff --git a/block/holder.c b/block/holder.c index 37d18c13d958..6a8571b7d9c5 100644 --- a/block/holder.c +++ b/block/holder.c @@ -2,9 +2,13 @@ #include <linux/blkdev.h> #include <linux/slab.h> +#define DEVICE_DEPTH 5 +static DEFINE_MUTEX(slave_bdevs_lock); + struct bd_holder_disk { struct list_head list; struct kobject *holder_dir; + struct gendisk *slave_disk; int refcnt; }; @@ -29,6 +33,32 @@ static void del_symlink(struct kobject *from, struct kobject *to) sysfs_remove_link(from, kobject_name(to)); } +static struct gendisk *iterate_slave_disk(struct gendisk *disk, + struct gendisk *target, int depth) +{ + struct bd_holder_disk *holder; + struct gendisk *iter_slave; + + if (!depth) + return target; + + if (list_empty_careful(&disk->slave_bdevs)) + return NULL; + + depth--; + list_for_each_entry(holder, &disk->slave_bdevs, list) { + if (holder->slave_disk == target) + return target; + + iter_slave = iterate_slave_disk(holder->slave_disk, target, depth); + if (iter_slave) + return iter_slave; + + cond_resched(); + } + return NULL; +} + /** * bd_link_disk_holder - create symlinks between holding disk and slave bdev * @bdev: the claimed slave bdev @@ -62,6 +92,13 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) struct bd_holder_disk *holder; int ret = 0; + mutex_lock(&slave_bdevs_lock); + if (iterate_slave_disk(bdev->bd_disk, disk, DEVICE_DEPTH)) { + mutex_unlock(&slave_bdevs_lock); + return -EINVAL; + } + mutex_unlock(&slave_bdevs_lock); + if (WARN_ON_ONCE(!disk->slave_dir)) return -EINVAL; @@ -81,6 +118,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) mutex_unlock(&bdev->bd_disk->open_mutex); mutex_lock(&disk->open_mutex); + mutex_lock(&slave_bdevs_lock); WARN_ON_ONCE(!bdev->bd_holder); holder = bd_find_holder_disk(bdev, disk); @@ -106,8 +144,10 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del_symlink; + holder->slave_disk = bdev->bd_disk; list_add(&holder->list, &disk->slave_bdevs); + mutex_unlock(&slave_bdevs_lock); mutex_unlock(&disk->open_mutex); return 0; @@ -141,6 +181,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) return; mutex_lock(&disk->open_mutex); + mutex_lock(&slave_bdevs_lock); holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { del_symlink(disk->slave_dir, bdev_kobj(bdev)); @@ -149,6 +190,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) list_del_init(&holder->list); kfree(holder); } + mutex_unlock(&slave_bdevs_lock); mutex_unlock(&disk->open_mutex); } EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);