Message ID | 20220729062356.1663513-2-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | dm: fix nullptr crash | expand |
On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote: > From: Christoph Hellwig <hch@lst.de> > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream. > > Invert they way the holder relations are tracked. This very > slightly reduces the memory overhead for partitioned devices. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > block/genhd.c | 3 +++ > fs/block_dev.c | 31 +++++++++++++++++++------------ > include/linux/blk_types.h | 3 --- > include/linux/genhd.h | 4 +++- > 4 files changed, 25 insertions(+), 16 deletions(-) > > diff --git a/block/genhd.c b/block/genhd.c > index 796baf761202..2b11a2735285 100644 > --- a/block/genhd.c > +++ b/block/genhd.c > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) > disk_to_dev(disk)->class = &block_class; > disk_to_dev(disk)->type = &disk_type; > device_initialize(disk_to_dev(disk)); > +#ifdef CONFIG_SYSFS > + INIT_LIST_HEAD(&disk->slave_bdevs); > +#endif > return disk; > > out_free_part0: > diff --git a/fs/block_dev.c b/fs/block_dev.c > index 29f020c4b2d0..a202c76fcf7f 100644 > --- a/fs/block_dev.c > +++ b/fs/block_dev.c > @@ -823,9 +823,6 @@ static void init_once(void *foo) > > memset(bdev, 0, sizeof(*bdev)); > mutex_init(&bdev->bd_mutex); > -#ifdef CONFIG_SYSFS > - INIT_LIST_HEAD(&bdev->bd_holder_disks); > -#endif > bdev->bd_bdi = &noop_backing_dev_info; > inode_init_once(&ei->vfs_inode); > /* Initialize mutex for freeze. */ > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); > #ifdef CONFIG_SYSFS > struct bd_holder_disk { > struct list_head list; > - struct gendisk *disk; > + struct block_device *bdev; > int refcnt; > }; > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, > { > struct bd_holder_disk *holder; > > - list_for_each_entry(holder, &bdev->bd_holder_disks, list) > - if (holder->disk == disk) > + list_for_each_entry(holder, &disk->slave_bdevs, list) > + if (holder->bdev == bdev) > return holder; > return NULL; > } > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) > int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > { > struct bd_holder_disk *holder; > + struct block_device *bdev_holder = bdget_disk(disk, 0); > int ret = 0; > > - mutex_lock(&bdev->bd_mutex); > + if (WARN_ON_ONCE(!bdev_holder)) > + return -ENOENT; > + > + mutex_lock(&bdev_holder->bd_mutex); > > WARN_ON_ONCE(!bdev->bd_holder); > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > } > > INIT_LIST_HEAD(&holder->list); > - holder->disk = disk; > + holder->bdev = bdev; > holder->refcnt = 1; > > ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > */ > kobject_get(bdev->bd_part->holder_dir); > > - list_add(&holder->list, &bdev->bd_holder_disks); > + list_add(&holder->list, &disk->slave_bdevs); > goto out_unlock; > > out_del: > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > out_free: > kfree(holder); > out_unlock: > - mutex_unlock(&bdev->bd_mutex); > + mutex_unlock(&bdev_holder->bd_mutex); > + bdput(bdev_holder); > return ret; > } > EXPORT_SYMBOL_GPL(bd_link_disk_holder); > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); > void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > { > struct bd_holder_disk *holder; > + struct block_device *bdev_holder = bdget_disk(disk, 0); > > - mutex_lock(&bdev->bd_mutex); > + if (WARN_ON_ONCE(!bdev_holder)) > + return; > + > + mutex_lock(&bdev_holder->bd_mutex); > > holder = bd_find_holder_disk(bdev, disk); > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > kfree(holder); > } > > - mutex_unlock(&bdev->bd_mutex); > + mutex_unlock(&bdev_holder->bd_mutex); > + bdput(bdev_holder); > } > EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); > #endif > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > index d9b69bbde5cc..1b84ecb34c18 100644 > --- a/include/linux/blk_types.h > +++ b/include/linux/blk_types.h > @@ -29,9 +29,6 @@ struct block_device { > void * bd_holder; > int bd_holders; > bool bd_write_holder; > -#ifdef CONFIG_SYSFS > - struct list_head bd_holder_disks; > -#endif > struct block_device * bd_contains; > u8 bd_partno; > struct hd_struct * bd_part; > diff --git a/include/linux/genhd.h b/include/linux/genhd.h > index 03da3f603d30..3e5049a527e6 100644 > --- a/include/linux/genhd.h > +++ b/include/linux/genhd.h > @@ -195,7 +195,9 @@ struct gendisk { > #define GD_NEED_PART_SCAN 0 > struct rw_semaphore lookup_sem; > struct kobject *slave_dir; > - > +#ifdef CONFIG_SYSFS > + struct list_head slave_bdevs; > +#endif This is very different from the upstream version, and forces the change onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED enabled like was done in the main kernel tree. Why force this on all and not just use the same option? I would need a strong ACK from the original developers and maintainers of these subsystems before being able to take these into the 5.10 tree. What prevents you from just using 5.15 for those systems that run into these issues? thanks, greg k-h
Hi, Greg 在 2022/08/01 19:19, Greg KH 写道: > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote: >> From: Christoph Hellwig <hch@lst.de> >> >> commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream. >> >> Invert they way the holder relations are tracked. This very >> slightly reduces the memory overhead for partitioned devices. >> >> Signed-off-by: Christoph Hellwig <hch@lst.de> >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> block/genhd.c | 3 +++ >> fs/block_dev.c | 31 +++++++++++++++++++------------ >> include/linux/blk_types.h | 3 --- >> include/linux/genhd.h | 4 +++- >> 4 files changed, 25 insertions(+), 16 deletions(-) >> >> diff --git a/block/genhd.c b/block/genhd.c >> index 796baf761202..2b11a2735285 100644 >> --- a/block/genhd.c >> +++ b/block/genhd.c >> @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) >> disk_to_dev(disk)->class = &block_class; >> disk_to_dev(disk)->type = &disk_type; >> device_initialize(disk_to_dev(disk)); >> +#ifdef CONFIG_SYSFS >> + INIT_LIST_HEAD(&disk->slave_bdevs); >> +#endif >> return disk; >> >> out_free_part0: >> diff --git a/fs/block_dev.c b/fs/block_dev.c >> index 29f020c4b2d0..a202c76fcf7f 100644 >> --- a/fs/block_dev.c >> +++ b/fs/block_dev.c >> @@ -823,9 +823,6 @@ static void init_once(void *foo) >> >> memset(bdev, 0, sizeof(*bdev)); >> mutex_init(&bdev->bd_mutex); >> -#ifdef CONFIG_SYSFS >> - INIT_LIST_HEAD(&bdev->bd_holder_disks); >> -#endif >> bdev->bd_bdi = &noop_backing_dev_info; >> inode_init_once(&ei->vfs_inode); >> /* Initialize mutex for freeze. */ >> @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); >> #ifdef CONFIG_SYSFS >> struct bd_holder_disk { >> struct list_head list; >> - struct gendisk *disk; >> + struct block_device *bdev; >> int refcnt; >> }; >> >> @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, >> { >> struct bd_holder_disk *holder; >> >> - list_for_each_entry(holder, &bdev->bd_holder_disks, list) >> - if (holder->disk == disk) >> + list_for_each_entry(holder, &disk->slave_bdevs, list) >> + if (holder->bdev == bdev) >> return holder; >> return NULL; >> } >> @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) >> int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >> { >> struct bd_holder_disk *holder; >> + struct block_device *bdev_holder = bdget_disk(disk, 0); >> int ret = 0; >> >> - mutex_lock(&bdev->bd_mutex); >> + if (WARN_ON_ONCE(!bdev_holder)) >> + return -ENOENT; >> + >> + mutex_lock(&bdev_holder->bd_mutex); >> >> WARN_ON_ONCE(!bdev->bd_holder); >> >> @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >> } >> >> INIT_LIST_HEAD(&holder->list); >> - holder->disk = disk; >> + holder->bdev = bdev; >> holder->refcnt = 1; >> >> ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); >> @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >> */ >> kobject_get(bdev->bd_part->holder_dir); >> >> - list_add(&holder->list, &bdev->bd_holder_disks); >> + list_add(&holder->list, &disk->slave_bdevs); >> goto out_unlock; >> >> out_del: >> @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >> out_free: >> kfree(holder); >> out_unlock: >> - mutex_unlock(&bdev->bd_mutex); >> + mutex_unlock(&bdev_holder->bd_mutex); >> + bdput(bdev_holder); >> return ret; >> } >> EXPORT_SYMBOL_GPL(bd_link_disk_holder); >> @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); >> void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) >> { >> struct bd_holder_disk *holder; >> + struct block_device *bdev_holder = bdget_disk(disk, 0); >> >> - mutex_lock(&bdev->bd_mutex); >> + if (WARN_ON_ONCE(!bdev_holder)) >> + return; >> + >> + mutex_lock(&bdev_holder->bd_mutex); >> >> holder = bd_find_holder_disk(bdev, disk); >> >> @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) >> kfree(holder); >> } >> >> - mutex_unlock(&bdev->bd_mutex); >> + mutex_unlock(&bdev_holder->bd_mutex); >> + bdput(bdev_holder); >> } >> EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); >> #endif >> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >> index d9b69bbde5cc..1b84ecb34c18 100644 >> --- a/include/linux/blk_types.h >> +++ b/include/linux/blk_types.h >> @@ -29,9 +29,6 @@ struct block_device { >> void * bd_holder; >> int bd_holders; >> bool bd_write_holder; >> -#ifdef CONFIG_SYSFS >> - struct list_head bd_holder_disks; >> -#endif >> struct block_device * bd_contains; >> u8 bd_partno; >> struct hd_struct * bd_part; >> diff --git a/include/linux/genhd.h b/include/linux/genhd.h >> index 03da3f603d30..3e5049a527e6 100644 >> --- a/include/linux/genhd.h >> +++ b/include/linux/genhd.h >> @@ -195,7 +195,9 @@ struct gendisk { >> #define GD_NEED_PART_SCAN 0 >> struct rw_semaphore lookup_sem; >> struct kobject *slave_dir; >> - >> +#ifdef CONFIG_SYSFS >> + struct list_head slave_bdevs; >> +#endif > > This is very different from the upstream version, and forces the change > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED > enabled like was done in the main kernel tree. > > Why force this on all and not just use the same option? > > I would need a strong ACK from the original developers and maintainers > of these subsystems before being able to take these into the 5.10 tree. Yes, I agree that Christoph must take a look first. > > What prevents you from just using 5.15 for those systems that run into > these issues? The null pointer problem is reported by our product(related to dm- mpath), and they had been using 5.10 for a long time. And switching kernel for commercial will require lots of work, it's not an option for now. Thanks, Kuai
On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote: > Hi, Greg > > 在 2022/08/01 19:19, Greg KH 写道: > > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote: > > > From: Christoph Hellwig <hch@lst.de> > > > > > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream. > > > > > > Invert they way the holder relations are tracked. This very > > > slightly reduces the memory overhead for partitioned devices. > > > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > > > --- > > > block/genhd.c | 3 +++ > > > fs/block_dev.c | 31 +++++++++++++++++++------------ > > > include/linux/blk_types.h | 3 --- > > > include/linux/genhd.h | 4 +++- > > > 4 files changed, 25 insertions(+), 16 deletions(-) > > > > > > diff --git a/block/genhd.c b/block/genhd.c > > > index 796baf761202..2b11a2735285 100644 > > > --- a/block/genhd.c > > > +++ b/block/genhd.c > > > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) > > > disk_to_dev(disk)->class = &block_class; > > > disk_to_dev(disk)->type = &disk_type; > > > device_initialize(disk_to_dev(disk)); > > > +#ifdef CONFIG_SYSFS > > > + INIT_LIST_HEAD(&disk->slave_bdevs); > > > +#endif > > > return disk; > > > out_free_part0: > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > > index 29f020c4b2d0..a202c76fcf7f 100644 > > > --- a/fs/block_dev.c > > > +++ b/fs/block_dev.c > > > @@ -823,9 +823,6 @@ static void init_once(void *foo) > > > memset(bdev, 0, sizeof(*bdev)); > > > mutex_init(&bdev->bd_mutex); > > > -#ifdef CONFIG_SYSFS > > > - INIT_LIST_HEAD(&bdev->bd_holder_disks); > > > -#endif > > > bdev->bd_bdi = &noop_backing_dev_info; > > > inode_init_once(&ei->vfs_inode); > > > /* Initialize mutex for freeze. */ > > > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); > > > #ifdef CONFIG_SYSFS > > > struct bd_holder_disk { > > > struct list_head list; > > > - struct gendisk *disk; > > > + struct block_device *bdev; > > > int refcnt; > > > }; > > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, > > > { > > > struct bd_holder_disk *holder; > > > - list_for_each_entry(holder, &bdev->bd_holder_disks, list) > > > - if (holder->disk == disk) > > > + list_for_each_entry(holder, &disk->slave_bdevs, list) > > > + if (holder->bdev == bdev) > > > return holder; > > > return NULL; > > > } > > > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) > > > int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > { > > > struct bd_holder_disk *holder; > > > + struct block_device *bdev_holder = bdget_disk(disk, 0); > > > int ret = 0; > > > - mutex_lock(&bdev->bd_mutex); > > > + if (WARN_ON_ONCE(!bdev_holder)) > > > + return -ENOENT; > > > + > > > + mutex_lock(&bdev_holder->bd_mutex); > > > WARN_ON_ONCE(!bdev->bd_holder); > > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > } > > > INIT_LIST_HEAD(&holder->list); > > > - holder->disk = disk; > > > + holder->bdev = bdev; > > > holder->refcnt = 1; > > > ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); > > > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > */ > > > kobject_get(bdev->bd_part->holder_dir); > > > - list_add(&holder->list, &bdev->bd_holder_disks); > > > + list_add(&holder->list, &disk->slave_bdevs); > > > goto out_unlock; > > > out_del: > > > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > out_free: > > > kfree(holder); > > > out_unlock: > > > - mutex_unlock(&bdev->bd_mutex); > > > + mutex_unlock(&bdev_holder->bd_mutex); > > > + bdput(bdev_holder); > > > return ret; > > > } > > > EXPORT_SYMBOL_GPL(bd_link_disk_holder); > > > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); > > > void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > { > > > struct bd_holder_disk *holder; > > > + struct block_device *bdev_holder = bdget_disk(disk, 0); > > > - mutex_lock(&bdev->bd_mutex); > > > + if (WARN_ON_ONCE(!bdev_holder)) > > > + return; > > > + > > > + mutex_lock(&bdev_holder->bd_mutex); > > > holder = bd_find_holder_disk(bdev, disk); > > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > kfree(holder); > > > } > > > - mutex_unlock(&bdev->bd_mutex); > > > + mutex_unlock(&bdev_holder->bd_mutex); > > > + bdput(bdev_holder); > > > } > > > EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); > > > #endif > > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > > > index d9b69bbde5cc..1b84ecb34c18 100644 > > > --- a/include/linux/blk_types.h > > > +++ b/include/linux/blk_types.h > > > @@ -29,9 +29,6 @@ struct block_device { > > > void * bd_holder; > > > int bd_holders; > > > bool bd_write_holder; > > > -#ifdef CONFIG_SYSFS > > > - struct list_head bd_holder_disks; > > > -#endif > > > struct block_device * bd_contains; > > > u8 bd_partno; > > > struct hd_struct * bd_part; > > > diff --git a/include/linux/genhd.h b/include/linux/genhd.h > > > index 03da3f603d30..3e5049a527e6 100644 > > > --- a/include/linux/genhd.h > > > +++ b/include/linux/genhd.h > > > @@ -195,7 +195,9 @@ struct gendisk { > > > #define GD_NEED_PART_SCAN 0 > > > struct rw_semaphore lookup_sem; > > > struct kobject *slave_dir; > > > - > > > +#ifdef CONFIG_SYSFS > > > + struct list_head slave_bdevs; > > > +#endif > > > > This is very different from the upstream version, and forces the change > > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED > > enabled like was done in the main kernel tree. > > > > Why force this on all and not just use the same option? > > > > I would need a strong ACK from the original developers and maintainers > > of these subsystems before being able to take these into the 5.10 tree. > > Yes, I agree that Christoph must take a look first. > > > > What prevents you from just using 5.15 for those systems that run into > > these issues? > > The null pointer problem is reported by our product(related to dm- > mpath), and they had been using 5.10 for a long time. And switching > kernel for commercial will require lots of work, it's not an option > for now. It should be the same validation and verification and testing path for 5.15.y as for an intrusive kernel change as this one, right? What makes it any different to prevent 5.15 from being used? thanks, greg k-h
Hi, Greg 在 2022/08/01 21:17, Greg KH 写道: > On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote: >> Hi, Greg >> >> 在 2022/08/01 19:19, Greg KH 写道: >>> On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote: >>>> From: Christoph Hellwig <hch@lst.de> >>>> >>>> commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream. >>>> >>>> Invert they way the holder relations are tracked. This very >>>> slightly reduces the memory overhead for partitioned devices. >>>> >>>> Signed-off-by: Christoph Hellwig <hch@lst.de> >>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >>>> --- >>>> block/genhd.c | 3 +++ >>>> fs/block_dev.c | 31 +++++++++++++++++++------------ >>>> include/linux/blk_types.h | 3 --- >>>> include/linux/genhd.h | 4 +++- >>>> 4 files changed, 25 insertions(+), 16 deletions(-) >>>> >>>> diff --git a/block/genhd.c b/block/genhd.c >>>> index 796baf761202..2b11a2735285 100644 >>>> --- a/block/genhd.c >>>> +++ b/block/genhd.c >>>> @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) >>>> disk_to_dev(disk)->class = &block_class; >>>> disk_to_dev(disk)->type = &disk_type; >>>> device_initialize(disk_to_dev(disk)); >>>> +#ifdef CONFIG_SYSFS >>>> + INIT_LIST_HEAD(&disk->slave_bdevs); >>>> +#endif >>>> return disk; >>>> out_free_part0: >>>> diff --git a/fs/block_dev.c b/fs/block_dev.c >>>> index 29f020c4b2d0..a202c76fcf7f 100644 >>>> --- a/fs/block_dev.c >>>> +++ b/fs/block_dev.c >>>> @@ -823,9 +823,6 @@ static void init_once(void *foo) >>>> memset(bdev, 0, sizeof(*bdev)); >>>> mutex_init(&bdev->bd_mutex); >>>> -#ifdef CONFIG_SYSFS >>>> - INIT_LIST_HEAD(&bdev->bd_holder_disks); >>>> -#endif >>>> bdev->bd_bdi = &noop_backing_dev_info; >>>> inode_init_once(&ei->vfs_inode); >>>> /* Initialize mutex for freeze. */ >>>> @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); >>>> #ifdef CONFIG_SYSFS >>>> struct bd_holder_disk { >>>> struct list_head list; >>>> - struct gendisk *disk; >>>> + struct block_device *bdev; >>>> int refcnt; >>>> }; >>>> @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, >>>> { >>>> struct bd_holder_disk *holder; >>>> - list_for_each_entry(holder, &bdev->bd_holder_disks, list) >>>> - if (holder->disk == disk) >>>> + list_for_each_entry(holder, &disk->slave_bdevs, list) >>>> + if (holder->bdev == bdev) >>>> return holder; >>>> return NULL; >>>> } >>>> @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) >>>> int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> { >>>> struct bd_holder_disk *holder; >>>> + struct block_device *bdev_holder = bdget_disk(disk, 0); >>>> int ret = 0; >>>> - mutex_lock(&bdev->bd_mutex); >>>> + if (WARN_ON_ONCE(!bdev_holder)) >>>> + return -ENOENT; >>>> + >>>> + mutex_lock(&bdev_holder->bd_mutex); >>>> WARN_ON_ONCE(!bdev->bd_holder); >>>> @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> } >>>> INIT_LIST_HEAD(&holder->list); >>>> - holder->disk = disk; >>>> + holder->bdev = bdev; >>>> holder->refcnt = 1; >>>> ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); >>>> @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> */ >>>> kobject_get(bdev->bd_part->holder_dir); >>>> - list_add(&holder->list, &bdev->bd_holder_disks); >>>> + list_add(&holder->list, &disk->slave_bdevs); >>>> goto out_unlock; >>>> out_del: >>>> @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> out_free: >>>> kfree(holder); >>>> out_unlock: >>>> - mutex_unlock(&bdev->bd_mutex); >>>> + mutex_unlock(&bdev_holder->bd_mutex); >>>> + bdput(bdev_holder); >>>> return ret; >>>> } >>>> EXPORT_SYMBOL_GPL(bd_link_disk_holder); >>>> @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); >>>> void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> { >>>> struct bd_holder_disk *holder; >>>> + struct block_device *bdev_holder = bdget_disk(disk, 0); >>>> - mutex_lock(&bdev->bd_mutex); >>>> + if (WARN_ON_ONCE(!bdev_holder)) >>>> + return; >>>> + >>>> + mutex_lock(&bdev_holder->bd_mutex); >>>> holder = bd_find_holder_disk(bdev, disk); >>>> @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) >>>> kfree(holder); >>>> } >>>> - mutex_unlock(&bdev->bd_mutex); >>>> + mutex_unlock(&bdev_holder->bd_mutex); >>>> + bdput(bdev_holder); >>>> } >>>> EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); >>>> #endif >>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h >>>> index d9b69bbde5cc..1b84ecb34c18 100644 >>>> --- a/include/linux/blk_types.h >>>> +++ b/include/linux/blk_types.h >>>> @@ -29,9 +29,6 @@ struct block_device { >>>> void * bd_holder; >>>> int bd_holders; >>>> bool bd_write_holder; >>>> -#ifdef CONFIG_SYSFS >>>> - struct list_head bd_holder_disks; >>>> -#endif >>>> struct block_device * bd_contains; >>>> u8 bd_partno; >>>> struct hd_struct * bd_part; >>>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h >>>> index 03da3f603d30..3e5049a527e6 100644 >>>> --- a/include/linux/genhd.h >>>> +++ b/include/linux/genhd.h >>>> @@ -195,7 +195,9 @@ struct gendisk { >>>> #define GD_NEED_PART_SCAN 0 >>>> struct rw_semaphore lookup_sem; >>>> struct kobject *slave_dir; >>>> - >>>> +#ifdef CONFIG_SYSFS >>>> + struct list_head slave_bdevs; >>>> +#endif >>> >>> This is very different from the upstream version, and forces the change >>> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED >>> enabled like was done in the main kernel tree. >>> >>> Why force this on all and not just use the same option? >>> >>> I would need a strong ACK from the original developers and maintainers >>> of these subsystems before being able to take these into the 5.10 tree. >> >> Yes, I agree that Christoph must take a look first. >>> >>> What prevents you from just using 5.15 for those systems that run into >>> these issues? >> >> The null pointer problem is reported by our product(related to dm- >> mpath), and they had been using 5.10 for a long time. And switching >> kernel for commercial will require lots of work, it's not an option >> for now. > > It should be the same validation and verification and testing path for > 5.15.y as for an intrusive kernel change as this one, right? What makes > it any different to prevent 5.15 from being used? No, they are not the same, if we try to use 5.15.y, we have to test all other stuff that our product is used currently(ext4, block, scsi and lots of other modules). With this patch, we only need to make sure dm works fine. Thanks, Kuai
On Mon, Aug 01, 2022 at 09:39:30PM +0800, Yu Kuai wrote: > Hi, Greg > > 在 2022/08/01 21:17, Greg KH 写道: > > On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote: > > > Hi, Greg > > > > > > 在 2022/08/01 19:19, Greg KH 写道: > > > > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote: > > > > > From: Christoph Hellwig <hch@lst.de> > > > > > > > > > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream. > > > > > > > > > > Invert they way the holder relations are tracked. This very > > > > > slightly reduces the memory overhead for partitioned devices. > > > > > > > > > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > > > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > > > > > --- > > > > > block/genhd.c | 3 +++ > > > > > fs/block_dev.c | 31 +++++++++++++++++++------------ > > > > > include/linux/blk_types.h | 3 --- > > > > > include/linux/genhd.h | 4 +++- > > > > > 4 files changed, 25 insertions(+), 16 deletions(-) > > > > > > > > > > diff --git a/block/genhd.c b/block/genhd.c > > > > > index 796baf761202..2b11a2735285 100644 > > > > > --- a/block/genhd.c > > > > > +++ b/block/genhd.c > > > > > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) > > > > > disk_to_dev(disk)->class = &block_class; > > > > > disk_to_dev(disk)->type = &disk_type; > > > > > device_initialize(disk_to_dev(disk)); > > > > > +#ifdef CONFIG_SYSFS > > > > > + INIT_LIST_HEAD(&disk->slave_bdevs); > > > > > +#endif > > > > > return disk; > > > > > out_free_part0: > > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c > > > > > index 29f020c4b2d0..a202c76fcf7f 100644 > > > > > --- a/fs/block_dev.c > > > > > +++ b/fs/block_dev.c > > > > > @@ -823,9 +823,6 @@ static void init_once(void *foo) > > > > > memset(bdev, 0, sizeof(*bdev)); > > > > > mutex_init(&bdev->bd_mutex); > > > > > -#ifdef CONFIG_SYSFS > > > > > - INIT_LIST_HEAD(&bdev->bd_holder_disks); > > > > > -#endif > > > > > bdev->bd_bdi = &noop_backing_dev_info; > > > > > inode_init_once(&ei->vfs_inode); > > > > > /* Initialize mutex for freeze. */ > > > > > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); > > > > > #ifdef CONFIG_SYSFS > > > > > struct bd_holder_disk { > > > > > struct list_head list; > > > > > - struct gendisk *disk; > > > > > + struct block_device *bdev; > > > > > int refcnt; > > > > > }; > > > > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, > > > > > { > > > > > struct bd_holder_disk *holder; > > > > > - list_for_each_entry(holder, &bdev->bd_holder_disks, list) > > > > > - if (holder->disk == disk) > > > > > + list_for_each_entry(holder, &disk->slave_bdevs, list) > > > > > + if (holder->bdev == bdev) > > > > > return holder; > > > > > return NULL; > > > > > } > > > > > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) > > > > > int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > { > > > > > struct bd_holder_disk *holder; > > > > > + struct block_device *bdev_holder = bdget_disk(disk, 0); > > > > > int ret = 0; > > > > > - mutex_lock(&bdev->bd_mutex); > > > > > + if (WARN_ON_ONCE(!bdev_holder)) > > > > > + return -ENOENT; > > > > > + > > > > > + mutex_lock(&bdev_holder->bd_mutex); > > > > > WARN_ON_ONCE(!bdev->bd_holder); > > > > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > } > > > > > INIT_LIST_HEAD(&holder->list); > > > > > - holder->disk = disk; > > > > > + holder->bdev = bdev; > > > > > holder->refcnt = 1; > > > > > ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); > > > > > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > */ > > > > > kobject_get(bdev->bd_part->holder_dir); > > > > > - list_add(&holder->list, &bdev->bd_holder_disks); > > > > > + list_add(&holder->list, &disk->slave_bdevs); > > > > > goto out_unlock; > > > > > out_del: > > > > > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > out_free: > > > > > kfree(holder); > > > > > out_unlock: > > > > > - mutex_unlock(&bdev->bd_mutex); > > > > > + mutex_unlock(&bdev_holder->bd_mutex); > > > > > + bdput(bdev_holder); > > > > > return ret; > > > > > } > > > > > EXPORT_SYMBOL_GPL(bd_link_disk_holder); > > > > > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); > > > > > void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > { > > > > > struct bd_holder_disk *holder; > > > > > + struct block_device *bdev_holder = bdget_disk(disk, 0); > > > > > - mutex_lock(&bdev->bd_mutex); > > > > > + if (WARN_ON_ONCE(!bdev_holder)) > > > > > + return; > > > > > + > > > > > + mutex_lock(&bdev_holder->bd_mutex); > > > > > holder = bd_find_holder_disk(bdev, disk); > > > > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) > > > > > kfree(holder); > > > > > } > > > > > - mutex_unlock(&bdev->bd_mutex); > > > > > + mutex_unlock(&bdev_holder->bd_mutex); > > > > > + bdput(bdev_holder); > > > > > } > > > > > EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); > > > > > #endif > > > > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h > > > > > index d9b69bbde5cc..1b84ecb34c18 100644 > > > > > --- a/include/linux/blk_types.h > > > > > +++ b/include/linux/blk_types.h > > > > > @@ -29,9 +29,6 @@ struct block_device { > > > > > void * bd_holder; > > > > > int bd_holders; > > > > > bool bd_write_holder; > > > > > -#ifdef CONFIG_SYSFS > > > > > - struct list_head bd_holder_disks; > > > > > -#endif > > > > > struct block_device * bd_contains; > > > > > u8 bd_partno; > > > > > struct hd_struct * bd_part; > > > > > diff --git a/include/linux/genhd.h b/include/linux/genhd.h > > > > > index 03da3f603d30..3e5049a527e6 100644 > > > > > --- a/include/linux/genhd.h > > > > > +++ b/include/linux/genhd.h > > > > > @@ -195,7 +195,9 @@ struct gendisk { > > > > > #define GD_NEED_PART_SCAN 0 > > > > > struct rw_semaphore lookup_sem; > > > > > struct kobject *slave_dir; > > > > > - > > > > > +#ifdef CONFIG_SYSFS > > > > > + struct list_head slave_bdevs; > > > > > +#endif > > > > > > > > This is very different from the upstream version, and forces the change > > > > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED > > > > enabled like was done in the main kernel tree. > > > > > > > > Why force this on all and not just use the same option? > > > > > > > > I would need a strong ACK from the original developers and maintainers > > > > of these subsystems before being able to take these into the 5.10 tree. > > > > > > Yes, I agree that Christoph must take a look first. > > > > > > > > What prevents you from just using 5.15 for those systems that run into > > > > these issues? > > > > > > The null pointer problem is reported by our product(related to dm- > > > mpath), and they had been using 5.10 for a long time. And switching > > > kernel for commercial will require lots of work, it's not an option > > > for now. > > > > It should be the same validation and verification and testing path for > > 5.15.y as for an intrusive kernel change as this one, right? What makes > > it any different to prevent 5.15 from being used? > > No, they are not the same, if we try to use 5.15.y, we have to test all > other stuff that our product is used currently(ext4, block, scsi and > lots of other modules). With this patch, we only need to make sure > dm works fine. That is a very odd way to test a monolithic kernel image, where any change in any part can affect any other part of the kernel. You touched the block layer, why you wouldn't think that all block-related things would also have to be tested is very strange to me as those are directly related. And while you are at it, you should test all other subsystems as the system is released as a whole, not as individual changes that are not unrelated. I think you need to revisit your testing strategy, good luck! greg k-h
On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote: > This is very different from the upstream version, and forces the change > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED > enabled like was done in the main kernel tree. > > Why force this on all and not just use the same option? I'm really worried about backports that are significantly different from the original commit. To the point where if they are so different and we don't have a grave security or data integrity bug I'm really not very much in favor of backporting them at all.
On Mon, Aug 01, 2022 at 08:04:58PM +0200, Christoph Hellwig wrote: > On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote: > > This is very different from the upstream version, and forces the change > > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED > > enabled like was done in the main kernel tree. > > > > Why force this on all and not just use the same option? > > I'm really worried about backports that are significantly different > from the original commit. To the point where if they are so different > and we don't have a grave security or data integrity bug I'm really not > very much in favor of backporting them at all. > I agree, I'll drop this from my review queue now, thanks. greg k-h
Hi, Christoph 在 2022/08/02 13:11, Greg KH 写道: > On Mon, Aug 01, 2022 at 08:04:58PM +0200, Christoph Hellwig wrote: >> On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote: >>> This is very different from the upstream version, and forces the change >>> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED >>> enabled like was done in the main kernel tree. >>> >>> Why force this on all and not just use the same option? >> >> I'm really worried about backports that are significantly different >> from the original commit. To the point where if they are so different >> and we don't have a grave security or data integrity bug I'm really not >> very much in favor of backporting them at all. >> I do understand that backporting these patches is not good, thanks for taking time on this patchset. I decided to push following patch in our version: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 588e8b43efab..c047d5fcb325 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2149,12 +2149,16 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) switch (type) { case DM_TYPE_REQUEST_BASED: - md->disk->fops = &dm_rq_blk_dops; r = dm_mq_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based dm mapped device"); return r; } + /* + * Change the fops after queue is initialized, so that bio won't + * issued by rq-based path until that. + */ + md->disk->fops = &dm_rq_blk_dops; break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: This way only modify dm, and the problem can be fixed. If you guys think this is OK, I can send a patch for LTS. Otherwise, if you guys still think the null-ptr-def problem is uncommon and doesn't worth to fix, please ignore this mail. Thanks, Kuai > > I agree, I'll drop this from my review queue now, thanks. > > greg k-h > . >
diff --git a/block/genhd.c b/block/genhd.c index 796baf761202..2b11a2735285 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) disk_to_dev(disk)->class = &block_class; disk_to_dev(disk)->type = &disk_type; device_initialize(disk_to_dev(disk)); +#ifdef CONFIG_SYSFS + INIT_LIST_HEAD(&disk->slave_bdevs); +#endif return disk; out_free_part0: diff --git a/fs/block_dev.c b/fs/block_dev.c index 29f020c4b2d0..a202c76fcf7f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -823,9 +823,6 @@ static void init_once(void *foo) memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); -#ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_disks); -#endif bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); /* Initialize mutex for freeze. */ @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming); #ifdef CONFIG_SYSFS struct bd_holder_disk { struct list_head list; - struct gendisk *disk; + struct block_device *bdev; int refcnt; }; @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev, { struct bd_holder_disk *holder; - list_for_each_entry(holder, &bdev->bd_holder_disks, list) - if (holder->disk == disk) + list_for_each_entry(holder, &disk->slave_bdevs, list) + if (holder->bdev == bdev) return holder; return NULL; } @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to) int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; + struct block_device *bdev_holder = bdget_disk(disk, 0); int ret = 0; - mutex_lock(&bdev->bd_mutex); + if (WARN_ON_ONCE(!bdev_holder)) + return -ENOENT; + + mutex_lock(&bdev_holder->bd_mutex); WARN_ON_ONCE(!bdev->bd_holder); @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) } INIT_LIST_HEAD(&holder->list); - holder->disk = disk; + holder->bdev = bdev; holder->refcnt = 1; ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) */ kobject_get(bdev->bd_part->holder_dir); - list_add(&holder->list, &bdev->bd_holder_disks); + list_add(&holder->list, &disk->slave_bdevs); goto out_unlock; out_del: @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) out_free: kfree(holder); out_unlock: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev_holder->bd_mutex); + bdput(bdev_holder); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; + struct block_device *bdev_holder = bdget_disk(disk, 0); - mutex_lock(&bdev->bd_mutex); + if (WARN_ON_ONCE(!bdev_holder)) + return; + + mutex_lock(&bdev_holder->bd_mutex); holder = bd_find_holder_disk(bdev, disk); @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) kfree(holder); } - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev_holder->bd_mutex); + bdput(bdev_holder); } EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d9b69bbde5cc..1b84ecb34c18 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -29,9 +29,6 @@ struct block_device { void * bd_holder; int bd_holders; bool bd_write_holder; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_disks; -#endif struct block_device * bd_contains; u8 bd_partno; struct hd_struct * bd_part; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..3e5049a527e6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -195,7 +195,9 @@ struct gendisk { #define GD_NEED_PART_SCAN 0 struct rw_semaphore lookup_sem; struct kobject *slave_dir; - +#ifdef CONFIG_SYSFS + struct list_head slave_bdevs; +#endif struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev;