diff mbox series

[stable,5.10,1/3] block: look up holders by bdev

Message ID 20220729062356.1663513-2-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series dm: fix nullptr crash | expand

Commit Message

Yu Kuai July 29, 2022, 6:23 a.m. UTC
From: Christoph Hellwig <hch@lst.de>

commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.

Invert they way the holder relations are tracked.  This very
slightly reduces the memory overhead for partitioned devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/genhd.c             |  3 +++
 fs/block_dev.c            | 31 +++++++++++++++++++------------
 include/linux/blk_types.h |  3 ---
 include/linux/genhd.h     |  4 +++-
 4 files changed, 25 insertions(+), 16 deletions(-)

Comments

Greg Kroah-Hartman Aug. 1, 2022, 11:19 a.m. UTC | #1
On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
> 
> Invert they way the holder relations are tracked.  This very
> slightly reduces the memory overhead for partitioned devices.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/genhd.c             |  3 +++
>  fs/block_dev.c            | 31 +++++++++++++++++++------------
>  include/linux/blk_types.h |  3 ---
>  include/linux/genhd.h     |  4 +++-
>  4 files changed, 25 insertions(+), 16 deletions(-)
> 
> diff --git a/block/genhd.c b/block/genhd.c
> index 796baf761202..2b11a2735285 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	disk_to_dev(disk)->class = &block_class;
>  	disk_to_dev(disk)->type = &disk_type;
>  	device_initialize(disk_to_dev(disk));
> +#ifdef CONFIG_SYSFS
> +	INIT_LIST_HEAD(&disk->slave_bdevs);
> +#endif
>  	return disk;
>  
>  out_free_part0:
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 29f020c4b2d0..a202c76fcf7f 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -823,9 +823,6 @@ static void init_once(void *foo)
>  
>  	memset(bdev, 0, sizeof(*bdev));
>  	mutex_init(&bdev->bd_mutex);
> -#ifdef CONFIG_SYSFS
> -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
> -#endif
>  	bdev->bd_bdi = &noop_backing_dev_info;
>  	inode_init_once(&ei->vfs_inode);
>  	/* Initialize mutex for freeze. */
> @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
>  #ifdef CONFIG_SYSFS
>  struct bd_holder_disk {
>  	struct list_head	list;
> -	struct gendisk		*disk;
> +	struct block_device	*bdev;
>  	int			refcnt;
>  };
>  
> @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
>  {
>  	struct bd_holder_disk *holder;
>  
> -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
> -		if (holder->disk == disk)
> +	list_for_each_entry(holder, &disk->slave_bdevs, list)
> +		if (holder->bdev == bdev)
>  			return holder;
>  	return NULL;
>  }
> @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
>  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  {
>  	struct bd_holder_disk *holder;
> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>  	int ret = 0;
>  
> -	mutex_lock(&bdev->bd_mutex);
> +	if (WARN_ON_ONCE(!bdev_holder))
> +		return -ENOENT;
> +
> +	mutex_lock(&bdev_holder->bd_mutex);
>  
>  	WARN_ON_ONCE(!bdev->bd_holder);
>  
> @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  	}
>  
>  	INIT_LIST_HEAD(&holder->list);
> -	holder->disk = disk;
> +	holder->bdev = bdev;
>  	holder->refcnt = 1;
>  
>  	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
> @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  	 */
>  	kobject_get(bdev->bd_part->holder_dir);
>  
> -	list_add(&holder->list, &bdev->bd_holder_disks);
> +	list_add(&holder->list, &disk->slave_bdevs);
>  	goto out_unlock;
>  
>  out_del:
> @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  out_free:
>  	kfree(holder);
>  out_unlock:
> -	mutex_unlock(&bdev->bd_mutex);
> +	mutex_unlock(&bdev_holder->bd_mutex);
> +	bdput(bdev_holder);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>  void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  {
>  	struct bd_holder_disk *holder;
> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>  
> -	mutex_lock(&bdev->bd_mutex);
> +	if (WARN_ON_ONCE(!bdev_holder))
> +		return;
> +
> +	mutex_lock(&bdev_holder->bd_mutex);
>  
>  	holder = bd_find_holder_disk(bdev, disk);
>  
> @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>  		kfree(holder);
>  	}
>  
> -	mutex_unlock(&bdev->bd_mutex);
> +	mutex_unlock(&bdev_holder->bd_mutex);
> +	bdput(bdev_holder);
>  }
>  EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
>  #endif
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index d9b69bbde5cc..1b84ecb34c18 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -29,9 +29,6 @@ struct block_device {
>  	void *			bd_holder;
>  	int			bd_holders;
>  	bool			bd_write_holder;
> -#ifdef CONFIG_SYSFS
> -	struct list_head	bd_holder_disks;
> -#endif
>  	struct block_device *	bd_contains;
>  	u8			bd_partno;
>  	struct hd_struct *	bd_part;
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index 03da3f603d30..3e5049a527e6 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -195,7 +195,9 @@ struct gendisk {
>  #define GD_NEED_PART_SCAN		0
>  	struct rw_semaphore lookup_sem;
>  	struct kobject *slave_dir;
> -
> +#ifdef CONFIG_SYSFS
> +	struct list_head	slave_bdevs;
> +#endif

This is very different from the upstream version, and forces the change
onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
enabled like was done in the main kernel tree.

Why force this on all and not just use the same option?

I would need a strong ACK from the original developers and maintainers
of these subsystems before being able to take these into the 5.10 tree.

What prevents you from just using 5.15 for those systems that run into
these issues?

thanks,

greg k-h
Yu Kuai Aug. 1, 2022, 12:25 p.m. UTC | #2
Hi, Greg

在 2022/08/01 19:19, Greg KH 写道:
> On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
>> From: Christoph Hellwig <hch@lst.de>
>>
>> commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
>>
>> Invert they way the holder relations are tracked.  This very
>> slightly reduces the memory overhead for partitioned devices.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/genhd.c             |  3 +++
>>   fs/block_dev.c            | 31 +++++++++++++++++++------------
>>   include/linux/blk_types.h |  3 ---
>>   include/linux/genhd.h     |  4 +++-
>>   4 files changed, 25 insertions(+), 16 deletions(-)
>>
>> diff --git a/block/genhd.c b/block/genhd.c
>> index 796baf761202..2b11a2735285 100644
>> --- a/block/genhd.c
>> +++ b/block/genhd.c
>> @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>>   	disk_to_dev(disk)->class = &block_class;
>>   	disk_to_dev(disk)->type = &disk_type;
>>   	device_initialize(disk_to_dev(disk));
>> +#ifdef CONFIG_SYSFS
>> +	INIT_LIST_HEAD(&disk->slave_bdevs);
>> +#endif
>>   	return disk;
>>   
>>   out_free_part0:
>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>> index 29f020c4b2d0..a202c76fcf7f 100644
>> --- a/fs/block_dev.c
>> +++ b/fs/block_dev.c
>> @@ -823,9 +823,6 @@ static void init_once(void *foo)
>>   
>>   	memset(bdev, 0, sizeof(*bdev));
>>   	mutex_init(&bdev->bd_mutex);
>> -#ifdef CONFIG_SYSFS
>> -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
>> -#endif
>>   	bdev->bd_bdi = &noop_backing_dev_info;
>>   	inode_init_once(&ei->vfs_inode);
>>   	/* Initialize mutex for freeze. */
>> @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
>>   #ifdef CONFIG_SYSFS
>>   struct bd_holder_disk {
>>   	struct list_head	list;
>> -	struct gendisk		*disk;
>> +	struct block_device	*bdev;
>>   	int			refcnt;
>>   };
>>   
>> @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
>>   {
>>   	struct bd_holder_disk *holder;
>>   
>> -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
>> -		if (holder->disk == disk)
>> +	list_for_each_entry(holder, &disk->slave_bdevs, list)
>> +		if (holder->bdev == bdev)
>>   			return holder;
>>   	return NULL;
>>   }
>> @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
>>   int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   {
>>   	struct bd_holder_disk *holder;
>> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>>   	int ret = 0;
>>   
>> -	mutex_lock(&bdev->bd_mutex);
>> +	if (WARN_ON_ONCE(!bdev_holder))
>> +		return -ENOENT;
>> +
>> +	mutex_lock(&bdev_holder->bd_mutex);
>>   
>>   	WARN_ON_ONCE(!bdev->bd_holder);
>>   
>> @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   	}
>>   
>>   	INIT_LIST_HEAD(&holder->list);
>> -	holder->disk = disk;
>> +	holder->bdev = bdev;
>>   	holder->refcnt = 1;
>>   
>>   	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
>> @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   	 */
>>   	kobject_get(bdev->bd_part->holder_dir);
>>   
>> -	list_add(&holder->list, &bdev->bd_holder_disks);
>> +	list_add(&holder->list, &disk->slave_bdevs);
>>   	goto out_unlock;
>>   
>>   out_del:
>> @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   out_free:
>>   	kfree(holder);
>>   out_unlock:
>> -	mutex_unlock(&bdev->bd_mutex);
>> +	mutex_unlock(&bdev_holder->bd_mutex);
>> +	bdput(bdev_holder);
>>   	return ret;
>>   }
>>   EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>> @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>>   void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   {
>>   	struct bd_holder_disk *holder;
>> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>>   
>> -	mutex_lock(&bdev->bd_mutex);
>> +	if (WARN_ON_ONCE(!bdev_holder))
>> +		return;
>> +
>> +	mutex_lock(&bdev_holder->bd_mutex);
>>   
>>   	holder = bd_find_holder_disk(bdev, disk);
>>   
>> @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>   		kfree(holder);
>>   	}
>>   
>> -	mutex_unlock(&bdev->bd_mutex);
>> +	mutex_unlock(&bdev_holder->bd_mutex);
>> +	bdput(bdev_holder);
>>   }
>>   EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
>>   #endif
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index d9b69bbde5cc..1b84ecb34c18 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -29,9 +29,6 @@ struct block_device {
>>   	void *			bd_holder;
>>   	int			bd_holders;
>>   	bool			bd_write_holder;
>> -#ifdef CONFIG_SYSFS
>> -	struct list_head	bd_holder_disks;
>> -#endif
>>   	struct block_device *	bd_contains;
>>   	u8			bd_partno;
>>   	struct hd_struct *	bd_part;
>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>> index 03da3f603d30..3e5049a527e6 100644
>> --- a/include/linux/genhd.h
>> +++ b/include/linux/genhd.h
>> @@ -195,7 +195,9 @@ struct gendisk {
>>   #define GD_NEED_PART_SCAN		0
>>   	struct rw_semaphore lookup_sem;
>>   	struct kobject *slave_dir;
>> -
>> +#ifdef CONFIG_SYSFS
>> +	struct list_head	slave_bdevs;
>> +#endif
> 
> This is very different from the upstream version, and forces the change
> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> enabled like was done in the main kernel tree.
> 
> Why force this on all and not just use the same option?
> 
> I would need a strong ACK from the original developers and maintainers
> of these subsystems before being able to take these into the 5.10 tree.

Yes, I agree that Christoph must take a look first.
> 
> What prevents you from just using 5.15 for those systems that run into
> these issues?

The null pointer problem is reported by our product(related to dm-
mpath), and they had been using 5.10 for a long time. And switching
kernel for commercial will require lots of work, it's not an option
for now.

Thanks,
Kuai
Greg Kroah-Hartman Aug. 1, 2022, 1:17 p.m. UTC | #3
On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote:
> Hi, Greg
> 
> 在 2022/08/01 19:19, Greg KH 写道:
> > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
> > > From: Christoph Hellwig <hch@lst.de>
> > > 
> > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
> > > 
> > > Invert they way the holder relations are tracked.  This very
> > > slightly reduces the memory overhead for partitioned devices.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> > > ---
> > >   block/genhd.c             |  3 +++
> > >   fs/block_dev.c            | 31 +++++++++++++++++++------------
> > >   include/linux/blk_types.h |  3 ---
> > >   include/linux/genhd.h     |  4 +++-
> > >   4 files changed, 25 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/block/genhd.c b/block/genhd.c
> > > index 796baf761202..2b11a2735285 100644
> > > --- a/block/genhd.c
> > > +++ b/block/genhd.c
> > > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
> > >   	disk_to_dev(disk)->class = &block_class;
> > >   	disk_to_dev(disk)->type = &disk_type;
> > >   	device_initialize(disk_to_dev(disk));
> > > +#ifdef CONFIG_SYSFS
> > > +	INIT_LIST_HEAD(&disk->slave_bdevs);
> > > +#endif
> > >   	return disk;
> > >   out_free_part0:
> > > diff --git a/fs/block_dev.c b/fs/block_dev.c
> > > index 29f020c4b2d0..a202c76fcf7f 100644
> > > --- a/fs/block_dev.c
> > > +++ b/fs/block_dev.c
> > > @@ -823,9 +823,6 @@ static void init_once(void *foo)
> > >   	memset(bdev, 0, sizeof(*bdev));
> > >   	mutex_init(&bdev->bd_mutex);
> > > -#ifdef CONFIG_SYSFS
> > > -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
> > > -#endif
> > >   	bdev->bd_bdi = &noop_backing_dev_info;
> > >   	inode_init_once(&ei->vfs_inode);
> > >   	/* Initialize mutex for freeze. */
> > > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
> > >   #ifdef CONFIG_SYSFS
> > >   struct bd_holder_disk {
> > >   	struct list_head	list;
> > > -	struct gendisk		*disk;
> > > +	struct block_device	*bdev;
> > >   	int			refcnt;
> > >   };
> > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
> > >   {
> > >   	struct bd_holder_disk *holder;
> > > -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
> > > -		if (holder->disk == disk)
> > > +	list_for_each_entry(holder, &disk->slave_bdevs, list)
> > > +		if (holder->bdev == bdev)
> > >   			return holder;
> > >   	return NULL;
> > >   }
> > > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
> > >   int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   {
> > >   	struct bd_holder_disk *holder;
> > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > >   	int ret = 0;
> > > -	mutex_lock(&bdev->bd_mutex);
> > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > +		return -ENOENT;
> > > +
> > > +	mutex_lock(&bdev_holder->bd_mutex);
> > >   	WARN_ON_ONCE(!bdev->bd_holder);
> > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   	}
> > >   	INIT_LIST_HEAD(&holder->list);
> > > -	holder->disk = disk;
> > > +	holder->bdev = bdev;
> > >   	holder->refcnt = 1;
> > >   	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
> > > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   	 */
> > >   	kobject_get(bdev->bd_part->holder_dir);
> > > -	list_add(&holder->list, &bdev->bd_holder_disks);
> > > +	list_add(&holder->list, &disk->slave_bdevs);
> > >   	goto out_unlock;
> > >   out_del:
> > > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   out_free:
> > >   	kfree(holder);
> > >   out_unlock:
> > > -	mutex_unlock(&bdev->bd_mutex);
> > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > +	bdput(bdev_holder);
> > >   	return ret;
> > >   }
> > >   EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > >   void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   {
> > >   	struct bd_holder_disk *holder;
> > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > > -	mutex_lock(&bdev->bd_mutex);
> > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > +		return;
> > > +
> > > +	mutex_lock(&bdev_holder->bd_mutex);
> > >   	holder = bd_find_holder_disk(bdev, disk);
> > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > >   		kfree(holder);
> > >   	}
> > > -	mutex_unlock(&bdev->bd_mutex);
> > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > +	bdput(bdev_holder);
> > >   }
> > >   EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
> > >   #endif
> > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> > > index d9b69bbde5cc..1b84ecb34c18 100644
> > > --- a/include/linux/blk_types.h
> > > +++ b/include/linux/blk_types.h
> > > @@ -29,9 +29,6 @@ struct block_device {
> > >   	void *			bd_holder;
> > >   	int			bd_holders;
> > >   	bool			bd_write_holder;
> > > -#ifdef CONFIG_SYSFS
> > > -	struct list_head	bd_holder_disks;
> > > -#endif
> > >   	struct block_device *	bd_contains;
> > >   	u8			bd_partno;
> > >   	struct hd_struct *	bd_part;
> > > diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> > > index 03da3f603d30..3e5049a527e6 100644
> > > --- a/include/linux/genhd.h
> > > +++ b/include/linux/genhd.h
> > > @@ -195,7 +195,9 @@ struct gendisk {
> > >   #define GD_NEED_PART_SCAN		0
> > >   	struct rw_semaphore lookup_sem;
> > >   	struct kobject *slave_dir;
> > > -
> > > +#ifdef CONFIG_SYSFS
> > > +	struct list_head	slave_bdevs;
> > > +#endif
> > 
> > This is very different from the upstream version, and forces the change
> > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> > enabled like was done in the main kernel tree.
> > 
> > Why force this on all and not just use the same option?
> > 
> > I would need a strong ACK from the original developers and maintainers
> > of these subsystems before being able to take these into the 5.10 tree.
> 
> Yes, I agree that Christoph must take a look first.
> > 
> > What prevents you from just using 5.15 for those systems that run into
> > these issues?
> 
> The null pointer problem is reported by our product(related to dm-
> mpath), and they had been using 5.10 for a long time. And switching
> kernel for commercial will require lots of work, it's not an option
> for now.

It should be the same validation and verification and testing path for
5.15.y as for an intrusive kernel change as this one, right?  What makes
it any different to prevent 5.15 from being used?

thanks,

greg k-h
Yu Kuai Aug. 1, 2022, 1:39 p.m. UTC | #4
Hi, Greg

在 2022/08/01 21:17, Greg KH 写道:
> On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote:
>> Hi, Greg
>>
>> 在 2022/08/01 19:19, Greg KH 写道:
>>> On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
>>>> From: Christoph Hellwig <hch@lst.de>
>>>>
>>>> commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
>>>>
>>>> Invert they way the holder relations are tracked.  This very
>>>> slightly reduces the memory overhead for partitioned devices.
>>>>
>>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>>    block/genhd.c             |  3 +++
>>>>    fs/block_dev.c            | 31 +++++++++++++++++++------------
>>>>    include/linux/blk_types.h |  3 ---
>>>>    include/linux/genhd.h     |  4 +++-
>>>>    4 files changed, 25 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/block/genhd.c b/block/genhd.c
>>>> index 796baf761202..2b11a2735285 100644
>>>> --- a/block/genhd.c
>>>> +++ b/block/genhd.c
>>>> @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>>>>    	disk_to_dev(disk)->class = &block_class;
>>>>    	disk_to_dev(disk)->type = &disk_type;
>>>>    	device_initialize(disk_to_dev(disk));
>>>> +#ifdef CONFIG_SYSFS
>>>> +	INIT_LIST_HEAD(&disk->slave_bdevs);
>>>> +#endif
>>>>    	return disk;
>>>>    out_free_part0:
>>>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>>>> index 29f020c4b2d0..a202c76fcf7f 100644
>>>> --- a/fs/block_dev.c
>>>> +++ b/fs/block_dev.c
>>>> @@ -823,9 +823,6 @@ static void init_once(void *foo)
>>>>    	memset(bdev, 0, sizeof(*bdev));
>>>>    	mutex_init(&bdev->bd_mutex);
>>>> -#ifdef CONFIG_SYSFS
>>>> -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
>>>> -#endif
>>>>    	bdev->bd_bdi = &noop_backing_dev_info;
>>>>    	inode_init_once(&ei->vfs_inode);
>>>>    	/* Initialize mutex for freeze. */
>>>> @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
>>>>    #ifdef CONFIG_SYSFS
>>>>    struct bd_holder_disk {
>>>>    	struct list_head	list;
>>>> -	struct gendisk		*disk;
>>>> +	struct block_device	*bdev;
>>>>    	int			refcnt;
>>>>    };
>>>> @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
>>>>    {
>>>>    	struct bd_holder_disk *holder;
>>>> -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
>>>> -		if (holder->disk == disk)
>>>> +	list_for_each_entry(holder, &disk->slave_bdevs, list)
>>>> +		if (holder->bdev == bdev)
>>>>    			return holder;
>>>>    	return NULL;
>>>>    }
>>>> @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
>>>>    int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    {
>>>>    	struct bd_holder_disk *holder;
>>>> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>>>>    	int ret = 0;
>>>> -	mutex_lock(&bdev->bd_mutex);
>>>> +	if (WARN_ON_ONCE(!bdev_holder))
>>>> +		return -ENOENT;
>>>> +
>>>> +	mutex_lock(&bdev_holder->bd_mutex);
>>>>    	WARN_ON_ONCE(!bdev->bd_holder);
>>>> @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    	}
>>>>    	INIT_LIST_HEAD(&holder->list);
>>>> -	holder->disk = disk;
>>>> +	holder->bdev = bdev;
>>>>    	holder->refcnt = 1;
>>>>    	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
>>>> @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    	 */
>>>>    	kobject_get(bdev->bd_part->holder_dir);
>>>> -	list_add(&holder->list, &bdev->bd_holder_disks);
>>>> +	list_add(&holder->list, &disk->slave_bdevs);
>>>>    	goto out_unlock;
>>>>    out_del:
>>>> @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    out_free:
>>>>    	kfree(holder);
>>>>    out_unlock:
>>>> -	mutex_unlock(&bdev->bd_mutex);
>>>> +	mutex_unlock(&bdev_holder->bd_mutex);
>>>> +	bdput(bdev_holder);
>>>>    	return ret;
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>>>> @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
>>>>    void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    {
>>>>    	struct bd_holder_disk *holder;
>>>> +	struct block_device *bdev_holder = bdget_disk(disk, 0);
>>>> -	mutex_lock(&bdev->bd_mutex);
>>>> +	if (WARN_ON_ONCE(!bdev_holder))
>>>> +		return;
>>>> +
>>>> +	mutex_lock(&bdev_holder->bd_mutex);
>>>>    	holder = bd_find_holder_disk(bdev, disk);
>>>> @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
>>>>    		kfree(holder);
>>>>    	}
>>>> -	mutex_unlock(&bdev->bd_mutex);
>>>> +	mutex_unlock(&bdev_holder->bd_mutex);
>>>> +	bdput(bdev_holder);
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
>>>>    #endif
>>>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>>>> index d9b69bbde5cc..1b84ecb34c18 100644
>>>> --- a/include/linux/blk_types.h
>>>> +++ b/include/linux/blk_types.h
>>>> @@ -29,9 +29,6 @@ struct block_device {
>>>>    	void *			bd_holder;
>>>>    	int			bd_holders;
>>>>    	bool			bd_write_holder;
>>>> -#ifdef CONFIG_SYSFS
>>>> -	struct list_head	bd_holder_disks;
>>>> -#endif
>>>>    	struct block_device *	bd_contains;
>>>>    	u8			bd_partno;
>>>>    	struct hd_struct *	bd_part;
>>>> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
>>>> index 03da3f603d30..3e5049a527e6 100644
>>>> --- a/include/linux/genhd.h
>>>> +++ b/include/linux/genhd.h
>>>> @@ -195,7 +195,9 @@ struct gendisk {
>>>>    #define GD_NEED_PART_SCAN		0
>>>>    	struct rw_semaphore lookup_sem;
>>>>    	struct kobject *slave_dir;
>>>> -
>>>> +#ifdef CONFIG_SYSFS
>>>> +	struct list_head	slave_bdevs;
>>>> +#endif
>>>
>>> This is very different from the upstream version, and forces the change
>>> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
>>> enabled like was done in the main kernel tree.
>>>
>>> Why force this on all and not just use the same option?
>>>
>>> I would need a strong ACK from the original developers and maintainers
>>> of these subsystems before being able to take these into the 5.10 tree.
>>
>> Yes, I agree that Christoph must take a look first.
>>>
>>> What prevents you from just using 5.15 for those systems that run into
>>> these issues?
>>
>> The null pointer problem is reported by our product(related to dm-
>> mpath), and they had been using 5.10 for a long time. And switching
>> kernel for commercial will require lots of work, it's not an option
>> for now.
> 
> It should be the same validation and verification and testing path for
> 5.15.y as for an intrusive kernel change as this one, right?  What makes
> it any different to prevent 5.15 from being used?

No, they are not the same, if we try to use 5.15.y, we have to test all
other stuff that our product is used currently(ext4, block, scsi and
lots of other modules). With this patch, we only need to make sure
dm works fine.

Thanks,
Kuai
Greg Kroah-Hartman Aug. 1, 2022, 1:43 p.m. UTC | #5
On Mon, Aug 01, 2022 at 09:39:30PM +0800, Yu Kuai wrote:
> Hi, Greg
> 
> 在 2022/08/01 21:17, Greg KH 写道:
> > On Mon, Aug 01, 2022 at 08:25:27PM +0800, Yu Kuai wrote:
> > > Hi, Greg
> > > 
> > > 在 2022/08/01 19:19, Greg KH 写道:
> > > > On Fri, Jul 29, 2022 at 02:23:54PM +0800, Yu Kuai wrote:
> > > > > From: Christoph Hellwig <hch@lst.de>
> > > > > 
> > > > > commit 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c upstream.
> > > > > 
> > > > > Invert they way the holder relations are tracked.  This very
> > > > > slightly reduces the memory overhead for partitioned devices.
> > > > > 
> > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > > Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> > > > > ---
> > > > >    block/genhd.c             |  3 +++
> > > > >    fs/block_dev.c            | 31 +++++++++++++++++++------------
> > > > >    include/linux/blk_types.h |  3 ---
> > > > >    include/linux/genhd.h     |  4 +++-
> > > > >    4 files changed, 25 insertions(+), 16 deletions(-)
> > > > > 
> > > > > diff --git a/block/genhd.c b/block/genhd.c
> > > > > index 796baf761202..2b11a2735285 100644
> > > > > --- a/block/genhd.c
> > > > > +++ b/block/genhd.c
> > > > > @@ -1760,6 +1760,9 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
> > > > >    	disk_to_dev(disk)->class = &block_class;
> > > > >    	disk_to_dev(disk)->type = &disk_type;
> > > > >    	device_initialize(disk_to_dev(disk));
> > > > > +#ifdef CONFIG_SYSFS
> > > > > +	INIT_LIST_HEAD(&disk->slave_bdevs);
> > > > > +#endif
> > > > >    	return disk;
> > > > >    out_free_part0:
> > > > > diff --git a/fs/block_dev.c b/fs/block_dev.c
> > > > > index 29f020c4b2d0..a202c76fcf7f 100644
> > > > > --- a/fs/block_dev.c
> > > > > +++ b/fs/block_dev.c
> > > > > @@ -823,9 +823,6 @@ static void init_once(void *foo)
> > > > >    	memset(bdev, 0, sizeof(*bdev));
> > > > >    	mutex_init(&bdev->bd_mutex);
> > > > > -#ifdef CONFIG_SYSFS
> > > > > -	INIT_LIST_HEAD(&bdev->bd_holder_disks);
> > > > > -#endif
> > > > >    	bdev->bd_bdi = &noop_backing_dev_info;
> > > > >    	inode_init_once(&ei->vfs_inode);
> > > > >    	/* Initialize mutex for freeze. */
> > > > > @@ -1188,7 +1185,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
> > > > >    #ifdef CONFIG_SYSFS
> > > > >    struct bd_holder_disk {
> > > > >    	struct list_head	list;
> > > > > -	struct gendisk		*disk;
> > > > > +	struct block_device	*bdev;
> > > > >    	int			refcnt;
> > > > >    };
> > > > > @@ -1197,8 +1194,8 @@ static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > -	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
> > > > > -		if (holder->disk == disk)
> > > > > +	list_for_each_entry(holder, &disk->slave_bdevs, list)
> > > > > +		if (holder->bdev == bdev)
> > > > >    			return holder;
> > > > >    	return NULL;
> > > > >    }
> > > > > @@ -1244,9 +1241,13 @@ static void del_symlink(struct kobject *from, struct kobject *to)
> > > > >    int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > > > >    	int ret = 0;
> > > > > -	mutex_lock(&bdev->bd_mutex);
> > > > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > > > +		return -ENOENT;
> > > > > +
> > > > > +	mutex_lock(&bdev_holder->bd_mutex);
> > > > >    	WARN_ON_ONCE(!bdev->bd_holder);
> > > > > @@ -1267,7 +1268,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    	}
> > > > >    	INIT_LIST_HEAD(&holder->list);
> > > > > -	holder->disk = disk;
> > > > > +	holder->bdev = bdev;
> > > > >    	holder->refcnt = 1;
> > > > >    	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
> > > > > @@ -1283,7 +1284,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    	 */
> > > > >    	kobject_get(bdev->bd_part->holder_dir);
> > > > > -	list_add(&holder->list, &bdev->bd_holder_disks);
> > > > > +	list_add(&holder->list, &disk->slave_bdevs);
> > > > >    	goto out_unlock;
> > > > >    out_del:
> > > > > @@ -1291,7 +1292,8 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    out_free:
> > > > >    	kfree(holder);
> > > > >    out_unlock:
> > > > > -	mutex_unlock(&bdev->bd_mutex);
> > > > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > > > +	bdput(bdev_holder);
> > > > >    	return ret;
> > > > >    }
> > > > >    EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > > > > @@ -1309,8 +1311,12 @@ EXPORT_SYMBOL_GPL(bd_link_disk_holder);
> > > > >    void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    {
> > > > >    	struct bd_holder_disk *holder;
> > > > > +	struct block_device *bdev_holder = bdget_disk(disk, 0);
> > > > > -	mutex_lock(&bdev->bd_mutex);
> > > > > +	if (WARN_ON_ONCE(!bdev_holder))
> > > > > +		return;
> > > > > +
> > > > > +	mutex_lock(&bdev_holder->bd_mutex);
> > > > >    	holder = bd_find_holder_disk(bdev, disk);
> > > > > @@ -1323,7 +1329,8 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
> > > > >    		kfree(holder);
> > > > >    	}
> > > > > -	mutex_unlock(&bdev->bd_mutex);
> > > > > +	mutex_unlock(&bdev_holder->bd_mutex);
> > > > > +	bdput(bdev_holder);
> > > > >    }
> > > > >    EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
> > > > >    #endif
> > > > > diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> > > > > index d9b69bbde5cc..1b84ecb34c18 100644
> > > > > --- a/include/linux/blk_types.h
> > > > > +++ b/include/linux/blk_types.h
> > > > > @@ -29,9 +29,6 @@ struct block_device {
> > > > >    	void *			bd_holder;
> > > > >    	int			bd_holders;
> > > > >    	bool			bd_write_holder;
> > > > > -#ifdef CONFIG_SYSFS
> > > > > -	struct list_head	bd_holder_disks;
> > > > > -#endif
> > > > >    	struct block_device *	bd_contains;
> > > > >    	u8			bd_partno;
> > > > >    	struct hd_struct *	bd_part;
> > > > > diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> > > > > index 03da3f603d30..3e5049a527e6 100644
> > > > > --- a/include/linux/genhd.h
> > > > > +++ b/include/linux/genhd.h
> > > > > @@ -195,7 +195,9 @@ struct gendisk {
> > > > >    #define GD_NEED_PART_SCAN		0
> > > > >    	struct rw_semaphore lookup_sem;
> > > > >    	struct kobject *slave_dir;
> > > > > -
> > > > > +#ifdef CONFIG_SYSFS
> > > > > +	struct list_head	slave_bdevs;
> > > > > +#endif
> > > > 
> > > > This is very different from the upstream version, and forces the change
> > > > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> > > > enabled like was done in the main kernel tree.
> > > > 
> > > > Why force this on all and not just use the same option?
> > > > 
> > > > I would need a strong ACK from the original developers and maintainers
> > > > of these subsystems before being able to take these into the 5.10 tree.
> > > 
> > > Yes, I agree that Christoph must take a look first.
> > > > 
> > > > What prevents you from just using 5.15 for those systems that run into
> > > > these issues?
> > > 
> > > The null pointer problem is reported by our product(related to dm-
> > > mpath), and they had been using 5.10 for a long time. And switching
> > > kernel for commercial will require lots of work, it's not an option
> > > for now.
> > 
> > It should be the same validation and verification and testing path for
> > 5.15.y as for an intrusive kernel change as this one, right?  What makes
> > it any different to prevent 5.15 from being used?
> 
> No, they are not the same, if we try to use 5.15.y, we have to test all
> other stuff that our product is used currently(ext4, block, scsi and
> lots of other modules). With this patch, we only need to make sure
> dm works fine.

That is a very odd way to test a monolithic kernel image, where any
change in any part can affect any other part of the kernel.  You touched
the block layer, why you wouldn't think that all block-related things
would also have to be tested is very strange to me as those are directly
related.  And while you are at it, you should test all other subsystems
as the system is released as a whole, not as individual changes that are
not unrelated.

I think you need to revisit your testing strategy, good luck!

greg k-h
Christoph Hellwig Aug. 1, 2022, 6:04 p.m. UTC | #6
On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote:
> This is very different from the upstream version, and forces the change
> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> enabled like was done in the main kernel tree.
> 
> Why force this on all and not just use the same option?

I'm really worried about backports that are significantly different
from the original commit.  To the point where if they are so different
and we don't have a grave security or data integrity bug I'm really not
very much in favor of backporting them at all.
Greg Kroah-Hartman Aug. 2, 2022, 5:11 a.m. UTC | #7
On Mon, Aug 01, 2022 at 08:04:58PM +0200, Christoph Hellwig wrote:
> On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote:
> > This is very different from the upstream version, and forces the change
> > onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
> > enabled like was done in the main kernel tree.
> > 
> > Why force this on all and not just use the same option?
> 
> I'm really worried about backports that are significantly different
> from the original commit.  To the point where if they are so different
> and we don't have a grave security or data integrity bug I'm really not
> very much in favor of backporting them at all.
> 

I agree, I'll drop this from my review queue now, thanks.

greg k-h
Yu Kuai Aug. 8, 2022, 3:31 a.m. UTC | #8
Hi, Christoph

在 2022/08/02 13:11, Greg KH 写道:
> On Mon, Aug 01, 2022 at 08:04:58PM +0200, Christoph Hellwig wrote:
>> On Mon, Aug 01, 2022 at 01:19:09PM +0200, Greg KH wrote:
>>> This is very different from the upstream version, and forces the change
>>> onto everyone, not just those who had CONFIG_BLOCK_HOLDER_DEPRECATED
>>> enabled like was done in the main kernel tree.
>>>
>>> Why force this on all and not just use the same option?
>>
>> I'm really worried about backports that are significantly different
>> from the original commit.  To the point where if they are so different
>> and we don't have a grave security or data integrity bug I'm really not
>> very much in favor of backporting them at all.
>>

I do understand that backporting these patches is not good, thanks for
taking time on this patchset.

I decided to push following patch in our version:

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 588e8b43efab..c047d5fcb325 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2149,12 +2149,16 @@ int dm_setup_md_queue(struct mapped_device *md, 
struct dm_table *t)

         switch (type) {
         case DM_TYPE_REQUEST_BASED:
-               md->disk->fops = &dm_rq_blk_dops;
                 r = dm_mq_init_request_queue(md, t);
                 if (r) {
                         DMERR("Cannot initialize queue for 
request-based dm mapped device");
                         return r;
                 }
+               /*
+                * Change the fops after queue is initialized, so that 
bio won't
+                * issued by rq-based path until that.
+                */
+               md->disk->fops = &dm_rq_blk_dops;
                 break;
         case DM_TYPE_BIO_BASED:
         case DM_TYPE_DAX_BIO_BASED:


This way only modify dm, and the problem can be fixed. If you guys think
this is OK, I can send a patch for LTS. Otherwise, if you guys still
think the null-ptr-def problem is uncommon and doesn't worth to fix,
please ignore this mail.

Thanks,
Kuai
> 
> I agree, I'll drop this from my review queue now, thanks.
> 
> greg k-h
> .
>
diff mbox series

Patch

diff --git a/block/genhd.c b/block/genhd.c
index 796baf761202..2b11a2735285 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1760,6 +1760,9 @@  struct gendisk *__alloc_disk_node(int minors, int node_id)
 	disk_to_dev(disk)->class = &block_class;
 	disk_to_dev(disk)->type = &disk_type;
 	device_initialize(disk_to_dev(disk));
+#ifdef CONFIG_SYSFS
+	INIT_LIST_HEAD(&disk->slave_bdevs);
+#endif
 	return disk;
 
 out_free_part0:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 29f020c4b2d0..a202c76fcf7f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -823,9 +823,6 @@  static void init_once(void *foo)
 
 	memset(bdev, 0, sizeof(*bdev));
 	mutex_init(&bdev->bd_mutex);
-#ifdef CONFIG_SYSFS
-	INIT_LIST_HEAD(&bdev->bd_holder_disks);
-#endif
 	bdev->bd_bdi = &noop_backing_dev_info;
 	inode_init_once(&ei->vfs_inode);
 	/* Initialize mutex for freeze. */
@@ -1188,7 +1185,7 @@  EXPORT_SYMBOL(bd_abort_claiming);
 #ifdef CONFIG_SYSFS
 struct bd_holder_disk {
 	struct list_head	list;
-	struct gendisk		*disk;
+	struct block_device	*bdev;
 	int			refcnt;
 };
 
@@ -1197,8 +1194,8 @@  static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
 {
 	struct bd_holder_disk *holder;
 
-	list_for_each_entry(holder, &bdev->bd_holder_disks, list)
-		if (holder->disk == disk)
+	list_for_each_entry(holder, &disk->slave_bdevs, list)
+		if (holder->bdev == bdev)
 			return holder;
 	return NULL;
 }
@@ -1244,9 +1241,13 @@  static void del_symlink(struct kobject *from, struct kobject *to)
 int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
 	struct bd_holder_disk *holder;
+	struct block_device *bdev_holder = bdget_disk(disk, 0);
 	int ret = 0;
 
-	mutex_lock(&bdev->bd_mutex);
+	if (WARN_ON_ONCE(!bdev_holder))
+		return -ENOENT;
+
+	mutex_lock(&bdev_holder->bd_mutex);
 
 	WARN_ON_ONCE(!bdev->bd_holder);
 
@@ -1267,7 +1268,7 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	}
 
 	INIT_LIST_HEAD(&holder->list);
-	holder->disk = disk;
+	holder->bdev = bdev;
 	holder->refcnt = 1;
 
 	ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
@@ -1283,7 +1284,7 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	 */
 	kobject_get(bdev->bd_part->holder_dir);
 
-	list_add(&holder->list, &bdev->bd_holder_disks);
+	list_add(&holder->list, &disk->slave_bdevs);
 	goto out_unlock;
 
 out_del:
@@ -1291,7 +1292,8 @@  int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 out_free:
 	kfree(holder);
 out_unlock:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev_holder->bd_mutex);
+	bdput(bdev_holder);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1309,8 +1311,12 @@  EXPORT_SYMBOL_GPL(bd_link_disk_holder);
 void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
 	struct bd_holder_disk *holder;
+	struct block_device *bdev_holder = bdget_disk(disk, 0);
 
-	mutex_lock(&bdev->bd_mutex);
+	if (WARN_ON_ONCE(!bdev_holder))
+		return;
+
+	mutex_lock(&bdev_holder->bd_mutex);
 
 	holder = bd_find_holder_disk(bdev, disk);
 
@@ -1323,7 +1329,8 @@  void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 		kfree(holder);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev_holder->bd_mutex);
+	bdput(bdev_holder);
 }
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d9b69bbde5cc..1b84ecb34c18 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -29,9 +29,6 @@  struct block_device {
 	void *			bd_holder;
 	int			bd_holders;
 	bool			bd_write_holder;
-#ifdef CONFIG_SYSFS
-	struct list_head	bd_holder_disks;
-#endif
 	struct block_device *	bd_contains;
 	u8			bd_partno;
 	struct hd_struct *	bd_part;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 03da3f603d30..3e5049a527e6 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -195,7 +195,9 @@  struct gendisk {
 #define GD_NEED_PART_SCAN		0
 	struct rw_semaphore lookup_sem;
 	struct kobject *slave_dir;
-
+#ifdef CONFIG_SYSFS
+	struct list_head	slave_bdevs;
+#endif
 	struct timer_rand_state *random;
 	atomic_t sync_io;		/* RAID */
 	struct disk_events *ev;