diff mbox series

[06/10] block: change the refcounting for partitions

Message ID 20210724071249.1284585-7-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/10] block: delay freeing the gendisk | expand

Commit Message

Christoph Hellwig July 24, 2021, 7:12 a.m. UTC
Instead of acquiring an inode reference on open make sure partitions
always hold device model references to the disk while alive, and switch
open to grab only a device model reference to the opened block device.
If that is a partition the disk reference is transitively held by the
partition already.
---
 block/partitions/core.c |  4 +++
 fs/block_dev.c          | 60 ++++++++++++++++-------------------------
 2 files changed, 27 insertions(+), 37 deletions(-)

Comments

Damien Le Moal July 26, 2021, 2:20 a.m. UTC | #1
On 2021/07/24 16:15, Christoph Hellwig wrote:
> Instead of acquiring an inode reference on open make sure partitions
> always hold device model references to the disk while alive, and switch
> open to grab only a device model reference to the opened block device.
> If that is a partition the disk reference is transitively held by the
> partition already.

Your SoB is missing...

> ---
>  block/partitions/core.c |  4 +++
>  fs/block_dev.c          | 60 ++++++++++++++++-------------------------
>  2 files changed, 27 insertions(+), 37 deletions(-)
> 
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index ae88b5439056..1b02073a2047 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -261,6 +261,7 @@ static void part_release(struct device *dev)
>  {
>  	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
>  		blk_free_ext_minor(MINOR(dev->devt));
> +	put_disk(dev_to_bdev(dev)->bd_disk);
>  	bdput(dev_to_bdev(dev));
>  }
>  
> @@ -364,6 +365,9 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
>  	pdev->type = &part_type;
>  	pdev->parent = ddev;
>  
> +	/* ensure we always have a reference to the whole disk */
> +	get_device(disk_to_dev(disk));
> +
>  	/* in consecutive minor range? */
>  	if (bdev->bd_partno < disk->minors) {
>  		devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 932f4034ad66..4a6c8c0a3bc9 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -921,16 +921,6 @@ void bdev_add(struct block_device *bdev, dev_t dev)
>  	insert_inode_hash(bdev->bd_inode);
>  }
>  
> -static struct block_device *bdget(dev_t dev)
> -{
> -	struct inode *inode;
> -
> -	inode = ilookup(blockdev_superblock, dev);
> -	if (!inode)
> -		return NULL;
> -	return &BDEV_I(inode)->bdev;
> -}
> -
>  /**
>   * bdgrab -- Grab a reference to an already referenced block device
>   * @bdev:	Block device to grab a reference to.
> @@ -1282,16 +1272,14 @@ static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
>  static int blkdev_get_part(struct block_device *part, fmode_t mode)
>  {
>  	struct gendisk *disk = part->bd_disk;
> -	struct block_device *whole;
>  	int ret;
>  
>  	if (part->bd_openers)
>  		goto done;
>  
> -	whole = bdgrab(disk->part0);
> -	ret = blkdev_get_whole(whole, mode);
> +	ret = blkdev_get_whole(bdev_whole(part), mode);
>  	if (ret)
> -		goto out_put_whole;
> +		return ret;
>  
>  	ret = -ENXIO;
>  	if (!bdev_nr_sectors(part))
> @@ -1306,9 +1294,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
>  	return 0;
>  
>  out_blkdev_put:
> -	blkdev_put_whole(whole, mode);
> -out_put_whole:
> -	bdput(whole);
> +	blkdev_put_whole(bdev_whole(part), mode);
>  	return ret;
>  }
>  
> @@ -1321,42 +1307,42 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
>  	blkdev_flush_mapping(part);
>  	whole->bd_disk->open_partitions--;
>  	blkdev_put_whole(whole, mode);
> -	bdput(whole);
>  }
>  
>  struct block_device *blkdev_get_no_open(dev_t dev)
>  {
>  	struct block_device *bdev;
> -	struct gendisk *disk;
> +	struct inode *inode;
>  
> -	bdev = bdget(dev);
> -	if (!bdev) {
> +	inode = ilookup(blockdev_superblock, dev);
> +	if (!inode) {
>  		blk_request_module(dev);
> -		bdev = bdget(dev);
> -		if (!bdev)
> +		inode = ilookup(blockdev_superblock, dev);
> +		if (!inode)
>  			return NULL;
>  	}
>  
> -	disk = bdev->bd_disk;
> -	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
> -		goto bdput;
> -	if (disk->flags & GENHD_FL_HIDDEN)
> -		goto put_disk;
> -	if (!try_module_get(bdev->bd_disk->fops->owner))
> -		goto put_disk;
> +	/* switch from the inode reference to a device mode one: */
> +	bdev = &BDEV_I(inode)->bdev;
> +	if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
> +		bdev = NULL;
> +	iput(inode);
> +
> +	if (!bdev)
> +		return NULL;
> +	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
> +	    !try_module_get(bdev->bd_disk->fops->owner)) {
> +		put_device(&bdev->bd_device);
> +		return NULL;
> +	}
> +
>  	return bdev;
> -put_disk:
> -	put_disk(disk);
> -bdput:
> -	bdput(bdev);
> -	return NULL;
>  }
>  
>  void blkdev_put_no_open(struct block_device *bdev)
>  {
>  	module_put(bdev->bd_disk->fops->owner);
> -	put_disk(bdev->bd_disk);
> -	bdput(bdev);
> +	put_device(&bdev->bd_device);
>  }
>  
>  /**
>
Ming Lei July 26, 2021, 10:17 a.m. UTC | #2
On Sat, Jul 24, 2021 at 09:12:45AM +0200, Christoph Hellwig wrote:
> Instead of acquiring an inode reference on open make sure partitions
> always hold device model references to the disk while alive, and switch
> open to grab only a device model reference to the opened block device.
> If that is a partition the disk reference is transitively held by the
> partition already.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
diff mbox series

Patch

diff --git a/block/partitions/core.c b/block/partitions/core.c
index ae88b5439056..1b02073a2047 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -261,6 +261,7 @@  static void part_release(struct device *dev)
 {
 	if (MAJOR(dev->devt) == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(MINOR(dev->devt));
+	put_disk(dev_to_bdev(dev)->bd_disk);
 	bdput(dev_to_bdev(dev));
 }
 
@@ -364,6 +365,9 @@  static struct block_device *add_partition(struct gendisk *disk, int partno,
 	pdev->type = &part_type;
 	pdev->parent = ddev;
 
+	/* ensure we always have a reference to the whole disk */
+	get_device(disk_to_dev(disk));
+
 	/* in consecutive minor range? */
 	if (bdev->bd_partno < disk->minors) {
 		devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 932f4034ad66..4a6c8c0a3bc9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -921,16 +921,6 @@  void bdev_add(struct block_device *bdev, dev_t dev)
 	insert_inode_hash(bdev->bd_inode);
 }
 
-static struct block_device *bdget(dev_t dev)
-{
-	struct inode *inode;
-
-	inode = ilookup(blockdev_superblock, dev);
-	if (!inode)
-		return NULL;
-	return &BDEV_I(inode)->bdev;
-}
-
 /**
  * bdgrab -- Grab a reference to an already referenced block device
  * @bdev:	Block device to grab a reference to.
@@ -1282,16 +1272,14 @@  static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
 static int blkdev_get_part(struct block_device *part, fmode_t mode)
 {
 	struct gendisk *disk = part->bd_disk;
-	struct block_device *whole;
 	int ret;
 
 	if (part->bd_openers)
 		goto done;
 
-	whole = bdgrab(disk->part0);
-	ret = blkdev_get_whole(whole, mode);
+	ret = blkdev_get_whole(bdev_whole(part), mode);
 	if (ret)
-		goto out_put_whole;
+		return ret;
 
 	ret = -ENXIO;
 	if (!bdev_nr_sectors(part))
@@ -1306,9 +1294,7 @@  static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	return 0;
 
 out_blkdev_put:
-	blkdev_put_whole(whole, mode);
-out_put_whole:
-	bdput(whole);
+	blkdev_put_whole(bdev_whole(part), mode);
 	return ret;
 }
 
@@ -1321,42 +1307,42 @@  static void blkdev_put_part(struct block_device *part, fmode_t mode)
 	blkdev_flush_mapping(part);
 	whole->bd_disk->open_partitions--;
 	blkdev_put_whole(whole, mode);
-	bdput(whole);
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
 {
 	struct block_device *bdev;
-	struct gendisk *disk;
+	struct inode *inode;
 
-	bdev = bdget(dev);
-	if (!bdev) {
+	inode = ilookup(blockdev_superblock, dev);
+	if (!inode) {
 		blk_request_module(dev);
-		bdev = bdget(dev);
-		if (!bdev)
+		inode = ilookup(blockdev_superblock, dev);
+		if (!inode)
 			return NULL;
 	}
 
-	disk = bdev->bd_disk;
-	if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj))
-		goto bdput;
-	if (disk->flags & GENHD_FL_HIDDEN)
-		goto put_disk;
-	if (!try_module_get(bdev->bd_disk->fops->owner))
-		goto put_disk;
+	/* switch from the inode reference to a device mode one: */
+	bdev = &BDEV_I(inode)->bdev;
+	if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+		bdev = NULL;
+	iput(inode);
+
+	if (!bdev)
+		return NULL;
+	if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+	    !try_module_get(bdev->bd_disk->fops->owner)) {
+		put_device(&bdev->bd_device);
+		return NULL;
+	}
+
 	return bdev;
-put_disk:
-	put_disk(disk);
-bdput:
-	bdput(bdev);
-	return NULL;
 }
 
 void blkdev_put_no_open(struct block_device *bdev)
 {
 	module_put(bdev->bd_disk->fops->owner);
-	put_disk(bdev->bd_disk);
-	bdput(bdev);
+	put_device(&bdev->bd_device);
 }
 
 /**