diff mbox series

block: propagate partition scanning errors to the BLKRRPART ioctl

Message ID 20240402160103.758141-1-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series block: propagate partition scanning errors to the BLKRRPART ioctl | expand

Commit Message

Christoph Hellwig April 2, 2024, 4:01 p.m. UTC
Commit 4601b4b130de ("block: reopen the device in blkdev_reread_part")
lost the propagation of I/O errors from the low-level read of the
partition table to the user space caller of the BLKRRPART.

Apparently some user space relies on, so restore the propagation.  This
isn't exactly pretty as other block device open calls explicitly do not
are about these errors, so add a new BLK_OPEN_STRICT_SCAN to opt into
the error propagation.

Fixes: 4601b4b130de ("block: reopen the device in blkdev_reread_part")
Reported-by: Saranya Muruganandam <saranyamohan@google.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/bdev.c           | 29 +++++++++++++++++++----------
 block/ioctl.c          |  3 ++-
 include/linux/blkdev.h |  2 ++
 3 files changed, 23 insertions(+), 11 deletions(-)

Comments

Chaitanya Kulkarni April 3, 2024, 2:28 a.m. UTC | #1
On 4/2/24 09:01, Christoph Hellwig wrote:
> Commit 4601b4b130de ("block: reopen the device in blkdev_reread_part")
> lost the propagation of I/O errors from the low-level read of the
> partition table to the user space caller of the BLKRRPART.
>
> Apparently some user space relies on, so restore the propagation.  This
> isn't exactly pretty as other block device open calls explicitly do not
> are about these errors, so add a new BLK_OPEN_STRICT_SCAN to opt into
> the error propagation.
>
> Fixes: 4601b4b130de ("block: reopen the device in blkdev_reread_part")
> Reported-by: Saranya Muruganandam<saranyamohan@google.com>
> Signed-off-by: Christoph Hellwig<hch@lst.de>

The only change from existing behavior is now we are calling
bdev_disk_changed() after atomic_inc(&bdev->bd_openers) in
blkdev_get_whole(), I guess it should not have any side effects.

Looks good.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>

-ck

I've verified that existing code in blkdev_get_whole() just returns
0 instead of catching the error from bdev_disk_changed(), with this patch
bdev_disk_changed() error is caught in blkdev_get_whole() and returned to
the userspace in following callchain :-

blkdev_common_ioctl()
  disk_scan_partitions()
   bdev_file_open_by_dev()
    bdev_open()
     blkdev_get_part()
      blkdev_get_whole()
       return bdev_disk_changed() error if
       GD_NEED_PART_SCAN in set
      blkdev_get_whole()
       return bdev_disk_changed() error if
       GD_NEED_PART_SCAN in set
Shin'ichiro Kawasaki April 17, 2024, 2:28 a.m. UTC | #2
On Apr 02, 2024 / 18:01, Christoph Hellwig wrote:
> Commit 4601b4b130de ("block: reopen the device in blkdev_reread_part")
> lost the propagation of I/O errors from the low-level read of the
> partition table to the user space caller of the BLKRRPART.
> 
> Apparently some user space relies on, so restore the propagation.  This
> isn't exactly pretty as other block device open calls explicitly do not
> are about these errors, so add a new BLK_OPEN_STRICT_SCAN to opt into
> the error propagation.
> 
> Fixes: 4601b4b130de ("block: reopen the device in blkdev_reread_part")
> Reported-by: Saranya Muruganandam <saranyamohan@google.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I should have commented on this patch with the correct authorship instead of the
patch that Saranya posted. With this patch I observe unexpected EINVAL. Please
find a couple of comments in line.

> ---
>  block/bdev.c           | 29 +++++++++++++++++++----------
>  block/ioctl.c          |  3 ++-
>  include/linux/blkdev.h |  2 ++
>  3 files changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index 7a5f611c3d2e3e..42940bced33bb4 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -652,6 +652,14 @@ static void blkdev_flush_mapping(struct block_device *bdev)
>  	bdev_write_inode(bdev);
>  }
>  
> +static void blkdev_put_whole(struct block_device *bdev)
> +{
> +	if (atomic_dec_and_test(&bdev->bd_openers))
> +		blkdev_flush_mapping(bdev);
> +	if (bdev->bd_disk->fops->release)
> +		bdev->bd_disk->fops->release(bdev->bd_disk);
> +}
> +
>  static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
>  {
>  	struct gendisk *disk = bdev->bd_disk;
> @@ -670,20 +678,21 @@ static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
>  
>  	if (!atomic_read(&bdev->bd_openers))
>  		set_init_blocksize(bdev);
> -	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
> -		bdev_disk_changed(disk, false);
>  	atomic_inc(&bdev->bd_openers);
> +	if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
> +		/*
> +		 * Only return scanning errors if we are called from conexts

Nit: s/conexts/contexts/

> +		 * that explicitly want them, e.g. the BLKRRPART ioctl.
> +		 */
> +		ret = bdev_disk_changed(disk, false);
> +		if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
> +			blkdev_put_whole(bdev);
> +			return ret;
> +		}
> +	}
>  	return 0;
>  }
>  
> -static void blkdev_put_whole(struct block_device *bdev)
> -{
> -	if (atomic_dec_and_test(&bdev->bd_openers))
> -		blkdev_flush_mapping(bdev);
> -	if (bdev->bd_disk->fops->release)
> -		bdev->bd_disk->fops->release(bdev->bd_disk);
> -}
> -
>  static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
>  {
>  	struct gendisk *disk = part->bd_disk;
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 0c76137adcaaa5..128f503828cee7 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -562,7 +562,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
>  			return -EACCES;
>  		if (bdev_is_partition(bdev))
>  			return -EINVAL;
> -		return disk_scan_partitions(bdev->bd_disk, mode);
> +		return disk_scan_partitions(bdev->bd_disk,
> +				mode | BLK_OPEN_STRICT_SCAN);
>  	case BLKTRACESTART:
>  	case BLKTRACESTOP:
>  	case BLKTRACETEARDOWN:
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 79ed07bd652ac4..0b39df4864ef19 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -129,6 +129,8 @@ typedef unsigned int __bitwise blk_mode_t;
>  #define BLK_OPEN_WRITE_IOCTL	((__force blk_mode_t)(1 << 4))
>  /* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
>  #define BLK_OPEN_RESTRICT_WRITES	((__force blk_mode_t)(1 << 5))
> +/* return partition scanning errors */
> +#define BLK_OPEN_STRICT_SCAN	((__force blk_mode_t)(1 << 5))

The line above assigns the same number for BLK_OPEN_STRICT_SCAN and
BLK_OPEN_RESTRICT_WRITES, then blockdev --rereadpt fails with EINVAL,
not EIO. I modified it to "1 << 6", then it looks working good.

>  
>  struct gendisk {
>  	/*
> -- 
> 2.39.2
>
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index 7a5f611c3d2e3e..42940bced33bb4 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -652,6 +652,14 @@  static void blkdev_flush_mapping(struct block_device *bdev)
 	bdev_write_inode(bdev);
 }
 
+static void blkdev_put_whole(struct block_device *bdev)
+{
+	if (atomic_dec_and_test(&bdev->bd_openers))
+		blkdev_flush_mapping(bdev);
+	if (bdev->bd_disk->fops->release)
+		bdev->bd_disk->fops->release(bdev->bd_disk);
+}
+
 static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -670,20 +678,21 @@  static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
 
 	if (!atomic_read(&bdev->bd_openers))
 		set_init_blocksize(bdev);
-	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
-		bdev_disk_changed(disk, false);
 	atomic_inc(&bdev->bd_openers);
+	if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
+		/*
+		 * Only return scanning errors if we are called from conexts
+		 * that explicitly want them, e.g. the BLKRRPART ioctl.
+		 */
+		ret = bdev_disk_changed(disk, false);
+		if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
+			blkdev_put_whole(bdev);
+			return ret;
+		}
+	}
 	return 0;
 }
 
-static void blkdev_put_whole(struct block_device *bdev)
-{
-	if (atomic_dec_and_test(&bdev->bd_openers))
-		blkdev_flush_mapping(bdev);
-	if (bdev->bd_disk->fops->release)
-		bdev->bd_disk->fops->release(bdev->bd_disk);
-}
-
 static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
 {
 	struct gendisk *disk = part->bd_disk;
diff --git a/block/ioctl.c b/block/ioctl.c
index 0c76137adcaaa5..128f503828cee7 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -562,7 +562,8 @@  static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
 			return -EACCES;
 		if (bdev_is_partition(bdev))
 			return -EINVAL;
-		return disk_scan_partitions(bdev->bd_disk, mode);
+		return disk_scan_partitions(bdev->bd_disk,
+				mode | BLK_OPEN_STRICT_SCAN);
 	case BLKTRACESTART:
 	case BLKTRACESTOP:
 	case BLKTRACETEARDOWN:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79ed07bd652ac4..0b39df4864ef19 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -129,6 +129,8 @@  typedef unsigned int __bitwise blk_mode_t;
 #define BLK_OPEN_WRITE_IOCTL	((__force blk_mode_t)(1 << 4))
 /* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
 #define BLK_OPEN_RESTRICT_WRITES	((__force blk_mode_t)(1 << 5))
+/* return partition scanning errors */
+#define BLK_OPEN_STRICT_SCAN	((__force blk_mode_t)(1 << 5))
 
 struct gendisk {
 	/*