diff mbox series

[v4,06/11] block: Add atomic write support for statx

Message ID 20240219130109.341523-7-john.g.garry@oracle.com (mailing list archive)
State Superseded
Headers show
Series block atomic writes | expand

Commit Message

John Garry Feb. 19, 2024, 1:01 p.m. UTC
From: Prasad Singamsetty <prasad.singamsetty@oracle.com>

Extend statx system call to return additional info for atomic write support
support if the specified file is a block device.

Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
---
 block/bdev.c           | 37 +++++++++++++++++++++++++++----------
 fs/stat.c              | 13 ++++++-------
 include/linux/blkdev.h |  5 +++--
 3 files changed, 36 insertions(+), 19 deletions(-)

Comments

Christoph Hellwig Feb. 20, 2024, 8:29 a.m. UTC | #1
> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)

> +	if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
> +		return;

BDEV_STATX_SUPPORTED_MASK is misleading here.  bdevs support a lot more
fields, these are just the ones needing special attention.  I'd do away
with the extra define and just open code it.

> +	/* If this is a block device inode, override the filesystem
> +	 * attributes with the block device specific parameters
> +	 * that need to be obtained from the bdev backing inode
> +	 */

This is not the normal kernel multi-line comment format.

> +	if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
> +		bdev_statx(path.dentry, stat, request_mask);

I know I touched this last, but does anyone remember why we have
various random fixups in vfs_statx and not in vfs_getattr_nosec, where
they we have more of them and also the inode at hand?
John Garry Feb. 20, 2024, 9:35 a.m. UTC | #2
On 20/02/2024 08:29, Christoph Hellwig wrote:
>> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
> 
>> +	if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
>> +		return;
> 
> BDEV_STATX_SUPPORTED_MASK is misleading here.  bdevs support a lot more
> fields, these are just the ones needing special attention.  I'd do away
> with the extra define and just open code it.

ok, fine

> 
>> +	/* If this is a block device inode, override the filesystem
>> +	 * attributes with the block device specific parameters
>> +	 * that need to be obtained from the bdev backing inode
>> +	 */
> 
> This is not the normal kernel multi-line comment format.
> 

will fix

>> +	if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
>> +		bdev_statx(path.dentry, stat, request_mask);
> 
> I know I touched this last, but does anyone remember why we have
> various random fixups in vfs_statx and not in vfs_getattr_nosec, where
> they we have more of them and also the inode at hand?
Ritesh Harjani (IBM) Feb. 25, 2024, 2:20 p.m. UTC | #3
John Garry <john.g.garry@oracle.com> writes:

> From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>
> Extend statx system call to return additional info for atomic write support
> support if the specified file is a block device.
>
> Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
> Signed-off-by: John Garry <john.g.garry@oracle.com>
> ---
>  block/bdev.c           | 37 +++++++++++++++++++++++++++----------
>  fs/stat.c              | 13 ++++++-------
>  include/linux/blkdev.h |  5 +++--
>  3 files changed, 36 insertions(+), 19 deletions(-)
>
> diff --git a/block/bdev.c b/block/bdev.c
> index e9f1b12bd75c..0dada9902bd4 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
>  	iput(old_inode);
>  }
>  
> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
> +
>  /*
> - * Handle STATX_DIOALIGN for block devices.
> - *
> - * Note that the inode passed to this is the inode of a block device node file,
> - * not the block device's internal inode.  Therefore it is *not* valid to use
> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
>   */
> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)

why change this to dentry? Why not keep it as inode itself?

-ritesh

>  {
>  	struct block_device *bdev;
>  
> -	bdev = blkdev_get_no_open(inode->i_rdev);
> +	if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
> +		return;
> +
> +	/*
> +	 * Note that d_backing_inode() returns the inode of a block device node
> +	 * file, not the block device's internal inode.  Therefore it is *not*
> +	 * valid to use I_BDEV() here; the block device has to be looked up by
> +	 * i_rdev instead.
> +	 */
> +	bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
>  	if (!bdev)
>  		return;
>  
> -	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
> -	stat->dio_offset_align = bdev_logical_block_size(bdev);
> -	stat->result_mask |= STATX_DIOALIGN;
> +	if (request_mask & STATX_DIOALIGN) {
> +		stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
> +		stat->dio_offset_align = bdev_logical_block_size(bdev);
> +		stat->result_mask |= STATX_DIOALIGN;
> +	}
> +
> +	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
> +		struct request_queue *bd_queue = bdev->bd_queue;
> +
> +		generic_fill_statx_atomic_writes(stat,
> +			queue_atomic_write_unit_min_bytes(bd_queue),
> +			queue_atomic_write_unit_max_bytes(bd_queue));
> +	}
>  
>  	blkdev_put_no_open(bdev);
>  }
> diff --git a/fs/stat.c b/fs/stat.c
> index 522787a4ab6a..bd0618477702 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -290,13 +290,12 @@ static int vfs_statx(int dfd, struct filename *filename, int flags,
>  		stat->attributes |= STATX_ATTR_MOUNT_ROOT;
>  	stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
>  
> -	/* Handle STATX_DIOALIGN for block devices. */
> -	if (request_mask & STATX_DIOALIGN) {
> -		struct inode *inode = d_backing_inode(path.dentry);
> -
> -		if (S_ISBLK(inode->i_mode))
> -			bdev_statx_dioalign(inode, stat);
> -	}
> +	/* If this is a block device inode, override the filesystem
> +	 * attributes with the block device specific parameters
> +	 * that need to be obtained from the bdev backing inode
> +	 */
> +	if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
> +		bdev_statx(path.dentry, stat, request_mask);
>  
>  	path_put(&path);
>  	if (retry_estale(error, lookup_flags)) {
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 40ed56ef4937..4f04456f1250 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1541,7 +1541,7 @@ int sync_blockdev(struct block_device *bdev);
>  int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
>  int sync_blockdev_nowait(struct block_device *bdev);
>  void sync_bdevs(bool wait);
> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
>  void printk_all_partitions(void);
>  int __init early_lookup_bdev(const char *pathname, dev_t *dev);
>  #else
> @@ -1559,7 +1559,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
>  static inline void sync_bdevs(bool wait)
>  {
>  }
> -static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
> +static inline void bdev_statx(struct dentry *dentry, struct kstat *stat,
> +				u32 request_mask)
>  {
>  }
>  static inline void printk_all_partitions(void)
> -- 
> 2.31.1
John Garry Feb. 26, 2024, 9:36 a.m. UTC | #4
On 25/02/2024 14:20, Ritesh Harjani (IBM) wrote:
> John Garry <john.g.garry@oracle.com> writes:
> 
>> From: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>>
>> Extend statx system call to return additional info for atomic write support
>> support if the specified file is a block device.
>>
>> Signed-off-by: Prasad Singamsetty <prasad.singamsetty@oracle.com>
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>> ---
>>   block/bdev.c           | 37 +++++++++++++++++++++++++++----------
>>   fs/stat.c              | 13 ++++++-------
>>   include/linux/blkdev.h |  5 +++--
>>   3 files changed, 36 insertions(+), 19 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e9f1b12bd75c..0dada9902bd4 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -1116,24 +1116,41 @@ void sync_bdevs(bool wait)
>>   	iput(old_inode);
>>   }
>>   
>> +#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
>> +
>>   /*
>> - * Handle STATX_DIOALIGN for block devices.
>> - *
>> - * Note that the inode passed to this is the inode of a block device node file,
>> - * not the block device's internal inode.  Therefore it is *not* valid to use
>> - * I_BDEV() here; the block device has to be looked up by i_rdev instead.
>> + * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
>>    */
>> -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
>> +void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
> 
> why change this to dentry? Why not keep it as inode itself?

I suppose that I could do that.

Thanks,
John
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index e9f1b12bd75c..0dada9902bd4 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1116,24 +1116,41 @@  void sync_bdevs(bool wait)
 	iput(old_inode);
 }
 
+#define BDEV_STATX_SUPPORTED_MASK (STATX_DIOALIGN | STATX_WRITE_ATOMIC)
+
 /*
- * Handle STATX_DIOALIGN for block devices.
- *
- * Note that the inode passed to this is the inode of a block device node file,
- * not the block device's internal inode.  Therefore it is *not* valid to use
- * I_BDEV() here; the block device has to be looked up by i_rdev instead.
+ * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
  */
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask)
 {
 	struct block_device *bdev;
 
-	bdev = blkdev_get_no_open(inode->i_rdev);
+	if (!(request_mask & BDEV_STATX_SUPPORTED_MASK))
+		return;
+
+	/*
+	 * Note that d_backing_inode() returns the inode of a block device node
+	 * file, not the block device's internal inode.  Therefore it is *not*
+	 * valid to use I_BDEV() here; the block device has to be looked up by
+	 * i_rdev instead.
+	 */
+	bdev = blkdev_get_no_open(d_backing_inode(dentry)->i_rdev);
 	if (!bdev)
 		return;
 
-	stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
-	stat->dio_offset_align = bdev_logical_block_size(bdev);
-	stat->result_mask |= STATX_DIOALIGN;
+	if (request_mask & STATX_DIOALIGN) {
+		stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+		stat->dio_offset_align = bdev_logical_block_size(bdev);
+		stat->result_mask |= STATX_DIOALIGN;
+	}
+
+	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
+		struct request_queue *bd_queue = bdev->bd_queue;
+
+		generic_fill_statx_atomic_writes(stat,
+			queue_atomic_write_unit_min_bytes(bd_queue),
+			queue_atomic_write_unit_max_bytes(bd_queue));
+	}
 
 	blkdev_put_no_open(bdev);
 }
diff --git a/fs/stat.c b/fs/stat.c
index 522787a4ab6a..bd0618477702 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -290,13 +290,12 @@  static int vfs_statx(int dfd, struct filename *filename, int flags,
 		stat->attributes |= STATX_ATTR_MOUNT_ROOT;
 	stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT;
 
-	/* Handle STATX_DIOALIGN for block devices. */
-	if (request_mask & STATX_DIOALIGN) {
-		struct inode *inode = d_backing_inode(path.dentry);
-
-		if (S_ISBLK(inode->i_mode))
-			bdev_statx_dioalign(inode, stat);
-	}
+	/* If this is a block device inode, override the filesystem
+	 * attributes with the block device specific parameters
+	 * that need to be obtained from the bdev backing inode
+	 */
+	if (S_ISBLK(d_backing_inode(path.dentry)->i_mode))
+		bdev_statx(path.dentry, stat, request_mask);
 
 	path_put(&path);
 	if (retry_estale(error, lookup_flags)) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 40ed56ef4937..4f04456f1250 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1541,7 +1541,7 @@  int sync_blockdev(struct block_device *bdev);
 int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend);
 int sync_blockdev_nowait(struct block_device *bdev);
 void sync_bdevs(bool wait);
-void bdev_statx_dioalign(struct inode *inode, struct kstat *stat);
+void bdev_statx(struct dentry *dentry, struct kstat *stat, u32 request_mask);
 void printk_all_partitions(void);
 int __init early_lookup_bdev(const char *pathname, dev_t *dev);
 #else
@@ -1559,7 +1559,8 @@  static inline int sync_blockdev_nowait(struct block_device *bdev)
 static inline void sync_bdevs(bool wait)
 {
 }
-static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
+static inline void bdev_statx(struct dentry *dentry, struct kstat *stat,
+				u32 request_mask)
 {
 }
 static inline void printk_all_partitions(void)