diff mbox series

[-next] nilfs2: support STATX_DIOALIGN for statx file

Message ID 20240827015152.222983-1-lihongbo22@huawei.com (mailing list archive)
State New
Headers show
Series [-next] nilfs2: support STATX_DIOALIGN for statx file | expand

Commit Message

Hongbo Li Aug. 27, 2024, 1:51 a.m. UTC
Add support for STATX_DIOALIGN to nilfs2, so that direct I/O alignment
restrictions are exposed to userspace in a generic way.

By default, nilfs2 uses the default getattr implemented at vfs layer,
so we should implement getattr in nilfs2 to fill the dio_xx_align
members. The nilfs2 does not have the special align requirements. So
we use the default alignment setting from block layer.
We have done the following test:

[Before]
```
./statx_test /mnt/nilfs2/test
statx(/mnt/nilfs2/test) = 0
dio mem align:0
dio offset align:0
```

[After]
```
./statx_test /mnt/nilfs2/test
statx(/mnt/nilfs2/test) = 0
dio mem align:512
dio offset align:512
```

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 fs/nilfs2/file.c  |  1 +
 fs/nilfs2/inode.c | 20 ++++++++++++++++++++
 fs/nilfs2/namei.c |  2 ++
 fs/nilfs2/nilfs.h |  2 ++
 4 files changed, 25 insertions(+)

Comments

Ryusuke Konishi Aug. 27, 2024, 6:15 p.m. UTC | #1
Hi Hongbo,

Thanks for the suggestion.

I checked the STATX_DIOALIGN specification while looking at the
implementation of other file systems, and I thought that if DIO
support is incomplete, the dio_xx_align member should be set to 0.

Due to the nature of NILFS2 as a log-structured file system, DIO
writes fall back to buffered io.  (DIO reads are supported)

This is similar to the journal data mode of ext4 and the blkzoned
device support of f2fs, but in such case, these file systems return a
value of 0 (direct I/O not supported).

So, it's fine to respond to a STATX_DIOALIGN request, but I think the
value of dio_xx_align should be set to 0 to match these file systems.

In this sense, there may be no need to rush to support STATX_DIOALIGN
now.  Do you still think it would be better to have it?

The following are some minor comments:

On Tue, Aug 27, 2024 at 10:58 AM Hongbo Li wrote:
>
> Add support for STATX_DIOALIGN to nilfs2, so that direct I/O alignment
> restrictions are exposed to userspace in a generic way.
>
> By default, nilfs2 uses the default getattr implemented at vfs layer,
> so we should implement getattr in nilfs2 to fill the dio_xx_align
> members. The nilfs2 does not have the special align requirements. So
> we use the default alignment setting from block layer.
> We have done the following test:
>
> [Before]
> ```
> ./statx_test /mnt/nilfs2/test
> statx(/mnt/nilfs2/test) = 0
> dio mem align:0
> dio offset align:0
> ```
>
> [After]
> ```
> ./statx_test /mnt/nilfs2/test
> statx(/mnt/nilfs2/test) = 0
> dio mem align:512
> dio offset align:512
> ```
>
> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
> ---
>  fs/nilfs2/file.c  |  1 +
>  fs/nilfs2/inode.c | 20 ++++++++++++++++++++
>  fs/nilfs2/namei.c |  2 ++
>  fs/nilfs2/nilfs.h |  2 ++
>  4 files changed, 25 insertions(+)
>
> diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
> index 0e3fc5ba33c7..5528918d4b96 100644
> --- a/fs/nilfs2/file.c
> +++ b/fs/nilfs2/file.c
> @@ -154,6 +154,7 @@ const struct file_operations nilfs_file_operations = {
>
>  const struct inode_operations nilfs_file_inode_operations = {
>         .setattr        = nilfs_setattr,
> +       .getattr        = nilfs_getattr,
>         .permission     = nilfs_permission,
>         .fiemap         = nilfs_fiemap,
>         .fileattr_get   = nilfs_fileattr_get,
> diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
> index 7340a01d80e1..b5bb2c2de32c 100644
> --- a/fs/nilfs2/inode.c
> +++ b/fs/nilfs2/inode.c
> @@ -1001,6 +1001,26 @@ int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
>         return err;
>  }
>
> +int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
> +                       struct kstat *stat, u32 request_mask, unsigned int query_flags)
> +{
> +       struct inode *const inode = d_inode(path->dentry);
> +       struct block_device *bdev = inode->i_sb->s_bdev;
> +       unsigned int blksize = (1 << inode->i_blkbits);
> +
> +       if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
> +               stat->result_mask |= STATX_DIOALIGN;
> +

> +               if (bdev)
> +                       blksize = bdev_logical_block_size(bdev);

I don't think there's any need to check that bdev is NULL, but is
there a reason?

If sb->s_bdev can be NULL, I think that for such devices, a NULL
pointer dereference bug will occur in the mount path.
That's why I was concerned about this.

> +               stat->dio_mem_align = blksize;
> +               stat->dio_offset_align = blksize;
> +       }
> +
> +       generic_fillattr(idmap, request_mask, inode, stat);
> +       return 0;
> +}
> +
>  int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
>                      int mask)
>  {
> diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
> index c950139db6ef..ad56f4f8be1f 100644
> --- a/fs/nilfs2/namei.c
> +++ b/fs/nilfs2/namei.c
> @@ -546,6 +546,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
>         .mknod          = nilfs_mknod,
>         .rename         = nilfs_rename,
>         .setattr        = nilfs_setattr,
> +       .getattr        = nilfs_getattr,

In the case of directories, the STATX_DIOALIGN request is ignored, so
I don't think this is necessary for now. (It can be added in the
future when supporting other optional getattr requests/responses).

>         .permission     = nilfs_permission,
>         .fiemap         = nilfs_fiemap,
>         .fileattr_get   = nilfs_fileattr_get,
> @@ -554,6 +555,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
>
>  const struct inode_operations nilfs_special_inode_operations = {
>         .setattr        = nilfs_setattr,
> +       .getattr        = nilfs_getattr,
>         .permission     = nilfs_permission,
>  };

Ditto.

>
> diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
> index 4017f7856440..98a8b28ca1db 100644
> --- a/fs/nilfs2/nilfs.h
> +++ b/fs/nilfs2/nilfs.h
> @@ -280,6 +280,8 @@ extern void nilfs_truncate(struct inode *);
>  extern void nilfs_evict_inode(struct inode *);
>  extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
>                          struct iattr *);
> +extern int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
> +                       struct kstat *stat, u32 request_mask, unsigned int query_flags);

Do not add the "extern" directive to new function declarations.
We are moving towards eliminating the extern declarator from function
declarations whenever possible.

>  extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
>  int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
>                      int mask);
> --
> 2.34.1
>

That's all for my comments.

Thanks,
Ryusuke Konishi
Hongbo Li Aug. 28, 2024, 1:12 a.m. UTC | #2
On 2024/8/28 2:15, Ryusuke Konishi wrote:
> Hi Hongbo,
> 
> Thanks for the suggestion.
> 
> I checked the STATX_DIOALIGN specification while looking at the
> implementation of other file systems, and I thought that if DIO
> support is incomplete, the dio_xx_align member should be set to 0.
> 
> Due to the nature of NILFS2 as a log-structured file system, DIO
> writes fall back to buffered io.  (DIO reads are supported)
> 
That's really a question. How to handle the asymmetric situation of 
O_DIRECT read and write?

The STATX_DIOALIGN specification does not define this case.

Thanks,
Hongbo

> This is similar to the journal data mode of ext4 and the blkzoned
> device support of f2fs, but in such case, these file systems return a
> value of 0 (direct I/O not supported).
> 
> So, it's fine to respond to a STATX_DIOALIGN request, but I think the
> value of dio_xx_align should be set to 0 to match these file systems.
> 
> In this sense, there may be no need to rush to support STATX_DIOALIGN
> now.  Do you still think it would be better to have it?
> 
> The following are some minor comments:
> 
> On Tue, Aug 27, 2024 at 10:58 AM Hongbo Li wrote:
>>
>> Add support for STATX_DIOALIGN to nilfs2, so that direct I/O alignment
>> restrictions are exposed to userspace in a generic way.
>>
>> By default, nilfs2 uses the default getattr implemented at vfs layer,
>> so we should implement getattr in nilfs2 to fill the dio_xx_align
>> members. The nilfs2 does not have the special align requirements. So
>> we use the default alignment setting from block layer.
>> We have done the following test:
>>
>> [Before]
>> ```
>> ./statx_test /mnt/nilfs2/test
>> statx(/mnt/nilfs2/test) = 0
>> dio mem align:0
>> dio offset align:0
>> ```
>>
>> [After]
>> ```
>> ./statx_test /mnt/nilfs2/test
>> statx(/mnt/nilfs2/test) = 0
>> dio mem align:512
>> dio offset align:512
>> ```
>>
>> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
>> ---
>>   fs/nilfs2/file.c  |  1 +
>>   fs/nilfs2/inode.c | 20 ++++++++++++++++++++
>>   fs/nilfs2/namei.c |  2 ++
>>   fs/nilfs2/nilfs.h |  2 ++
>>   4 files changed, 25 insertions(+)
>>
>> diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
>> index 0e3fc5ba33c7..5528918d4b96 100644
>> --- a/fs/nilfs2/file.c
>> +++ b/fs/nilfs2/file.c
>> @@ -154,6 +154,7 @@ const struct file_operations nilfs_file_operations = {
>>
>>   const struct inode_operations nilfs_file_inode_operations = {
>>          .setattr        = nilfs_setattr,
>> +       .getattr        = nilfs_getattr,
>>          .permission     = nilfs_permission,
>>          .fiemap         = nilfs_fiemap,
>>          .fileattr_get   = nilfs_fileattr_get,
>> diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
>> index 7340a01d80e1..b5bb2c2de32c 100644
>> --- a/fs/nilfs2/inode.c
>> +++ b/fs/nilfs2/inode.c
>> @@ -1001,6 +1001,26 @@ int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
>>          return err;
>>   }
>>
>> +int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
>> +                       struct kstat *stat, u32 request_mask, unsigned int query_flags)
>> +{
>> +       struct inode *const inode = d_inode(path->dentry);
>> +       struct block_device *bdev = inode->i_sb->s_bdev;
>> +       unsigned int blksize = (1 << inode->i_blkbits);
>> +
>> +       if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
>> +               stat->result_mask |= STATX_DIOALIGN;
>> +
> 
>> +               if (bdev)
>> +                       blksize = bdev_logical_block_size(bdev);
> 
> I don't think there's any need to check that bdev is NULL, but is
> there a reason?
> 
> If sb->s_bdev can be NULL, I think that for such devices, a NULL
> pointer dereference bug will occur in the mount path.
> That's why I was concerned about this.
> 
>> +               stat->dio_mem_align = blksize;
>> +               stat->dio_offset_align = blksize;
>> +       }
>> +
>> +       generic_fillattr(idmap, request_mask, inode, stat);
>> +       return 0;
>> +}
>> +
>>   int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
>>                       int mask)
>>   {
>> diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
>> index c950139db6ef..ad56f4f8be1f 100644
>> --- a/fs/nilfs2/namei.c
>> +++ b/fs/nilfs2/namei.c
>> @@ -546,6 +546,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
>>          .mknod          = nilfs_mknod,
>>          .rename         = nilfs_rename,
>>          .setattr        = nilfs_setattr,
>> +       .getattr        = nilfs_getattr,
> 
> In the case of directories, the STATX_DIOALIGN request is ignored, so
> I don't think this is necessary for now. (It can be added in the
> future when supporting other optional getattr requests/responses).
> 
>>          .permission     = nilfs_permission,
>>          .fiemap         = nilfs_fiemap,
>>          .fileattr_get   = nilfs_fileattr_get,
>> @@ -554,6 +555,7 @@ const struct inode_operations nilfs_dir_inode_operations = {
>>
>>   const struct inode_operations nilfs_special_inode_operations = {
>>          .setattr        = nilfs_setattr,
>> +       .getattr        = nilfs_getattr,
>>          .permission     = nilfs_permission,
>>   };
> 
> Ditto.
> 
>>
>> diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
>> index 4017f7856440..98a8b28ca1db 100644
>> --- a/fs/nilfs2/nilfs.h
>> +++ b/fs/nilfs2/nilfs.h
>> @@ -280,6 +280,8 @@ extern void nilfs_truncate(struct inode *);
>>   extern void nilfs_evict_inode(struct inode *);
>>   extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
>>                           struct iattr *);
>> +extern int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
>> +                       struct kstat *stat, u32 request_mask, unsigned int query_flags);
> 
> Do not add the "extern" directive to new function declarations.
> We are moving towards eliminating the extern declarator from function
> declarations whenever possible.
> 
>>   extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
>>   int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
>>                       int mask);
>> --
>> 2.34.1
>>
> 
> That's all for my comments.
> 
> Thanks,
> Ryusuke Konishi
Christoph Hellwig Aug. 28, 2024, 5:11 a.m. UTC | #3
On Wed, Aug 28, 2024 at 09:12:50AM +0800, Hongbo Li wrote:
>> I checked the STATX_DIOALIGN specification while looking at the
>> implementation of other file systems, and I thought that if DIO
>> support is incomplete, the dio_xx_align member should be set to 0.
>>
>> Due to the nature of NILFS2 as a log-structured file system, DIO
>> writes fall back to buffered io.  (DIO reads are supported)
>>
> That's really a question. How to handle the asymmetric situation of 
> O_DIRECT read and write?
>
> The STATX_DIOALIGN specification does not define this case.

Yes, it needs separate reporting for the read alignment.  I actually
wrote patches for that a few days ago, but never got around to testing
them.  I'll send out what I have.
diff mbox series

Patch

diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 0e3fc5ba33c7..5528918d4b96 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -154,6 +154,7 @@  const struct file_operations nilfs_file_operations = {
 
 const struct inode_operations nilfs_file_inode_operations = {
 	.setattr	= nilfs_setattr,
+	.getattr	= nilfs_getattr,
 	.permission     = nilfs_permission,
 	.fiemap		= nilfs_fiemap,
 	.fileattr_get	= nilfs_fileattr_get,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7340a01d80e1..b5bb2c2de32c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1001,6 +1001,26 @@  int nilfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 	return err;
 }
 
+int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
+			struct kstat *stat, u32 request_mask, unsigned int query_flags)
+{
+	struct inode *const inode = d_inode(path->dentry);
+	struct block_device *bdev = inode->i_sb->s_bdev;
+	unsigned int blksize = (1 << inode->i_blkbits);
+
+	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
+		stat->result_mask |= STATX_DIOALIGN;
+
+		if (bdev)
+			blksize = bdev_logical_block_size(bdev);
+		stat->dio_mem_align = blksize;
+		stat->dio_offset_align = blksize;
+	}
+
+	generic_fillattr(idmap, request_mask, inode, stat);
+	return 0;
+}
+
 int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
 		     int mask)
 {
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c950139db6ef..ad56f4f8be1f 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -546,6 +546,7 @@  const struct inode_operations nilfs_dir_inode_operations = {
 	.mknod		= nilfs_mknod,
 	.rename		= nilfs_rename,
 	.setattr	= nilfs_setattr,
+	.getattr	= nilfs_getattr,
 	.permission	= nilfs_permission,
 	.fiemap		= nilfs_fiemap,
 	.fileattr_get	= nilfs_fileattr_get,
@@ -554,6 +555,7 @@  const struct inode_operations nilfs_dir_inode_operations = {
 
 const struct inode_operations nilfs_special_inode_operations = {
 	.setattr	= nilfs_setattr,
+	.getattr	= nilfs_getattr,
 	.permission	= nilfs_permission,
 };
 
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 4017f7856440..98a8b28ca1db 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -280,6 +280,8 @@  extern void nilfs_truncate(struct inode *);
 extern void nilfs_evict_inode(struct inode *);
 extern int nilfs_setattr(struct mnt_idmap *, struct dentry *,
 			 struct iattr *);
+extern int nilfs_getattr(struct mnt_idmap *idmap, const struct path *path,
+			struct kstat *stat, u32 request_mask, unsigned int query_flags);
 extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
 int nilfs_permission(struct mnt_idmap *idmap, struct inode *inode,
 		     int mask);