Message ID | 20240828051149.1897291-3-hch@lst.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/3] fs: reformat the statx definition | expand |
On Wed, Aug 28, 2024 at 08:11:02AM +0300, Christoph Hellwig wrote: > Add a separate dio read align field, as many out of place write > file systems can easily do reads aligned to the device sector size, > but require bigger alignment for writes. > > This is usually papered over by falling back to buffered I/O for smaller > writes and doing read-modify-write cycles, but performance for this > sucks, so applications benefit from knowing the actual write alignment. > > Signed-off-by: Christoph Hellwig <hch@lst.de> Looks fine to me... Reviewed-by: Darrick J. Wong <djwong@kernel.org> --D > --- > fs/stat.c | 1 + > include/linux/stat.h | 1 + > include/uapi/linux/stat.h | 4 +++- > 3 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/fs/stat.c b/fs/stat.c > index 89ce1be563108c..044e7ad9f3abc2 100644 > --- a/fs/stat.c > +++ b/fs/stat.c > @@ -700,6 +700,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) > tmp.stx_mnt_id = stat->mnt_id; > tmp.stx_dio_mem_align = stat->dio_mem_align; > tmp.stx_dio_offset_align = stat->dio_offset_align; > + tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; > tmp.stx_subvol = stat->subvol; > tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; > tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; > diff --git a/include/linux/stat.h b/include/linux/stat.h > index 3d900c86981c5b..9d8382e23a9c69 100644 > --- a/include/linux/stat.h > +++ b/include/linux/stat.h > @@ -52,6 +52,7 @@ struct kstat { > u64 mnt_id; > u32 dio_mem_align; > u32 dio_offset_align; > + u32 dio_read_offset_align; > u64 change_cookie; > u64 subvol; > u32 atomic_write_unit_min; > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h > index 8b35d7d511a287..f78ee3670dd5d7 100644 > --- a/include/uapi/linux/stat.h > +++ b/include/uapi/linux/stat.h > @@ -179,7 +179,8 @@ struct statx { > /* Max atomic write segment count */ > __u32 stx_atomic_write_segments_max; > > - __u32 __spare1[1]; > + /* File offset alignment for direct I/O reads */ > + __u32 stx_dio_read_offset_align; > > /* 0xb8 */ > __u64 __spare3[9]; /* Spare space for future expansion */ > @@ -213,6 +214,7 @@ struct statx { > #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ > #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ > #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ > +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ > > #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ > > -- > 2.43.0 > >
On Wed, Aug 28, 2024 at 08:11:02AM +0300, Christoph Hellwig wrote: > Add a separate dio read align field, as many out of place write > file systems can easily do reads aligned to the device sector size, > but require bigger alignment for writes. > > This is usually papered over by falling back to buffered I/O for smaller > writes and doing read-modify-write cycles, but performance for this > sucks, so applications benefit from knowing the actual write alignment. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/stat.c | 1 + > include/linux/stat.h | 1 + > include/uapi/linux/stat.h | 4 +++- > 3 files changed, 5 insertions(+), 1 deletion(-) > > diff --git a/fs/stat.c b/fs/stat.c > index 89ce1be563108c..044e7ad9f3abc2 100644 > --- a/fs/stat.c > +++ b/fs/stat.c > @@ -700,6 +700,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) > tmp.stx_mnt_id = stat->mnt_id; > tmp.stx_dio_mem_align = stat->dio_mem_align; > tmp.stx_dio_offset_align = stat->dio_offset_align; > + tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; > tmp.stx_subvol = stat->subvol; > tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; > tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; > diff --git a/include/linux/stat.h b/include/linux/stat.h > index 3d900c86981c5b..9d8382e23a9c69 100644 > --- a/include/linux/stat.h > +++ b/include/linux/stat.h > @@ -52,6 +52,7 @@ struct kstat { > u64 mnt_id; > u32 dio_mem_align; > u32 dio_offset_align; > + u32 dio_read_offset_align; > u64 change_cookie; > u64 subvol; > u32 atomic_write_unit_min; > diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h > index 8b35d7d511a287..f78ee3670dd5d7 100644 > --- a/include/uapi/linux/stat.h > +++ b/include/uapi/linux/stat.h > @@ -179,7 +179,8 @@ struct statx { > /* Max atomic write segment count */ > __u32 stx_atomic_write_segments_max; > > - __u32 __spare1[1]; > + /* File offset alignment for direct I/O reads */ > + __u32 stx_dio_read_offset_align; > > /* 0xb8 */ > __u64 __spare3[9]; /* Spare space for future expansion */ > @@ -213,6 +214,7 @@ struct statx { > #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ > #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ > #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ > +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ > > #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ Thanks. We maybe should have included read/write separation in STATX_DIOALIGN, but at the time the only case that was brought up was "DIO reads are supported but DIO writes are not" which people had argued was not useful. Is this patch meant to support that case, or just the case where DIO in both directions is supported but with different alignments? Is that different file offset alignments, different memory alignments, or both? This patch doesn't add a stx_dio_read_mem_align field, so it's still assumed that both directions share the existing stx_dio_mem_align property, including whether DIO is supported at all (0 vs. nonzero). So as proposed, the only case it helps with is where DIO in both directions is supported with the same memory alignment but different file offset alignments. Maybe that is intended, but it's not clear to me. Are there specific userspace applications that would like to take advantage of a smaller value of stx_dio_read_offset_align compared to the existing stx_dio_offset_align? Even in the case of a discrepancy between reads and writes, applications are supposed to be able to just use stx_dio_offset_align since it has to be the greater of the two alignments. And as always, new UAPIs need to be accompanied by documentation. In this case that's an update to the statx man page. - Eric
On Wed, Aug 28, 2024 at 11:52:27PM +0000, Eric Biggers wrote: > Thanks. We maybe should have included read/write separation in STATX_DIOALIGN, > but at the time the only case that was brought up was "DIO reads are supported > but DIO writes are not" which people had argued was not useful. > > Is this patch meant to support that case, Why would anyone support direct I/O reads but not writes? That seems really weird, but maybe I'm missing something important. > or just the case where DIO in both > directions is supported but with different alignments? Is that different file > offset alignments, different memory alignments, or both? This patch doesn't add > a stx_dio_read_mem_align field, so it's still assumed that both directions share > the existing stx_dio_mem_align property, including whether DIO is supported at > all (0 vs. nonzero). Yes. The memory alignment really is dependent on the underlying storage hardware DMA engine, which doesn't distinguish between reads and writes. > So as proposed, the only case it helps with is where DIO > in both directions is supported with the same memory alignment but different > file offset alignments. Yes. > Maybe that is intended, but it's not clear to me. Well, that's good feedback to make it more clear. > Are there specific userspace applications that would like to take advantage of a > smaller value of stx_dio_read_offset_align compared to the existing > stx_dio_offset_align? There are a lot of read-heavy workloads where smaller reads do make a difference.
diff --git a/fs/stat.c b/fs/stat.c index 89ce1be563108c..044e7ad9f3abc2 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -700,6 +700,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; + tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; tmp.stx_subvol = stat->subvol; tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c5b..9d8382e23a9c69 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,7 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u32 dio_read_offset_align; u64 change_cookie; u64 subvol; u32 atomic_write_unit_min; diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 8b35d7d511a287..f78ee3670dd5d7 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -179,7 +179,8 @@ struct statx { /* Max atomic write segment count */ __u32 stx_atomic_write_segments_max; - __u32 __spare1[1]; + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ @@ -213,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
Add a separate dio read align field, as many out of place write file systems can easily do reads aligned to the device sector size, but require bigger alignment for writes. This is usually papered over by falling back to buffered I/O for smaller writes and doing read-modify-write cycles, but performance for this sucks, so applications benefit from knowing the actual write alignment. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/stat.c | 1 + include/linux/stat.h | 1 + include/uapi/linux/stat.h | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-)