Message ID | 20240226173612.1478858-4-john.g.garry@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | block atomic writes | expand |
On 2/26/24 10:36 AM, John Garry wrote: > diff --git a/io_uring/rw.c b/io_uring/rw.c > index d5e79d9bdc71..099dda3ff151 100644 > --- a/io_uring/rw.c > +++ b/io_uring/rw.c > @@ -719,7 +719,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) > struct kiocb *kiocb = &rw->kiocb; > struct io_ring_ctx *ctx = req->ctx; > struct file *file = req->file; > - int ret; > + int ret, rw_type = (mode == FMODE_WRITE) ? WRITE : READ; > > if (unlikely(!file || !(file->f_mode & mode))) > return -EBADF; > @@ -728,7 +728,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) > req->flags |= io_file_get_flags(file); > > kiocb->ki_flags = file->f_iocb_flags; > - ret = kiocb_set_rw_flags(kiocb, rw->flags); > + ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); > if (unlikely(ret)) > return ret; > kiocb->ki_flags |= IOCB_ALLOC_CACHE; Not sure why you took the lazy way out here rather than just pass it in, now there's another branhc in the hot path. NAK.
On 08/03/2024 16:34, Jens Axboe wrote: > On 2/26/24 10:36 AM, John Garry wrote: >> diff --git a/io_uring/rw.c b/io_uring/rw.c >> index d5e79d9bdc71..099dda3ff151 100644 >> --- a/io_uring/rw.c >> +++ b/io_uring/rw.c >> @@ -719,7 +719,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) >> struct kiocb *kiocb = &rw->kiocb; >> struct io_ring_ctx *ctx = req->ctx; >> struct file *file = req->file; >> - int ret; >> + int ret, rw_type = (mode == FMODE_WRITE) ? WRITE : READ; >> >> if (unlikely(!file || !(file->f_mode & mode))) >> return -EBADF; >> @@ -728,7 +728,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) >> req->flags |= io_file_get_flags(file); >> >> kiocb->ki_flags = file->f_iocb_flags; >> - ret = kiocb_set_rw_flags(kiocb, rw->flags); >> + ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); >> if (unlikely(ret)) >> return ret; >> kiocb->ki_flags |= IOCB_ALLOC_CACHE; > Not sure why you took the lazy way out here rather than just pass it in, > now there's another branhc in the hot path. NAK. Are you saying to change io_rw_init_file() to this: io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) And the callers can hardcode rw_type? Thanks, John
On 3/8/24 9:52 AM, John Garry wrote: > On 08/03/2024 16:34, Jens Axboe wrote: >> On 2/26/24 10:36 AM, John Garry wrote: >>> diff --git a/io_uring/rw.c b/io_uring/rw.c >>> index d5e79d9bdc71..099dda3ff151 100644 >>> --- a/io_uring/rw.c >>> +++ b/io_uring/rw.c >>> @@ -719,7 +719,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) >>> struct kiocb *kiocb = &rw->kiocb; >>> struct io_ring_ctx *ctx = req->ctx; >>> struct file *file = req->file; >>> - int ret; >>> + int ret, rw_type = (mode == FMODE_WRITE) ? WRITE : READ; >>> if (unlikely(!file || !(file->f_mode & mode))) >>> return -EBADF; >>> @@ -728,7 +728,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) >>> req->flags |= io_file_get_flags(file); >>> kiocb->ki_flags = file->f_iocb_flags; >>> - ret = kiocb_set_rw_flags(kiocb, rw->flags); >>> + ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); >>> if (unlikely(ret)) >>> return ret; >>> kiocb->ki_flags |= IOCB_ALLOC_CACHE; >> Not sure why you took the lazy way out here rather than just pass it in, >> now there's another branhc in the hot path. NAK. > > Are you saying to change io_rw_init_file() to this: > > io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) > > And the callers can hardcode rw_type? Yep, basically making the change identical to the aio one. Not sure why you did it differently in those two spots.
On 08/03/2024 17:05, Jens Axboe wrote: >> And the callers can hardcode rw_type? > Yep, basically making the change identical to the aio one. Not sure why > you did it differently in those two spots. In the aio code, rw_type was readily available. For io_uring it was not, and I chose to derive from something locally available. But that's a bit awkward and is not good for performance, so I'll follow your suggestion. Thanks, John
On 3/8/24 10:15 AM, John Garry wrote: > On 08/03/2024 17:05, Jens Axboe wrote: >>> And the callers can hardcode rw_type? >> Yep, basically making the change identical to the aio one. Not sure why >> you did it differently in those two spots. > > In the aio code, rw_type was readily available. For io_uring it was > not, and I chose to derive from something locally available. But > that's a bit awkward and is not good for performance, so I'll follow > your suggestion. It's literally just one caller back, it's not like you had to look hard to spot this. Don't take lazy shortcuts - it's not very confidence inspiring if this is the level of attention to detail that went into this patchset.
diff --git a/fs/aio.c b/fs/aio.c index da18dbcfcb22..ba420faed82e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1509,7 +1509,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res) iocb_put(iocb); } -static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) +static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb, int rw_type) { int ret; @@ -1535,7 +1535,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) } else req->ki_ioprio = get_current_ioprio(); - ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); + ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags, rw_type); if (unlikely(ret)) return ret; @@ -1587,7 +1587,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, struct file *file; int ret; - ret = aio_prep_rw(req, iocb); + ret = aio_prep_rw(req, iocb, READ); if (ret) return ret; file = req->ki_filp; @@ -1614,7 +1614,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, struct file *file; int ret; - ret = aio_prep_rw(req, iocb); + ret = aio_prep_rw(req, iocb, WRITE); if (ret) return ret; file = req->ki_filp; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac3316e0d11c..455f06d94b11 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4555,7 +4555,7 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool goto out_iov; init_sync_kiocb(&kiocb, file); - ret = kiocb_set_rw_flags(&kiocb, 0); + ret = kiocb_set_rw_flags(&kiocb, 0, WRITE); if (ret) goto out_iov; kiocb.ki_pos = pos; diff --git a/fs/read_write.c b/fs/read_write.c index d4c036e82b6c..a7dc1819192d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -730,7 +730,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, ssize_t ret; init_sync_kiocb(&kiocb, filp); - ret = kiocb_set_rw_flags(&kiocb, flags); + ret = kiocb_set_rw_flags(&kiocb, flags, type); if (ret) return ret; kiocb.ki_pos = (ppos ? *ppos : 0); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1fbc72c5f112..95946a706f23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,7 @@ #include <linux/cred.h> #include <linux/mnt_idmapping.h> #include <linux/slab.h> +#include <linux/uio.h> #include <asm/byteorder.h> #include <uapi/linux/fs.h> @@ -119,6 +120,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_PWRITE ((__force fmode_t)0x10) /* File is opened for execution with sys_execve / sys_uselib */ #define FMODE_EXEC ((__force fmode_t)0x20) + +/* File supports atomic writes */ +#define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)0x40) + /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)0x200) /* 64bit hashes as llseek() offset (for directories) */ @@ -328,6 +333,7 @@ enum rw_hint { #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND +#define IOCB_ATOMIC (__force int) RWF_ATOMIC /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -362,6 +368,7 @@ enum rw_hint { { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ + { IOCB_ATOMIC, "ATOMIC"}, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ @@ -3323,7 +3330,8 @@ static inline int iocb_flags(struct file *file) return res; } -static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, + int rw_type) { int kiocb_flags = 0; @@ -3340,6 +3348,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return -EOPNOTSUPP; kiocb_flags |= IOCB_NOIO; } + if (flags & RWF_ATOMIC) { + if (rw_type != WRITE) + return -EOPNOTSUPP; + if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; @@ -3525,4 +3539,25 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +static inline +bool generic_atomic_write_valid(loff_t pos, struct iov_iter *iter, + unsigned int unit_min, unsigned int unit_max) +{ + size_t len = iov_iter_count(iter); + + if (!iter_is_ubuf(iter)) + return false; + + if (len < unit_min || len > unit_max) + return false; + + if (!is_power_of_2(len)) + return false; + + if (!IS_ALIGNED(pos, len)) + return false; + + return true; +} + #endif /* _LINUX_FS_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 48ad69f7722e..a0975ae81e64 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -301,9 +301,12 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO O_APPEND */ #define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND) + RWF_APPEND | RWF_ATOMIC) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) diff --git a/io_uring/rw.c b/io_uring/rw.c index d5e79d9bdc71..099dda3ff151 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -719,7 +719,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) struct kiocb *kiocb = &rw->kiocb; struct io_ring_ctx *ctx = req->ctx; struct file *file = req->file; - int ret; + int ret, rw_type = (mode == FMODE_WRITE) ? WRITE : READ; if (unlikely(!file || !(file->f_mode & mode))) return -EBADF; @@ -728,7 +728,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) req->flags |= io_file_get_flags(file); kiocb->ki_flags = file->f_iocb_flags; - ret = kiocb_set_rw_flags(kiocb, rw->flags); + ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); if (unlikely(ret)) return ret; kiocb->ki_flags |= IOCB_ALLOC_CACHE;