Message ID | 1422381313-24034-2-git-send-email-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Adding Maxim Patlasov, who did the AIO mode in fuse. On Tue, 2015-01-27 at 18:55 +0100, Christoph Hellwig wrote: > The AIO interface is fairly complex because it tries to allow > filesystems to always work async and then wakeup a synchronous > caller through aio_complete. It turns out that basically no one > does this to avoid the complexity and context switches, with the > solve exception of fuse, so remove that possibility to simplify > the code. > > XXX: fix up fuse. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > drivers/usb/gadget/function/f_fs.c | 4 ++-- > fs/aio.c | 24 +----------------------- > fs/ecryptfs/file.c | 6 ------ > fs/fuse/file.c | 7 +++++-- > fs/read_write.c | 26 ++++++++------------------ > include/linux/aio.h | 4 ---- > net/socket.c | 9 +++------ > 7 files changed, 19 insertions(+), 61 deletions(-) > > diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c > index 63314ed..debd78b 100644 > --- a/drivers/usb/gadget/function/f_fs.c > +++ b/drivers/usb/gadget/function/f_fs.c > @@ -969,7 +969,7 @@ static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb, > if (unlikely(!io_data)) > return -ENOMEM; > > - io_data->aio = true; > + io_data->aio = !is_sync_kiocb(kiocb); > io_data->read = false; > io_data->kiocb = kiocb; > io_data->iovec = iovec; > @@ -1005,7 +1005,7 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb, > return -ENOMEM; > } > > - io_data->aio = true; > + io_data->aio = !is_sync_kiocb(kiocb); > io_data->read = true; > io_data->kiocb = kiocb; > io_data->iovec = iovec_copy; > diff --git a/fs/aio.c b/fs/aio.c > index 1b7893e..0ee25d6 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -791,22 +791,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, > return 0; > } > > -/* wait_on_sync_kiocb: > - * Waits on the given sync kiocb to complete. > - */ > -ssize_t wait_on_sync_kiocb(struct kiocb *req) > -{ > - while (!req->ki_ctx) { > - set_current_state(TASK_UNINTERRUPTIBLE); > - if (req->ki_ctx) > - break; > - io_schedule(); > - } > - __set_current_state(TASK_RUNNING); > - return req->ki_user_data; > -} > -EXPORT_SYMBOL(wait_on_sync_kiocb); > - > /* > * exit_aio: called when the last user of mm goes away. At this point, there is > * no way for any new requests to be submited or any of the io_* syscalls to be > @@ -1038,13 +1022,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) > * ref, no other paths have a way to get another ref > * - the sync task helpfully left a reference to itself in the iocb > */ > - if (is_sync_kiocb(iocb)) { > - iocb->ki_user_data = res; > - smp_wmb(); > - iocb->ki_ctx = ERR_PTR(-EXDEV); > - wake_up_process(iocb->ki_obj.tsk); > - return; > - } > + BUG_ON(is_sync_kiocb(iocb)); > > if (iocb->ki_list.next) { > unsigned long flags; > diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c > index 6f4e659..a36da88 100644 > --- a/fs/ecryptfs/file.c > +++ b/fs/ecryptfs/file.c > @@ -52,12 +52,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, > struct file *file = iocb->ki_filp; > > rc = generic_file_read_iter(iocb, to); > - /* > - * Even though this is a async interface, we need to wait > - * for IO to finish to update atime > - */ > - if (-EIOCBQUEUED == rc) > - rc = wait_on_sync_kiocb(iocb); > if (rc >= 0) { > path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); > touch_atime(path); > diff --git a/fs/fuse/file.c b/fs/fuse/file.c > index 760b2c5..9c27312 100644 > --- a/fs/fuse/file.c > +++ b/fs/fuse/file.c > @@ -2841,8 +2841,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, > /* > * By default, we want to optimize all I/Os with async request > * submission to the client filesystem if supported. > + * > + * XXX: need to add back support for this mode.. > */ > - io->async = async_dio; > + io->async = async_dio && !is_sync_kiocb(iocb); > io->iocb = iocb; > > /* > @@ -2865,7 +2867,8 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, > if (!is_sync_kiocb(iocb)) > return -EIOCBQUEUED; > > - ret = wait_on_sync_kiocb(iocb); > + // XXX: need fuse specific replacement > +// ret = wait_on_sync_kiocb(iocb); > } else { > kfree(io); > } > diff --git a/fs/read_write.c b/fs/read_write.c > index ab4f26a..adab608 100644 > --- a/fs/read_write.c > +++ b/fs/read_write.c > @@ -347,9 +347,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) > > iter->type |= READ; > ret = file->f_op->read_iter(&kiocb, iter); > - if (ret == -EIOCBQUEUED) > - ret = wait_on_sync_kiocb(&kiocb); > - > + BUG_ON(ret == -EIOCBQUEUED); > if (ret > 0) > *ppos = kiocb.ki_pos; > return ret; > @@ -370,9 +368,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) > > iter->type |= WRITE; > ret = file->f_op->write_iter(&kiocb, iter); > - if (ret == -EIOCBQUEUED) > - ret = wait_on_sync_kiocb(&kiocb); > - > + BUG_ON(ret == -EIOCBQUEUED); > if (ret > 0) > *ppos = kiocb.ki_pos; > return ret; > @@ -429,8 +425,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp > kiocb.ki_nbytes = len; > > ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > @@ -450,8 +445,7 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p > iov_iter_init(&iter, READ, &iov, 1, len); > > ret = filp->f_op->read_iter(&kiocb, &iter); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > @@ -513,8 +507,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof > kiocb.ki_nbytes = len; > > ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > @@ -534,8 +527,7 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo > iov_iter_init(&iter, WRITE, &iov, 1, len); > > ret = filp->f_op->write_iter(&kiocb, &iter); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > @@ -723,8 +715,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iove > > iov_iter_init(&iter, rw, iov, nr_segs, len); > ret = fn(&kiocb, &iter); > - if (ret == -EIOCBQUEUED) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > @@ -740,8 +731,7 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, > kiocb.ki_nbytes = len; > > ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); > - if (ret == -EIOCBQUEUED) > - ret = wait_on_sync_kiocb(&kiocb); > + BUG_ON(ret == -EIOCBQUEUED); > *ppos = kiocb.ki_pos; > return ret; > } > diff --git a/include/linux/aio.h b/include/linux/aio.h > index d9c92da..57c86c0 100644 > --- a/include/linux/aio.h > +++ b/include/linux/aio.h > @@ -37,7 +37,6 @@ struct kiocb { > > union { > void __user *user; > - struct task_struct *tsk; > } ki_obj; > > __u64 ki_user_data; /* user's data for completion */ > @@ -64,13 +63,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) > *kiocb = (struct kiocb) { > .ki_ctx = NULL, > .ki_filp = filp, > - .ki_obj.tsk = current, > }; > } > > /* prototypes */ > #ifdef CONFIG_AIO > -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); > extern void aio_complete(struct kiocb *iocb, long res, long res2); > struct mm_struct; > extern void exit_aio(struct mm_struct *mm); > @@ -78,7 +75,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, > struct iocb __user *__user *iocbpp, bool compat); > void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); > #else > -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } > static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } > struct mm_struct; > static inline void exit_aio(struct mm_struct *mm) { } > diff --git a/net/socket.c b/net/socket.c > index a2c33a4..4032931 100644 > --- a/net/socket.c > +++ b/net/socket.c > @@ -642,8 +642,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, > iocb.private = &siocb; > ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : > __sock_sendmsg(&iocb, sock, msg, size); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&iocb); > + BUG_ON(-EIOCBQUEUED == ret); > return ret; > } > > @@ -785,8 +784,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, > init_sync_kiocb(&iocb, NULL); > iocb.private = &siocb; > ret = __sock_recvmsg(&iocb, sock, msg, size, flags); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&iocb); > + BUG_ON(-EIOCBQUEUED == ret); > return ret; > } > EXPORT_SYMBOL(sock_recvmsg); > @@ -801,8 +799,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, > init_sync_kiocb(&iocb, NULL); > iocb.private = &siocb; > ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); > - if (-EIOCBQUEUED == ret) > - ret = wait_on_sync_kiocb(&iocb); > + BUG_ON(-EIOCBQUEUED == ret); > return ret; > } > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jan 28, 2015 at 04:30:25PM +0100, Miklos Szeredi wrote:
> Adding Maxim Patlasov, who did the AIO mode in fuse.
FYI, I don't think fuse should be too hard, it just needs a completion
in struct fuse_io_priv. I just didn't bother before getting some higher
level review for the concept.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 63314ed..debd78b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -969,7 +969,7 @@ static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb, if (unlikely(!io_data)) return -ENOMEM; - io_data->aio = true; + io_data->aio = !is_sync_kiocb(kiocb); io_data->read = false; io_data->kiocb = kiocb; io_data->iovec = iovec; @@ -1005,7 +1005,7 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb, return -ENOMEM; } - io_data->aio = true; + io_data->aio = !is_sync_kiocb(kiocb); io_data->read = true; io_data->kiocb = kiocb; io_data->iovec = iovec_copy; diff --git a/fs/aio.c b/fs/aio.c index 1b7893e..0ee25d6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -791,22 +791,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, return 0; } -/* wait_on_sync_kiocb: - * Waits on the given sync kiocb to complete. - */ -ssize_t wait_on_sync_kiocb(struct kiocb *req) -{ - while (!req->ki_ctx) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (req->ki_ctx) - break; - io_schedule(); - } - __set_current_state(TASK_RUNNING); - return req->ki_user_data; -} -EXPORT_SYMBOL(wait_on_sync_kiocb); - /* * exit_aio: called when the last user of mm goes away. At this point, there is * no way for any new requests to be submited or any of the io_* syscalls to be @@ -1038,13 +1022,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) * ref, no other paths have a way to get another ref * - the sync task helpfully left a reference to itself in the iocb */ - if (is_sync_kiocb(iocb)) { - iocb->ki_user_data = res; - smp_wmb(); - iocb->ki_ctx = ERR_PTR(-EXDEV); - wake_up_process(iocb->ki_obj.tsk); - return; - } + BUG_ON(is_sync_kiocb(iocb)); if (iocb->ki_list.next) { unsigned long flags; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 6f4e659..a36da88 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -52,12 +52,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, struct file *file = iocb->ki_filp; rc = generic_file_read_iter(iocb, to); - /* - * Even though this is a async interface, we need to wait - * for IO to finish to update atime - */ - if (-EIOCBQUEUED == rc) - rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); touch_atime(path); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 760b2c5..9c27312 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2841,8 +2841,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, /* * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. + * + * XXX: need to add back support for this mode.. */ - io->async = async_dio; + io->async = async_dio && !is_sync_kiocb(iocb); io->iocb = iocb; /* @@ -2865,7 +2867,8 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; - ret = wait_on_sync_kiocb(iocb); + // XXX: need fuse specific replacement +// ret = wait_on_sync_kiocb(iocb); } else { kfree(io); } diff --git a/fs/read_write.c b/fs/read_write.c index ab4f26a..adab608 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -347,9 +347,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos) iter->type |= READ; ret = file->f_op->read_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -370,9 +368,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos) iter->type |= WRITE; ret = file->f_op->write_iter(&kiocb, iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); - + BUG_ON(ret == -EIOCBQUEUED); if (ret > 0) *ppos = kiocb.ki_pos; return ret; @@ -429,8 +425,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp kiocb.ki_nbytes = len; ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -450,8 +445,7 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p iov_iter_init(&iter, READ, &iov, 1, len); ret = filp->f_op->read_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -513,8 +507,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof kiocb.ki_nbytes = len; ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -534,8 +527,7 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo iov_iter_init(&iter, WRITE, &iov, 1, len); ret = filp->f_op->write_iter(&kiocb, &iter); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -723,8 +715,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iove iov_iter_init(&iter, rw, iov, nr_segs, len); ret = fn(&kiocb, &iter); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } @@ -740,8 +731,7 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, kiocb.ki_nbytes = len; ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); - if (ret == -EIOCBQUEUED) - ret = wait_on_sync_kiocb(&kiocb); + BUG_ON(ret == -EIOCBQUEUED); *ppos = kiocb.ki_pos; return ret; } diff --git a/include/linux/aio.h b/include/linux/aio.h index d9c92da..57c86c0 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -37,7 +37,6 @@ struct kiocb { union { void __user *user; - struct task_struct *tsk; } ki_obj; __u64 ki_user_data; /* user's data for completion */ @@ -64,13 +63,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) *kiocb = (struct kiocb) { .ki_ctx = NULL, .ki_filp = filp, - .ki_obj.tsk = current, }; } /* prototypes */ #ifdef CONFIG_AIO -extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern void aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); @@ -78,7 +75,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr, struct iocb __user *__user *iocbpp, bool compat); void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel); #else -static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline void aio_complete(struct kiocb *iocb, long res, long res2) { } struct mm_struct; static inline void exit_aio(struct mm_struct *mm) { } diff --git a/net/socket.c b/net/socket.c index a2c33a4..4032931 100644 --- a/net/socket.c +++ b/net/socket.c @@ -642,8 +642,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, iocb.private = &siocb; ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(-EIOCBQUEUED == ret); return ret; } @@ -785,8 +784,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(-EIOCBQUEUED == ret); return ret; } EXPORT_SYMBOL(sock_recvmsg); @@ -801,8 +799,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); - if (-EIOCBQUEUED == ret) - ret = wait_on_sync_kiocb(&iocb); + BUG_ON(-EIOCBQUEUED == ret); return ret; }
The AIO interface is fairly complex because it tries to allow filesystems to always work async and then wakeup a synchronous caller through aio_complete. It turns out that basically no one does this to avoid the complexity and context switches, with the solve exception of fuse, so remove that possibility to simplify the code. XXX: fix up fuse. Signed-off-by: Christoph Hellwig <hch@lst.de> --- drivers/usb/gadget/function/f_fs.c | 4 ++-- fs/aio.c | 24 +----------------------- fs/ecryptfs/file.c | 6 ------ fs/fuse/file.c | 7 +++++-- fs/read_write.c | 26 ++++++++------------------ include/linux/aio.h | 4 ---- net/socket.c | 9 +++------ 7 files changed, 19 insertions(+), 61 deletions(-)