From patchwork Tue Jul 11 11:40:25 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hao Xu X-Patchwork-Id: 13308456 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0503CEB64DC for ; Tue, 11 Jul 2023 11:41:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230100AbjGKLl2 (ORCPT ); Tue, 11 Jul 2023 07:41:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58930 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230078AbjGKLl1 (ORCPT ); Tue, 11 Jul 2023 07:41:27 -0400 Received: from out-53.mta0.migadu.com (out-53.mta0.migadu.com [91.218.175.53]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C042312F for ; Tue, 11 Jul 2023 04:41:26 -0700 (PDT) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1689075685; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=/Dd8Jy++S1yPEBbt2vMnTbOwRqjipskzxM0+iDSbMpM=; b=UDSemjXy4oE/hFtyZQFr7vexiB3aRPCbl+TxUm7MdmPbXwu6nQBKEd0Jm+n/q+kMp2LIfJ hSwNaSXC+fyJHnAnlrEu/Gg8N303MpTw87Gs022obOnRxr/DYI56fwRH0hcVHQcl0wLHyO VhlbD6C+yrBUabI3tdw4uMbx1H7LQnI= From: Hao Xu To: io-uring@vger.kernel.org, Jens Axboe Cc: Dominique Martinet , Pavel Begunkov , Christian Brauner , Alexander Viro , Stefan Roesch , Clay Harris , Dave Chinner , linux-fsdevel@vger.kernel.org, Wanpeng Li Subject: [PATCH 1/3] fs: split off vfs_getdents function of getdents64 syscall Date: Tue, 11 Jul 2023 19:40:25 +0800 Message-Id: <20230711114027.59945-2-hao.xu@linux.dev> In-Reply-To: <20230711114027.59945-1-hao.xu@linux.dev> References: <20230711114027.59945-1-hao.xu@linux.dev> MIME-Version: 1.0 X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: io-uring@vger.kernel.org From: Dominique Martinet This splits off the vfs_getdents function from the getdents64 system call. This will allow io_uring to call the vfs_getdents function. Co-developed-by: Stefan Roesch Signed-off-by: Stefan Roesch Signed-off-by: Dominique Martinet --- fs/internal.h | 8 ++++++++ fs/readdir.c | 34 ++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index f7a3dc111026..b1f66e52d61b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -304,3 +304,11 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); + +/* + * fs/readdir.c + */ +struct linux_dirent64; + +int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent, + unsigned int count); diff --git a/fs/readdir.c b/fs/readdir.c index b264ce60114d..9592259b7e7f 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -21,6 +21,7 @@ #include #include #include +#include "internal.h" #include @@ -351,10 +352,16 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen, return false; } -SYSCALL_DEFINE3(getdents64, unsigned int, fd, - struct linux_dirent64 __user *, dirent, unsigned int, count) + +/** + * vfs_getdents - getdents without fdget + * @file : pointer to file struct of directory + * @dirent : pointer to user directory structure + * @count : size of buffer + */ +int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent, + unsigned int count) { - struct fd f; struct getdents_callback64 buf = { .ctx.actor = filldir64, .count = count, @@ -362,11 +369,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, }; int error; - f = fdget_pos(fd); - if (!f.file) - return -EBADF; - - error = iterate_dir(f.file, &buf.ctx); + error = iterate_dir(file, &buf.ctx); if (error >= 0) error = buf.error; if (buf.prev_reclen) { @@ -379,6 +382,21 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } + return error; +} + +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) +{ + struct fd f; + int error; + + f = fdget_pos(fd); + if (!f.file) + return -EBADF; + + error = vfs_getdents(f.file, dirent, count); + fdput_pos(f); return error; } From patchwork Tue Jul 11 11:40:26 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hao Xu X-Patchwork-Id: 13308457 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C5619C001DF for ; Tue, 11 Jul 2023 11:41:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230120AbjGKLln (ORCPT ); Tue, 11 Jul 2023 07:41:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59034 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230063AbjGKLln (ORCPT ); Tue, 11 Jul 2023 07:41:43 -0400 Received: from out-54.mta0.migadu.com (out-54.mta0.migadu.com [91.218.175.54]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 390CEE0; Tue, 11 Jul 2023 04:41:42 -0700 (PDT) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1689075700; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=e5tdpTadoLYdZJB8cQ8K1ymkQcAcLx9XJrtCjJGJaVs=; b=J9PeguGP7+f9wgXWpd2r9+B9jO4cPeL8ag/XSFVutn6znyDnk0QLnJT0MJhr3YoNjCT/24 B2xAy0Hl5bnqyapPTVUD89vZRk/DsqWqZQOSCPervNYPXp4UYIrcNGRLy1SSxpyy5lMGJH RmJr5lCNu6vVx6TyEtc6AFWADDOsiWs= From: Hao Xu To: io-uring@vger.kernel.org, Jens Axboe Cc: Dominique Martinet , Pavel Begunkov , Christian Brauner , Alexander Viro , Stefan Roesch , Clay Harris , Dave Chinner , linux-fsdevel@vger.kernel.org, Wanpeng Li Subject: [PATCH 2/3] vfs_getdents/struct dir_context: add flags field Date: Tue, 11 Jul 2023 19:40:26 +0800 Message-Id: <20230711114027.59945-3-hao.xu@linux.dev> In-Reply-To: <20230711114027.59945-1-hao.xu@linux.dev> References: <20230711114027.59945-1-hao.xu@linux.dev> MIME-Version: 1.0 X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: io-uring@vger.kernel.org From: Hao Xu The flags will allow passing DIR_CONTEXT_F_NOWAIT to iterate() implementations that support it (as signaled through FMODE_NWAIT in file->f_mode) Notes: - considered using IOCB_NOWAIT but if we add more flags later it would be confusing to keep track of which values are valid, use dedicated flags - might want to check ctx.flags & DIR_CONTEXT_F_NOWAIT is only set when file->f_mode & FMODE_NOWAIT in iterate_dir() as e.g. WARN_ONCE? Co-developed-by: Dominique Martinet Signed-off-by: Dominique Martinet Signed-off-by: Hao Xu --- fs/internal.h | 2 +- fs/readdir.c | 6 ++++-- include/linux/fs.h | 8 ++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index b1f66e52d61b..7508d485c655 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -311,4 +311,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap); struct linux_dirent64; int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent, - unsigned int count); + unsigned int count, unsigned long flags); diff --git a/fs/readdir.c b/fs/readdir.c index 9592259b7e7f..b80caf4c9321 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -358,12 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen, * @file : pointer to file struct of directory * @dirent : pointer to user directory structure * @count : size of buffer + * @flags : additional dir_context flags */ int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent, - unsigned int count) + unsigned int count, unsigned long flags) { struct getdents_callback64 buf = { .ctx.actor = filldir64, + .ctx.flags = flags, .count = count, .current_dir = dirent }; @@ -395,7 +397,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (!f.file) return -EBADF; - error = vfs_getdents(f.file, dirent, count); + error = vfs_getdents(f.file, dirent, count, 0); fdput_pos(f); return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 6867512907d6..f3e315e8efdd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1719,8 +1719,16 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + unsigned long flags; }; +/* + * flags for dir_context flags + * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate + * (requires file->f_mode & FMODE_NOWAIT) + */ +#define DIR_CONTEXT_F_NOWAIT (1 << 0) + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. From patchwork Tue Jul 11 11:40:27 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Hao Xu X-Patchwork-Id: 13308458 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E2FD7EB64DC for ; Tue, 11 Jul 2023 11:42:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229954AbjGKLmZ (ORCPT ); Tue, 11 Jul 2023 07:42:25 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59496 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229503AbjGKLmY (ORCPT ); Tue, 11 Jul 2023 07:42:24 -0400 Received: from out-33.mta0.migadu.com (out-33.mta0.migadu.com [91.218.175.33]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 22878188 for ; Tue, 11 Jul 2023 04:42:23 -0700 (PDT) X-Report-Abuse: Please report any abuse attempt to abuse@migadu.com and include these headers. DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1; t=1689075741; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=WEcvjNdIwRRdNEdy4CUUCneNVXfs1KxIIH3UVTTZbIk=; b=G6fca/RPO9sgIcOT2MbnvMK6J2ikvBjtOsQbgRc4A3jl4KLXCsPVfp8BSFgbwivYrgR+hr dcZNcu7lV45Ki1z/XtREwawNsLGsR+x0ufw+H7nqzqGMzE/2Gl8aWydSj8Bd4P47XhL/Yx FNZUIe5yWg4kiN8mMnhnkOWiGHyffKM= From: Hao Xu To: io-uring@vger.kernel.org, Jens Axboe Cc: Dominique Martinet , Pavel Begunkov , Christian Brauner , Alexander Viro , Stefan Roesch , Clay Harris , Dave Chinner , linux-fsdevel@vger.kernel.org, Wanpeng Li Subject: [PATCH 3/3] io_uring: add support for getdents Date: Tue, 11 Jul 2023 19:40:27 +0800 Message-Id: <20230711114027.59945-4-hao.xu@linux.dev> In-Reply-To: <20230711114027.59945-1-hao.xu@linux.dev> References: <20230711114027.59945-1-hao.xu@linux.dev> MIME-Version: 1.0 X-Migadu-Flow: FLOW_OUT Precedence: bulk List-ID: X-Mailing-List: io-uring@vger.kernel.org From: Hao Xu This add support for getdents64 to io_uring, acting exactly like the syscall: the directory is iterated from it's current's position as stored in the file struct, and the file's position is updated exactly as if getdents64 had been called. For filesystems that support NOWAIT in iterate_shared(), try to use it first; if a user already knows the filesystem they use do not support nowait they can force async through IOSQE_ASYNC in the sqe flags, avoiding the need to bounce back through a useless EAGAIN return. Co-developed-by: Dominique Martinet Signed-off-by: Dominique Martinet Signed-off-by: Hao Xu --- include/uapi/linux/io_uring.h | 7 ++++ io_uring/fs.c | 60 +++++++++++++++++++++++++++++++++++ io_uring/fs.h | 3 ++ io_uring/opdef.c | 8 +++++ 4 files changed, 78 insertions(+) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 08720c7bd92f..6c0d521135a6 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -65,6 +65,7 @@ struct io_uring_sqe { __u32 xattr_flags; __u32 msg_ring_flags; __u32 uring_cmd_flags; + __u32 getdents_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -235,6 +236,7 @@ enum io_uring_op { IORING_OP_URING_CMD, IORING_OP_SEND_ZC, IORING_OP_SENDMSG_ZC, + IORING_OP_GETDENTS, /* this goes last, obviously */ IORING_OP_LAST, @@ -273,6 +275,11 @@ enum io_uring_op { */ #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ +/* + * sqe->getdents_flags + */ +#define IORING_GETDENTS_REWIND (1U << 0) + /* * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the * command flags for POLL_ADD are stored in sqe->len. diff --git a/io_uring/fs.c b/io_uring/fs.c index f6a69a549fd4..77f00577e09c 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -47,6 +47,13 @@ struct io_link { int flags; }; +struct io_getdents { + struct file *file; + struct linux_dirent64 __user *dirent; + unsigned int count; + int flags; +}; + int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); @@ -291,3 +298,56 @@ void io_link_cleanup(struct io_kiocb *req) putname(sl->oldpath); putname(sl->newpath); } + +int io_getdents_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_getdents *gd = io_kiocb_to_cmd(req, struct io_getdents); + + if (READ_ONCE(sqe->off) != 0) + return -EINVAL; + + gd->dirent = u64_to_user_ptr(READ_ONCE(sqe->addr)); + gd->count = READ_ONCE(sqe->len); + + return 0; +} + +int io_getdents(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_getdents *gd = io_kiocb_to_cmd(req, struct io_getdents); + struct file *file; + unsigned long getdents_flags = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + bool should_lock = false; + int ret; + + if (force_nonblock) { + if (!(req->file->f_mode & FMODE_NOWAIT)) + return -EAGAIN; + + getdents_flags = DIR_CONTEXT_F_NOWAIT; + } + + file = req->file; + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file_count(file) > 1) + should_lock = true; + } + if (should_lock) { + if (!force_nonblock) + mutex_lock(&file->f_pos_lock); + else if (!mutex_trylock(&file->f_pos_lock)) + return -EAGAIN; + } + + ret = vfs_getdents(file, gd->dirent, gd->count, getdents_flags); + if (should_lock) + mutex_unlock(&file->f_pos_lock); + + if (ret == -EAGAIN && force_nonblock) + return -EAGAIN; + + io_req_set_res(req, ret, 0); + return 0; +} + diff --git a/io_uring/fs.h b/io_uring/fs.h index 0bb5efe3d6bb..f83a6f3a678d 100644 --- a/io_uring/fs.h +++ b/io_uring/fs.h @@ -18,3 +18,6 @@ int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags); int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_linkat(struct io_kiocb *req, unsigned int issue_flags); void io_link_cleanup(struct io_kiocb *req); + +int io_getdents_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); +int io_getdents(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 3b9c6489b8b6..1bae6b2a8d0b 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -428,6 +428,11 @@ const struct io_issue_def io_issue_defs[] = { .prep = io_eopnotsupp_prep, #endif }, + [IORING_OP_GETDENTS] = { + .needs_file = 1, + .prep = io_getdents_prep, + .issue = io_getdents, + }, }; @@ -648,6 +653,9 @@ const struct io_cold_def io_cold_defs[] = { .fail = io_sendrecv_fail, #endif }, + [IORING_OP_GETDENTS] = { + .name = "GETDENTS", + }, }; const char *io_uring_get_opcode(u8 opcode)