diff mbox series

[3/3] avoid extra path_get/path_put cycle in path_openat()

Message ID 20240822004149.GR504335@ZenIV (mailing list archive)
State New
Headers show
Series [1/3] don't duplicate vfs_open() in kernel_file_open() | expand

Commit Message

Al Viro Aug. 22, 2024, 12:41 a.m. UTC
Once we'd opened the file, nd->path and file->f_path have the
same contents.  Rather than having both pinned and nd->path
dropped by terminate_walk(), let's have them share the
references from the moment when FMODE_OPENED is set and
clear nd->path just before the terminate_walk() in such case.

To do that, we
	* add a variant of vfs_open() that does *not* do conditional
path_get() (vfs_open_borrow()); use it in do_open().
	* don't grab f->f_path.mnt in finish_open() - only
f->f_path.dentry.  Have atomic_open() drop the child dentry
in FMODE_OPENED case and return f->path.dentry without grabbing it.
	* adjust vfs_tmpfile() for finish_open() change (it
is called from ->tmpfile() instances).
	* make do_o_path() use vfs_open_borrow(), collapse path_put()
there with the conditional path_get() we would've get in vfs_open().
	* in FMODE_OPENED case clear nd->path before calling
terminate_walk().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/internal.h |  1 +
 fs/namei.c    | 22 ++++++++++++++--------
 fs/open.c     | 19 ++++++++++++++++++-
 3 files changed, 33 insertions(+), 9 deletions(-)

Comments

Christian Brauner Aug. 22, 2024, 9:31 a.m. UTC | #1
On Thu, Aug 22, 2024 at 01:41:49AM GMT, Al Viro wrote:
> Once we'd opened the file, nd->path and file->f_path have the
> same contents.  Rather than having both pinned and nd->path
> dropped by terminate_walk(), let's have them share the
> references from the moment when FMODE_OPENED is set and
> clear nd->path just before the terminate_walk() in such case.
> 
> To do that, we
> 	* add a variant of vfs_open() that does *not* do conditional
> path_get() (vfs_open_borrow()); use it in do_open().
> 	* don't grab f->f_path.mnt in finish_open() - only
> f->f_path.dentry.  Have atomic_open() drop the child dentry
> in FMODE_OPENED case and return f->path.dentry without grabbing it.
> 	* adjust vfs_tmpfile() for finish_open() change (it
> is called from ->tmpfile() instances).
> 	* make do_o_path() use vfs_open_borrow(), collapse path_put()
> there with the conditional path_get() we would've get in vfs_open().
> 	* in FMODE_OPENED case clear nd->path before calling
> terminate_walk().
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---

Reviewed-by: Christian Brauner <brauner@kernel.org>
Mateusz Guzik Aug. 22, 2024, 10:21 a.m. UTC | #2
On Thu, Aug 22, 2024 at 01:41:49AM +0100, Al Viro wrote:
> Once we'd opened the file, nd->path and file->f_path have the
> same contents.  Rather than having both pinned and nd->path
> dropped by terminate_walk(), let's have them share the
> references from the moment when FMODE_OPENED is set and
> clear nd->path just before the terminate_walk() in such case.
> 
> To do that, we
> 	* add a variant of vfs_open() that does *not* do conditional
> path_get() (vfs_open_borrow()); use it in do_open().
> 	* don't grab f->f_path.mnt in finish_open() - only
> f->f_path.dentry.  Have atomic_open() drop the child dentry
> in FMODE_OPENED case and return f->path.dentry without grabbing it.
> 	* adjust vfs_tmpfile() for finish_open() change (it
> is called from ->tmpfile() instances).
> 	* make do_o_path() use vfs_open_borrow(), collapse path_put()
> there with the conditional path_get() we would've get in vfs_open().
> 	* in FMODE_OPENED case clear nd->path before calling
> terminate_walk().
> 
> diff --git a/fs/open.c b/fs/open.c
> index 0ec2e9a33856..f9988427fb97 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1046,7 +1046,7 @@ int finish_open(struct file *file, struct dentry *dentry,
>  	file->f_path.dentry = dentry;
>  	err = do_dentry_open(file, open);
>  	if (file->f_mode & FMODE_OPENED)
> -		path_get(&file->f_path);
> +		dget(&file->f_path.dentry);
>  	return err;
>  }

There are numerous consumers of finish_open(), I don't see how they got
adjusted to cope with this (or why they would not need adjustment).

For example fuse_create_open().

If this is sorted out I would argue it needs to be explained in the
commit message.

fwiw I don't think patching up the convention of finish_open() is needed
for avoiding the extra ref cycle to work.
diff mbox series

Patch

diff --git a/fs/internal.h b/fs/internal.h
index cdd73209eecb..11834829cc3f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -194,6 +194,7 @@  int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 int chown_common(const struct path *path, uid_t user, gid_t group);
 extern int vfs_open(const struct path *, struct file *);
+extern int vfs_open_borrow(const struct path *, struct file *);
 
 /*
  * inode.c
diff --git a/fs/namei.c b/fs/namei.c
index 5512cb10fa89..e02160460422 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3443,10 +3443,8 @@  static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
 	d_lookup_done(dentry);
 	if (!error) {
 		if (file->f_mode & FMODE_OPENED) {
-			if (unlikely(dentry != file->f_path.dentry)) {
-				dput(dentry);
-				dentry = dget(file->f_path.dentry);
-			}
+			dput(dentry);
+			dentry = file->f_path.dentry;
 		} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
 			error = -EIO;
 		} else {
@@ -3724,7 +3722,7 @@  static int do_open(struct nameidata *nd,
 	}
 	error = may_open(idmap, &nd->path, acc_mode, open_flag);
 	if (!error && !(file->f_mode & FMODE_OPENED))
-		error = vfs_open(&nd->path, file);
+		error = vfs_open_borrow(&nd->path, file);
 	if (!error)
 		error = security_file_post_open(file, op->acc_mode);
 	if (!error && do_truncate)
@@ -3777,8 +3775,10 @@  int vfs_tmpfile(struct mnt_idmap *idmap,
 	mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
 	error = dir->i_op->tmpfile(idmap, dir, file, mode);
 	dput(child);
-	if (file->f_mode & FMODE_OPENED)
+	if (file->f_mode & FMODE_OPENED) {
+		mntget(file->f_path.mnt);
 		fsnotify_open(file);
+	}
 	if (error)
 		return error;
 	/* Don't check for other permissions, the inode was just created */
@@ -3857,8 +3857,9 @@  static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
 	int error = path_lookupat(nd, flags, &path);
 	if (!error) {
 		audit_inode(nd->name, path.dentry, 0);
-		error = vfs_open(&path, file);
-		path_put(&path);
+		error = vfs_open_borrow(&path, file);
+		if (!(file->f_mode & FMODE_OPENED))
+			path_put(&path);
 	}
 	return error;
 }
@@ -3884,6 +3885,11 @@  static struct file *path_openat(struct nameidata *nd,
 			;
 		if (!error)
 			error = do_open(nd, file, op);
+		if (file->f_mode & FMODE_OPENED) {
+			// borrowed into file->f_path, transfer it there
+			nd->path.mnt = NULL;
+			nd->path.dentry = NULL;
+		}
 		terminate_walk(nd);
 	}
 	if (likely(!error)) {
diff --git a/fs/open.c b/fs/open.c
index 0ec2e9a33856..f9988427fb97 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1046,7 +1046,7 @@  int finish_open(struct file *file, struct dentry *dentry,
 	file->f_path.dentry = dentry;
 	err = do_dentry_open(file, open);
 	if (file->f_mode & FMODE_OPENED)
-		path_get(&file->f_path);
+		dget(&file->f_path.dentry);
 	return err;
 }
 EXPORT_SYMBOL(finish_open);
@@ -1102,6 +1102,23 @@  int vfs_open(const struct path *path, struct file *file)
 	return ret;
 }
 
+int vfs_open_borrow(const struct path *path, struct file *file)
+{
+	int ret;
+
+	file->f_path = *path;
+	ret = do_dentry_open(file, NULL);
+	if (!ret) {
+		/*
+		 * Once we return a file with FMODE_OPENED, __fput() will call
+		 * fsnotify_close(), so we need fsnotify_open() here for
+		 * symmetry.
+		 */
+		fsnotify_open(file);
+	}
+	return ret;
+}
+
 struct file *dentry_open(const struct path *path, int flags,
 			 const struct cred *cred)
 {