diff mbox series

[v6,3/7] kernfs: use VFS negative dentry caching

Message ID 162322862726.361452.10114120072438540655.stgit@web.messagingengine.com (mailing list archive)
State New, archived
Headers show
Series kernfs: proposed locking and concurrency improvement | expand

Commit Message

Ian Kent June 9, 2021, 8:50 a.m. UTC
If there are many lookups for non-existent paths these negative lookups
can lead to a lot of overhead during path walks.

The VFS allows dentries to be created as negative and hashed, and caches
them so they can be used to reduce the fairly high overhead alloc/free
cycle that occurs during these lookups.

Use the kernfs node parent revision to identify if a change has been
made to the containing directory so that the negative dentry can be
discarded and the lookup redone.

Signed-off-by: Ian Kent <raven@themaw.net>
---
 fs/kernfs/dir.c |   52 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 20 deletions(-)

Comments

Miklos Szeredi June 11, 2021, 1:07 p.m. UTC | #1
On Wed, 9 Jun 2021 at 10:50, Ian Kent <raven@themaw.net> wrote:
>
> If there are many lookups for non-existent paths these negative lookups
> can lead to a lot of overhead during path walks.
>
> The VFS allows dentries to be created as negative and hashed, and caches
> them so they can be used to reduce the fairly high overhead alloc/free
> cycle that occurs during these lookups.
>
> Use the kernfs node parent revision to identify if a change has been
> made to the containing directory so that the negative dentry can be
> discarded and the lookup redone.
>
> Signed-off-by: Ian Kent <raven@themaw.net>
> ---
>  fs/kernfs/dir.c |   52 ++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 32 insertions(+), 20 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index b3d1bc0f317d0..4f037456a8e17 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1039,9 +1039,28 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
>         if (flags & LOOKUP_RCU)
>                 return -ECHILD;
>
> -       /* Always perform fresh lookup for negatives */
> -       if (d_really_is_negative(dentry))
> -               goto out_bad_unlocked;
> +       /* Negative hashed dentry? */
> +       if (d_really_is_negative(dentry)) {
> +               struct dentry *d_parent = dget_parent(dentry);
> +               struct kernfs_node *parent;
> +
> +               /* If the kernfs parent node has changed discard and
> +                * proceed to ->lookup.
> +                */
> +               parent = kernfs_dentry_node(d_parent);
> +               if (parent) {
> +                       if (kernfs_dir_changed(parent, dentry)) {

Perhaps add a note about this being dependent on parent of a negative
dentry never changing.

If this was backported to a kernel where this assumption doesn't hold,
there would be a mathematical chance of a false negative.

Thanks,
Miklos
Al Viro June 12, 2021, 12:07 a.m. UTC | #2
On Wed, Jun 09, 2021 at 04:50:27PM +0800, Ian Kent wrote:

> +	if (d_really_is_negative(dentry)) {
> +		struct dentry *d_parent = dget_parent(dentry);
> +		struct kernfs_node *parent;

What the hell is dget_parent() for?  You don't do anything blocking
here, so why not simply grab dentry->d_lock - that'll stabilize
the value of ->d_parent just fine.  Just don't forget to drop the
lock before returning and that's it...

> +		/* If the kernfs parent node has changed discard and
> +		 * proceed to ->lookup.
> +		 */
> +		parent = kernfs_dentry_node(d_parent);
> +		if (parent) {
> +			if (kernfs_dir_changed(parent, dentry)) {
> +				dput(d_parent);
> +				return 0;
> +			}
> +		}
> +		dput(d_parent);
> +
> +		/* The kernfs node doesn't exist, leave the dentry
> +		 * negative and return success.
> +		 */
> +		return 1;
> +	}
Ian Kent June 12, 2021, 12:43 a.m. UTC | #3
On Sat, 2021-06-12 at 00:07 +0000, Al Viro wrote:
> On Wed, Jun 09, 2021 at 04:50:27PM +0800, Ian Kent wrote:
> 
> > +       if (d_really_is_negative(dentry)) {
> > +               struct dentry *d_parent = dget_parent(dentry);
> > +               struct kernfs_node *parent;
> 
> What the hell is dget_parent() for?  You don't do anything blocking
> here, so why not simply grab dentry->d_lock - that'll stabilize
> the value of ->d_parent just fine.  Just don't forget to drop the
> lock before returning and that's it...

Thanks Al, I'll change it.

> 
> > +               /* If the kernfs parent node has changed discard
> > and
> > +                * proceed to ->lookup.
> > +                */
> > +               parent = kernfs_dentry_node(d_parent);
> > +               if (parent) {
> > +                       if (kernfs_dir_changed(parent, dentry)) {
> > +                               dput(d_parent);
> > +                               return 0;
> > +                       }
> > +               }
> > +               dput(d_parent);
> > +
> > +               /* The kernfs node doesn't exist, leave the dentry
> > +                * negative and return success.
> > +                */
> > +               return 1;
> > +       }
Ian Kent June 12, 2021, 12:47 a.m. UTC | #4
On Fri, 2021-06-11 at 15:07 +0200, Miklos Szeredi wrote:
> On Wed, 9 Jun 2021 at 10:50, Ian Kent <raven@themaw.net> wrote:
> > 
> > If there are many lookups for non-existent paths these negative
> > lookups
> > can lead to a lot of overhead during path walks.
> > 
> > The VFS allows dentries to be created as negative and hashed, and
> > caches
> > them so they can be used to reduce the fairly high overhead
> > alloc/free
> > cycle that occurs during these lookups.
> > 
> > Use the kernfs node parent revision to identify if a change has
> > been
> > made to the containing directory so that the negative dentry can be
> > discarded and the lookup redone.
> > 
> > Signed-off-by: Ian Kent <raven@themaw.net>
> > ---
> >  fs/kernfs/dir.c |   52 ++++++++++++++++++++++++++++++++-----------
> > ---------
> >  1 file changed, 32 insertions(+), 20 deletions(-)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index b3d1bc0f317d0..4f037456a8e17 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -1039,9 +1039,28 @@ static int kernfs_dop_revalidate(struct
> > dentry *dentry, unsigned int flags)
> >         if (flags & LOOKUP_RCU)
> >                 return -ECHILD;
> > 
> > -       /* Always perform fresh lookup for negatives */
> > -       if (d_really_is_negative(dentry))
> > -               goto out_bad_unlocked;
> > +       /* Negative hashed dentry? */
> > +       if (d_really_is_negative(dentry)) {
> > +               struct dentry *d_parent = dget_parent(dentry);
> > +               struct kernfs_node *parent;
> > +
> > +               /* If the kernfs parent node has changed discard
> > and
> > +                * proceed to ->lookup.
> > +                */
> > +               parent = kernfs_dentry_node(d_parent);
> > +               if (parent) {
> > +                       if (kernfs_dir_changed(parent, dentry)) {
> 
> Perhaps add a note about this being dependent on parent of a negative
> dentry never changing.

Which of course it can change, at any time.

> 
> If this was backported to a kernel where this assumption doesn't
> hold,
> there would be a mathematical chance of a false negative.

Isn't this a cunning way of saying "in thinking about the move case
you've forgotten about the obvious common case, just put back taking
the read lock already, at least for the check"?

Ian
Ian Kent June 12, 2021, 1:08 a.m. UTC | #5
On Sat, 2021-06-12 at 08:43 +0800, Ian Kent wrote:
> On Sat, 2021-06-12 at 00:07 +0000, Al Viro wrote:
> > On Wed, Jun 09, 2021 at 04:50:27PM +0800, Ian Kent wrote:
> > 
> > > +       if (d_really_is_negative(dentry)) {
> > > +               struct dentry *d_parent = dget_parent(dentry);
> > > +               struct kernfs_node *parent;
> > 
> > What the hell is dget_parent() for?  You don't do anything blocking
> > here, so why not simply grab dentry->d_lock - that'll stabilize
> > the value of ->d_parent just fine.  Just don't forget to drop the
> > lock before returning and that's it...
> 
> Thanks Al, I'll change it.

But if I change to take the read lock to ensure there's no operation
in progress for the revision check I would need the dget_parent(), yes?

> 
> > 
> > > +               /* If the kernfs parent node has changed discard
> > > and
> > > +                * proceed to ->lookup.
> > > +                */
> > > +               parent = kernfs_dentry_node(d_parent);
> > > +               if (parent) {
> > > +                       if (kernfs_dir_changed(parent, dentry)) {
> > > +                               dput(d_parent);
> > > +                               return 0;
> > > +                       }
> > > +               }
> > > +               dput(d_parent);
> > > +
> > > +               /* The kernfs node doesn't exist, leave the
> > > dentry
> > > +                * negative and return success.
> > > +                */
> > > +               return 1;
> > > +       }
>
Al Viro June 12, 2021, 1:48 a.m. UTC | #6
On Sat, Jun 12, 2021 at 08:47:17AM +0800, Ian Kent wrote:

> > Perhaps add a note about this being dependent on parent of a negative
> > dentry never changing.
> 
> > Which of course it can change, at any time.

What?
Al Viro June 12, 2021, 1:51 a.m. UTC | #7
On Sat, Jun 12, 2021 at 09:08:05AM +0800, Ian Kent wrote:

> But if I change to take the read lock to ensure there's no operation
> in progress for the revision check I would need the dget_parent(), yes?

WTF for?  ->d_parent can change *ONLY* when ->d_lock is held on all
dentries involved (including old and new parents).

And it very definitely does *not* change for negative dentries.  I mean,
look at the very beginning of __d_move().
Ian Kent June 13, 2021, 1:16 a.m. UTC | #8
On Sat, 2021-06-12 at 01:48 +0000, Al Viro wrote:
> On Sat, Jun 12, 2021 at 08:47:17AM +0800, Ian Kent wrote:
> 
> > > Perhaps add a note about this being dependent on parent of a
> > > negative
> > > dentry never changing.
> > 
> > > Which of course it can change, at any time.
> 
> What?

For some reason I thought Miklos was talking about the revision
of the parent not changing but that's not what he's saying.

Ian
Ian Kent June 13, 2021, 1:57 a.m. UTC | #9
On Sat, 2021-06-12 at 01:51 +0000, Al Viro wrote:
> On Sat, Jun 12, 2021 at 09:08:05AM +0800, Ian Kent wrote:
> 
> > But if I change to take the read lock to ensure there's no
> > operation
> > in progress for the revision check I would need the dget_parent(),
> > yes?
> 
> WTF for?  ->d_parent can change *ONLY* when ->d_lock is held on all
> dentries involved (including old and new parents).

Understood, thanks.

> 
> And it very definitely does *not* change for negative dentries.  I
> mean,
> look at the very beginning of __d_move().
diff mbox series

Patch

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b3d1bc0f317d0..4f037456a8e17 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1039,9 +1039,28 @@  static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	/* Always perform fresh lookup for negatives */
-	if (d_really_is_negative(dentry))
-		goto out_bad_unlocked;
+	/* Negative hashed dentry? */
+	if (d_really_is_negative(dentry)) {
+		struct dentry *d_parent = dget_parent(dentry);
+		struct kernfs_node *parent;
+
+		/* If the kernfs parent node has changed discard and
+		 * proceed to ->lookup.
+		 */
+		parent = kernfs_dentry_node(d_parent);
+		if (parent) {
+			if (kernfs_dir_changed(parent, dentry)) {
+				dput(d_parent);
+				return 0;
+			}
+		}
+		dput(d_parent);
+
+		/* The kernfs node doesn't exist, leave the dentry
+		 * negative and return success.
+		 */
+		return 1;
+	}
 
 	kn = kernfs_dentry_node(dentry);
 	mutex_lock(&kernfs_mutex);
@@ -1067,7 +1086,6 @@  static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 	return 1;
 out_bad:
 	mutex_unlock(&kernfs_mutex);
-out_bad_unlocked:
 	return 0;
 }
 
@@ -1082,33 +1100,27 @@  static struct dentry *kernfs_iop_lookup(struct inode *dir,
 	struct dentry *ret;
 	struct kernfs_node *parent = dir->i_private;
 	struct kernfs_node *kn;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	const void *ns = NULL;
 
 	mutex_lock(&kernfs_mutex);
-
 	if (kernfs_ns_enabled(parent))
 		ns = kernfs_info(dir->i_sb)->ns;
 
 	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
-
-	/* no such entry */
-	if (!kn || !kernfs_active(kn)) {
-		ret = NULL;
-		goto out_unlock;
-	}
-
 	/* attach dentry and inode */
-	inode = kernfs_get_inode(dir->i_sb, kn);
-	if (!inode) {
-		ret = ERR_PTR(-ENOMEM);
-		goto out_unlock;
+	if (kn && kernfs_active(kn)) {
+		inode = kernfs_get_inode(dir->i_sb, kn);
+		if (!inode)
+			inode = ERR_PTR(-ENOMEM);
 	}
-
-	/* instantiate and hash dentry */
+	/* Needed only for negative dentry validation */
+	if (!inode)
+		kernfs_set_rev(parent, dentry);
+	/* instantiate and hash (possibly negative) dentry */
 	ret = d_splice_alias(inode, dentry);
- out_unlock:
 	mutex_unlock(&kernfs_mutex);
+
 	return ret;
 }