diff mbox series

[v2,1/6] libfs: Re-arrange locking in offset_iterate_dir()

Message ID 170820142021.6328.15047865406275957018.stgit@91.116.238.104.host.secureserver.net (mailing list archive)
State New, archived
Headers show
Series Use Maple Trees for simple_offset utilities | expand

Commit Message

Chuck Lever Feb. 17, 2024, 8:23 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

Liam and Matthew say that once the RCU read lock is released,
xa_state is not safe to re-use for the next xas_find() call. But the
RCU read lock must be released on each loop iteration so that
dput(), which might_sleep(), can be called safely.

Thus we are forced to walk the offset tree with fresh state for each
directory entry. xa_find() can do this for us, though it might be a
little less efficient than maintaining xa_state locally.

We believe that in the current code base, inode->i_rwsem provides
protection for the xa_state maintained in
offset_iterate_dir(). However, there is no guarantee that will
continue to be the case in the future.

Since offset_iterate_dir() doesn't build xa_state locally any more,
there's no longer a strong need for offset_find_next(). Clean up by
rolling these two helpers together.

Suggested-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Message-ID: <170785993027.11135.8830043889278631735.stgit@91.116.238.104.host.secureserver.net>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/libfs.c |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

Comments

Christian Brauner Feb. 20, 2024, 9:56 a.m. UTC | #1
On Sat, Feb 17, 2024 at 03:23:40PM -0500, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> Liam and Matthew say that once the RCU read lock is released,
> xa_state is not safe to re-use for the next xas_find() call. But the
> RCU read lock must be released on each loop iteration so that
> dput(), which might_sleep(), can be called safely.

Fwiw, functions like this:

static struct dentry *offset_find_next(struct xa_state *xas)
{
        struct dentry *child, *found = NULL;

        rcu_read_lock();
        child = xas_next_entry(xas, U32_MAX);
        if (!child)
                goto out;
        spin_lock(&child->d_lock);
        if (simple_positive(child))
                found = dget_dlock(child);
        spin_unlock(&child->d_lock);
out:
        rcu_read_unlock();
        return found;
}

should use the new guard feature going forward imho. IOW, in the future such
helpers should be written as:

static struct dentry *offset_find_next(struct xa_state *xas)
{
        struct dentry *child, *found = NULL;

	guard(rcu)();
        child = xas_next_entry(xas, U32_MAX);
        if (!child)
		return NULL;
        spin_lock(&child->d_lock);
        if (simple_positive(child))
                found = dget_dlock(child);
        spin_unlock(&child->d_lock);
        return found;
}

which allows you to eliminate the goto and to have the guarantee that the rcu
lock is released when you return. This also works for other locks btw.
Jan Kara Feb. 21, 2024, 1:15 p.m. UTC | #2
On Sat 17-02-24 15:23:40, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> Liam and Matthew say that once the RCU read lock is released,
> xa_state is not safe to re-use for the next xas_find() call. But the
> RCU read lock must be released on each loop iteration so that
> dput(), which might_sleep(), can be called safely.
> 
> Thus we are forced to walk the offset tree with fresh state for each
> directory entry. xa_find() can do this for us, though it might be a
> little less efficient than maintaining xa_state locally.
> 
> We believe that in the current code base, inode->i_rwsem provides
> protection for the xa_state maintained in
> offset_iterate_dir(). However, there is no guarantee that will
> continue to be the case in the future.
> 
> Since offset_iterate_dir() doesn't build xa_state locally any more,
> there's no longer a strong need for offset_find_next(). Clean up by
> rolling these two helpers together.
> 
> Suggested-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
> Message-ID: <170785993027.11135.8830043889278631735.stgit@91.116.238.104.host.secureserver.net>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/libfs.c |   12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index eec6031b0155..752e24c669d9 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -402,12 +402,13 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
>  	return vfs_setpos(file, offset, U32_MAX);
>  }
>  
> -static struct dentry *offset_find_next(struct xa_state *xas)
> +static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
>  {
>  	struct dentry *child, *found = NULL;
> +	XA_STATE(xas, &octx->xa, offset);
>  
>  	rcu_read_lock();
> -	child = xas_next_entry(xas, U32_MAX);
> +	child = xas_next_entry(&xas, U32_MAX);
>  	if (!child)
>  		goto out;
>  	spin_lock(&child->d_lock);
> @@ -430,12 +431,11 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>  
>  static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  {
> -	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
> -	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> +	struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
>  	struct dentry *dentry;
>  
>  	while (true) {
> -		dentry = offset_find_next(&xas);
> +		dentry = offset_find_next(octx, ctx->pos);
>  		if (!dentry)
>  			return ERR_PTR(-ENOENT);
>  
> @@ -444,8 +444,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  			break;
>  		}
>  
> +		ctx->pos = dentry2offset(dentry) + 1;
>  		dput(dentry);
> -		ctx->pos = xas.xa_index + 1;
>  	}
>  	return NULL;
>  }
> 
>
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index eec6031b0155..752e24c669d9 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -402,12 +402,13 @@  static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
 	return vfs_setpos(file, offset, U32_MAX);
 }
 
-static struct dentry *offset_find_next(struct xa_state *xas)
+static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
 {
 	struct dentry *child, *found = NULL;
+	XA_STATE(xas, &octx->xa, offset);
 
 	rcu_read_lock();
-	child = xas_next_entry(xas, U32_MAX);
+	child = xas_next_entry(&xas, U32_MAX);
 	if (!child)
 		goto out;
 	spin_lock(&child->d_lock);
@@ -430,12 +431,11 @@  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 
 static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
-	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
-	XA_STATE(xas, &so_ctx->xa, ctx->pos);
+	struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
 	struct dentry *dentry;
 
 	while (true) {
-		dentry = offset_find_next(&xas);
+		dentry = offset_find_next(octx, ctx->pos);
 		if (!dentry)
 			return ERR_PTR(-ENOENT);
 
@@ -444,8 +444,8 @@  static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 			break;
 		}
 
+		ctx->pos = dentry2offset(dentry) + 1;
 		dput(dentry);
-		ctx->pos = xas.xa_index + 1;
 	}
 	return NULL;
 }