diff mbox series

[RFC] libfs: getdents() should return 0 after reaching EOD

Message ID 169997697704.4588.14555611205729567800.stgit@bazille.1015granger.net (mailing list archive)
State New, archived
Headers show
Series [RFC] libfs: getdents() should return 0 after reaching EOD | expand

Commit Message

Chuck Lever Nov. 14, 2023, 3:49 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

The new directory offset helpers don't conform with the convention
of getdents() returning no more entries once a directory file
descriptor has reached the current end-of-directory.

To address this, copy the logic from dcache_readdir() to mark the
open directory file descriptor once EOD has been reached. Rewinding
resets the mark.

Reported-by: Tavian Barnes <tavianator@tavianator.com>
Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/libfs.c |   13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

Comments

Matthew Wilcox Nov. 14, 2023, 3:56 p.m. UTC | #1
On Tue, Nov 14, 2023 at 10:49:37AM -0500, Chuck Lever wrote:
> -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  {
>  	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
>  	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> @@ -437,7 +437,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  	while (true) {
>  		dentry = offset_find_next(&xas);
>  		if (!dentry)
> -			break;
> +			/* readdir has reached the current EOD */
> +			return (void *)0x10;

Funny, you used the same bit pattern as ZERO_SIZE_PTR without using
the macro ...

> @@ -479,7 +481,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>  	if (!dir_emit_dots(file, ctx))
>  		return 0;
>  
> -	offset_iterate_dir(d_inode(dir), ctx);
> +	if (ctx->pos == 2)
> +		file->private_data = NULL;
> +	else if (file->private_data == (void *)0x10)
> +		return 0;
> +
> +	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
>  	return 0;
>  }

It might make more sense to use ERR_PTR(-ENOENT) or ERANGE or something
that's a more understandable sentinel value?
Chuck Lever III Nov. 14, 2023, 4 p.m. UTC | #2
On Tue, Nov 14, 2023 at 03:56:50PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 14, 2023 at 10:49:37AM -0500, Chuck Lever wrote:
> > -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> > +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> >  {
> >  	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
> >  	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> > @@ -437,7 +437,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> >  	while (true) {
> >  		dentry = offset_find_next(&xas);
> >  		if (!dentry)
> > -			break;
> > +			/* readdir has reached the current EOD */
> > +			return (void *)0x10;
> 
> Funny, you used the same bit pattern as ZERO_SIZE_PTR without using
> the macro ...

On purpose, it's an eye catcher because I didn't have a better idea.
It obviously worked!


> > @@ -479,7 +481,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
> >  	if (!dir_emit_dots(file, ctx))
> >  		return 0;
> >  
> > -	offset_iterate_dir(d_inode(dir), ctx);
> > +	if (ctx->pos == 2)
> > +		file->private_data = NULL;
> > +	else if (file->private_data == (void *)0x10)
> > +		return 0;
> > +
> > +	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
> >  	return 0;
> >  }
> 
> It might make more sense to use ERR_PTR(-ENOENT) or ERANGE or something
> that's a more understandable sentinel value?

Yes, thanks. That's a better idea.
Christian Brauner Nov. 14, 2023, 5:29 p.m. UTC | #3
On Tue, Nov 14, 2023 at 10:49:37AM -0500, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> The new directory offset helpers don't conform with the convention
> of getdents() returning no more entries once a directory file
> descriptor has reached the current end-of-directory.
> 
> To address this, copy the logic from dcache_readdir() to mark the
> open directory file descriptor once EOD has been reached. Rewinding
> resets the mark.
> 
> Reported-by: Tavian Barnes <tavianator@tavianator.com>
> Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
> Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/libfs.c |   13 ++++++++++---
>  1 file changed, 10 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index e9440d55073c..1c866b087f0c 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -428,7 +428,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>  			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
>  }
>  
> -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  {
>  	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
>  	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> @@ -437,7 +437,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  	while (true) {
>  		dentry = offset_find_next(&xas);
>  		if (!dentry)
> -			break;
> +			/* readdir has reached the current EOD */
> +			return (void *)0x10;
>  
>  		if (!offset_dir_emit(ctx, dentry)) {
>  			dput(dentry);
> @@ -447,6 +448,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  		dput(dentry);
>  		ctx->pos = xas.xa_index + 1;
>  	}
> +	return NULL;
>  }
>  
>  /**
> @@ -479,7 +481,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>  	if (!dir_emit_dots(file, ctx))
>  		return 0;
>  
> -	offset_iterate_dir(d_inode(dir), ctx);
> +	if (ctx->pos == 2)
> +		file->private_data = NULL;
> +	else if (file->private_data == (void *)0x10)
> +		return 0;
> +
> +	file->private_data = offset_iterate_dir(d_inode(dir), ctx);

I think it's usually best practice to only modify the file->private_data
pointer during f_op->open and f_op->close but not override
file->private_data once the file is visible to other threads. I think
here it might not matter because access to file->private_data is
serialized on f_pos_lock and it's not used by anything else.
Al Viro Nov. 14, 2023, 5:57 p.m. UTC | #4
On Tue, Nov 14, 2023 at 06:29:15PM +0100, Christian Brauner wrote:

> I think it's usually best practice to only modify the file->private_data
> pointer during f_op->open and f_op->close but not override
> file->private_data once the file is visible to other threads. I think
> here it might not matter because access to file->private_data is
> serialized on f_pos_lock and it's not used by anything else.

That is entirely up to filesystem.  Warning that use of that library
helper means that you can't use your ->d_fsdata for anything else - sure,
but that's it.
Chuck Lever III Nov. 14, 2023, 6:13 p.m. UTC | #5
On Tue, Nov 14, 2023 at 06:29:15PM +0100, Christian Brauner wrote:
> On Tue, Nov 14, 2023 at 10:49:37AM -0500, Chuck Lever wrote:
> > From: Chuck Lever <chuck.lever@oracle.com>
> > 
> > The new directory offset helpers don't conform with the convention
> > of getdents() returning no more entries once a directory file
> > descriptor has reached the current end-of-directory.
> > 
> > To address this, copy the logic from dcache_readdir() to mark the
> > open directory file descriptor once EOD has been reached. Rewinding
> > resets the mark.
> > 
> > Reported-by: Tavian Barnes <tavianator@tavianator.com>
> > Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
> > Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
> > Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> > ---
> >  fs/libfs.c |   13 ++++++++++---
> >  1 file changed, 10 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/libfs.c b/fs/libfs.c
> > index e9440d55073c..1c866b087f0c 100644
> > --- a/fs/libfs.c
> > +++ b/fs/libfs.c
> > @@ -428,7 +428,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
> >  			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
> >  }
> >  
> > -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> > +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> >  {
> >  	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
> >  	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> > @@ -437,7 +437,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> >  	while (true) {
> >  		dentry = offset_find_next(&xas);
> >  		if (!dentry)
> > -			break;
> > +			/* readdir has reached the current EOD */
> > +			return (void *)0x10;
> >  
> >  		if (!offset_dir_emit(ctx, dentry)) {
> >  			dput(dentry);
> > @@ -447,6 +448,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> >  		dput(dentry);
> >  		ctx->pos = xas.xa_index + 1;
> >  	}
> > +	return NULL;
> >  }
> >  
> >  /**
> > @@ -479,7 +481,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
> >  	if (!dir_emit_dots(file, ctx))
> >  		return 0;
> >  
> > -	offset_iterate_dir(d_inode(dir), ctx);
> > +	if (ctx->pos == 2)
> > +		file->private_data = NULL;
> > +	else if (file->private_data == (void *)0x10)
> > +		return 0;
> > +
> > +	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
> 
> I think it's usually best practice to only modify the file->private_data
> pointer during f_op->open and f_op->close but not override
> file->private_data once the file is visible to other threads. I think
> here it might not matter because access to file->private_data is
> serialized on f_pos_lock and it's not used by anything else.

I freely admit that using file->private_data this way is ugly. I was
hoping to find one bit somewhere that I could use to mark the file
descriptor, and file->private_data seemed the most handy.

We could go back to allocating a phony dentry (d_alloc_cursor) and
place the end-of-directory flag in there.
Christian Brauner Nov. 15, 2023, 8:25 p.m. UTC | #6
On Tue, Nov 14, 2023 at 05:57:14PM +0000, Al Viro wrote:
> On Tue, Nov 14, 2023 at 06:29:15PM +0100, Christian Brauner wrote:
> 
> > I think it's usually best practice to only modify the file->private_data
> > pointer during f_op->open and f_op->close but not override
> > file->private_data once the file is visible to other threads. I think
> > here it might not matter because access to file->private_data is
> > serialized on f_pos_lock and it's not used by anything else.
> 
> That is entirely up to filesystem.  Warning that use of that library
> helper means that you can't use your ->d_fsdata for anything else - sure,
> but that's it.

Yes, but it's usually easier to reason about if the pointer just changes
during open/close. Nothing I wrote said that it's mandated just so I'm
clear.
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index e9440d55073c..1c866b087f0c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -428,7 +428,7 @@  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
 	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
 	XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +437,8 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 	while (true) {
 		dentry = offset_find_next(&xas);
 		if (!dentry)
-			break;
+			/* readdir has reached the current EOD */
+			return (void *)0x10;
 
 		if (!offset_dir_emit(ctx, dentry)) {
 			dput(dentry);
@@ -447,6 +448,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 		dput(dentry);
 		ctx->pos = xas.xa_index + 1;
 	}
+	return NULL;
 }
 
 /**
@@ -479,7 +481,12 @@  static int offset_readdir(struct file *file, struct dir_context *ctx)
 	if (!dir_emit_dots(file, ctx))
 		return 0;
 
-	offset_iterate_dir(d_inode(dir), ctx);
+	if (ctx->pos == 2)
+		file->private_data = NULL;
+	else if (file->private_data == (void *)0x10)
+		return 0;
+
+	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
 	return 0;
 }