diff mbox series

[v3] libfs: getdents() should return 0 after reaching EOD

Message ID 170033563101.235981.14540963282243913866.stgit@bazille.1015granger.net (mailing list archive)
State New
Headers show
Series [v3] libfs: getdents() should return 0 after reaching EOD | expand

Commit Message

Chuck Lever Nov. 18, 2023, 7:33 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

The new directory offset helpers don't conform with the convention
of getdents() returning no more entries once a directory file
descriptor has reached the current end-of-directory.

To address this, copy the logic from dcache_readdir() to mark the
open directory file descriptor once EOD has been reached. Rewinding
resets the mark.

Reported-by: Tavian Barnes <tavianator@tavianator.com>
Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/libfs.c |   12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

This patch passes Tavian's reproducer. fstests over NFS shows no
regression. However, generic/676 fails when running directly against
a tmpfs mount:

     QA output created by 676
    -All tests passed
    +Unexpected EOF while reading dir.

I will look into that.

Changes since v2:
- Go back to marking EOD in file->private_data (with a comment)

Changes since RFC:
- Keep file->private_data stable while directory descriptor remains open

Comments

Chuck Lever Nov. 18, 2023, 10:11 p.m. UTC | #1
On Sat, Nov 18, 2023 at 02:33:19PM -0500, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> The new directory offset helpers don't conform with the convention
> of getdents() returning no more entries once a directory file
> descriptor has reached the current end-of-directory.
> 
> To address this, copy the logic from dcache_readdir() to mark the
> open directory file descriptor once EOD has been reached. Rewinding
> resets the mark.
> 
> Reported-by: Tavian Barnes <tavianator@tavianator.com>
> Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
> Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/libfs.c |   12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> This patch passes Tavian's reproducer. fstests over NFS shows no
> regression. However, generic/676 fails when running directly against
> a tmpfs mount:
> 
>      QA output created by 676
>     -All tests passed
>     +Unexpected EOF while reading dir.
> 
> I will look into that.

offset_dir_llseek() has to reset the file descriptor's EOD marker,
just like dcache_dir_lseek() does (effectively it resets
cursor->d_child).

We don't hold f_pos_lock in offset_dir_llseek(), though.


> Changes since v2:
> - Go back to marking EOD in file->private_data (with a comment)
> 
> Changes since RFC:
> - Keep file->private_data stable while directory descriptor remains open
> 
> 
> diff --git a/fs/libfs.c b/fs/libfs.c
> index e9440d55073c..851e29fdd7e7 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -428,7 +428,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
>  			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
>  }
>  
> -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
> +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  {
>  	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
>  	XA_STATE(xas, &so_ctx->xa, ctx->pos);
> @@ -437,7 +437,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  	while (true) {
>  		dentry = offset_find_next(&xas);
>  		if (!dentry)
> -			break;
> +			return ERR_PTR(-ENOENT);
>  
>  		if (!offset_dir_emit(ctx, dentry)) {
>  			dput(dentry);
> @@ -447,6 +447,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
>  		dput(dentry);
>  		ctx->pos = xas.xa_index + 1;
>  	}
> +	return NULL;
>  }
>  
>  /**
> @@ -479,7 +480,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>  	if (!dir_emit_dots(file, ctx))
>  		return 0;
>  
> -	offset_iterate_dir(d_inode(dir), ctx);
> +	/* In this case, ->private_data is protected by f_pos_lock */
> +	if (ctx->pos == 2)
> +		file->private_data = NULL;
> +	else if (file->private_data == ERR_PTR(-ENOENT))
> +		return 0;
> +	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
>  	return 0;
>  }
>  
> 
>
Al Viro Nov. 18, 2023, 11:36 p.m. UTC | #2
On Sat, Nov 18, 2023 at 05:11:39PM -0500, Chuck Lever wrote:

> We don't hold f_pos_lock in offset_dir_llseek(), though.

We'd better...  Which call chain leads to it without ->f_pos_lock?
Chuck Lever Nov. 19, 2023, 7:18 p.m. UTC | #3
> On Nov 18, 2023, at 6:36 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> 
> On Sat, Nov 18, 2023 at 05:11:39PM -0500, Chuck Lever wrote:
> 
>> We don't hold f_pos_lock in offset_dir_llseek(), though.
> 
> We'd better...  Which call chain leads to it without ->f_pos_lock?

I based that comment on code audit. It does not appear to be
obviously held in this code path on first (or second) reading.

However, let me look again, and test with lockdep_assert() to
confirm.

--
Chuck Lever
Chuck Lever Nov. 19, 2023, 8:22 p.m. UTC | #4
> On Nov 19, 2023, at 2:18 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> 
> 
>> On Nov 18, 2023, at 6:36 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>> 
>> On Sat, Nov 18, 2023 at 05:11:39PM -0500, Chuck Lever wrote:
>> 
>>> We don't hold f_pos_lock in offset_dir_llseek(), though.
>> 
>> We'd better...  Which call chain leads to it without ->f_pos_lock?
> 
> I based that comment on code audit. It does not appear to be
> obviously held in this code path on first (or second) reading.
> 
> However, let me look again, and test with lockdep_assert() to
> confirm.

lockdep assertion failure

Call Trace:
 <TASK>
 ? show_regs+0x5d/0x64
 ? offset_dir_llseek+0x39/0xa3
 ? __warn+0xab/0x158
 ? report_bug+0xd0/0x144
 ? offset_dir_llseek+0x39/0xa3
 ? handle_bug+0x45/0x74
 ? exc_invalid_op+0x18/0x68
 ? asm_exc_invalid_op+0x1b/0x20
 ? offset_dir_llseek+0x39/0xa3
 ? __pfx_nfs3svc_encode_entryplus3+0x10/0x10 [nfsd]
 vfs_llseek+0x1f/0x31
 nfsd_readdir+0x64/0xb7 [nfsd]
 nfsd3_proc_readdirplus+0xde/0x122 [nfsd]
 nfsd_dispatch+0xe8/0x1cf [nfsd]
 svc_process_common+0x43c/0x5d6 [sunrpc]
 ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd]
 svc_process+0xc6/0xe3 [sunrpc]
 svc_handle_xprt+0x371/0x42f [sunrpc]
 svc_recv+0x10b/0x155 [sunrpc]
 nfsd+0xb1/0xe3 [nfsd]
 ? __pfx_nfsd+0x10/0x10 [nfsd]
 kthread+0x113/0x11b
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x2a/0x43
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1b/0x30
 </TASK>


--
Chuck Lever
Al Viro Nov. 19, 2023, 9:14 p.m. UTC | #5
On Sun, Nov 19, 2023 at 08:22:30PM +0000, Chuck Lever III wrote:

> lockdep assertion failure
> 
> Call Trace:
>  <TASK>
>  ? show_regs+0x5d/0x64
>  ? offset_dir_llseek+0x39/0xa3
>  ? __warn+0xab/0x158
>  ? report_bug+0xd0/0x144
>  ? offset_dir_llseek+0x39/0xa3
>  ? handle_bug+0x45/0x74
>  ? exc_invalid_op+0x18/0x68
>  ? asm_exc_invalid_op+0x1b/0x20
>  ? offset_dir_llseek+0x39/0xa3
>  ? __pfx_nfs3svc_encode_entryplus3+0x10/0x10 [nfsd]
>  vfs_llseek+0x1f/0x31
>  nfsd_readdir+0x64/0xb7 [nfsd]

Lovely...  Said that, file here is thread-local, so all accesses
to it are serialized.  The same file has ->iterate_shared() called
without ->f_pos_lock a well...

So that's whatever serialization between ->iterate_shared and ->llseek,
really; for normally opened files that's going to be on fdget_pos()
in relevant syscalls, for something private it's up to whatever's
opened them.
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index e9440d55073c..851e29fdd7e7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -428,7 +428,7 @@  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
 	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
 	XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +437,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 	while (true) {
 		dentry = offset_find_next(&xas);
 		if (!dentry)
-			break;
+			return ERR_PTR(-ENOENT);
 
 		if (!offset_dir_emit(ctx, dentry)) {
 			dput(dentry);
@@ -447,6 +447,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 		dput(dentry);
 		ctx->pos = xas.xa_index + 1;
 	}
+	return NULL;
 }
 
 /**
@@ -479,7 +480,12 @@  static int offset_readdir(struct file *file, struct dir_context *ctx)
 	if (!dir_emit_dots(file, ctx))
 		return 0;
 
-	offset_iterate_dir(d_inode(dir), ctx);
+	/* In this case, ->private_data is protected by f_pos_lock */
+	if (ctx->pos == 2)
+		file->private_data = NULL;
+	else if (file->private_data == ERR_PTR(-ENOENT))
+		return 0;
+	file->private_data = offset_iterate_dir(d_inode(dir), ctx);
 	return 0;
 }