diff mbox series

[v2] libfs: getdents() should return 0 after reaching EOD

Message ID 170007970281.4975.12356401645395490640.stgit@bazille.1015granger.net (mailing list archive)
State New, archived
Headers show
Series [v2] libfs: getdents() should return 0 after reaching EOD | expand

Commit Message

Chuck Lever Nov. 15, 2023, 8:22 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

The new directory offset helpers don't conform with the convention
of getdents() returning no more entries once a directory file
descriptor has reached the current end-of-directory.

To address this, copy the logic from dcache_readdir() to mark the
open directory file descriptor once EOD has been reached. Rewinding
resets the mark.

Reported-by: Tavian Barnes <tavianator@tavianator.com>
Closes: https://lore.kernel.org/linux-fsdevel/20231113180616.2831430-1-tavianator@tavianator.com/
Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/libfs.c             |   16 +++++++++++++---
 include/linux/dcache.h |    1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

Changes since RFC:
- Keep file->private_data stable while directory descriptor remains open

Comments

Al Viro Nov. 18, 2023, 4:28 p.m. UTC | #1
On Wed, Nov 15, 2023 at 03:22:52PM -0500, Chuck Lever wrote:

>  static int offset_readdir(struct file *file, struct dir_context *ctx)
>  {
> +	struct dentry *cursor = file->private_data;
>  	struct dentry *dir = file->f_path.dentry;
>  
>  	lockdep_assert_held(&d_inode(dir)->i_rwsem);
> @@ -479,11 +481,19 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>  	if (!dir_emit_dots(file, ctx))
>  		return 0;
>  
> -	offset_iterate_dir(d_inode(dir), ctx);
> +	if (ctx->pos == 2)
> +		cursor->d_flags &= ~DCACHE_EOD;
> +	else if (cursor->d_flags & DCACHE_EOD)
> +		return 0;
> +
> +	if (offset_iterate_dir(d_inode(dir), ctx))
> +		cursor->d_flags |= DCACHE_EOD;

This is simply grotesque - "it's better to keep ->private_data constant,
so we will allocate a dentry, just to store the one bit of data we need to
keep track of; oh, and let's grab a bit out of ->d_flags, while we are at it;
we will ignore the usual locking rules for ->d_flags modifications, 'cause
it's all serialized on ->f_pos_lock".

No.  If nothing else, this is harder to follow than the original.  It's
far easier to verify that these struct file instances only use ->private_data
as a flag and these accesses are serialized on ->f_pos_lock as claimed
than go through the accesses to ->d_flags, prove that the one above is
the only one that can happen to such dentries (while they are live, that
is - once they are in __dentry_kill(), there will be modifications of ->d_flags)
and that it can't happen to any other instances.

NAKed-by: Al Viro <viro@zeniv.linux.org.uk>
Chuck Lever III Nov. 18, 2023, 4:40 p.m. UTC | #2
> On Nov 18, 2023, at 11:28 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> 
> On Wed, Nov 15, 2023 at 03:22:52PM -0500, Chuck Lever wrote:
> 
>> static int offset_readdir(struct file *file, struct dir_context *ctx)
>> {
>> + struct dentry *cursor = file->private_data;
>> struct dentry *dir = file->f_path.dentry;
>> 
>> lockdep_assert_held(&d_inode(dir)->i_rwsem);
>> @@ -479,11 +481,19 @@ static int offset_readdir(struct file *file, struct dir_context *ctx)
>> if (!dir_emit_dots(file, ctx))
>> return 0;
>> 
>> - offset_iterate_dir(d_inode(dir), ctx);
>> + if (ctx->pos == 2)
>> + cursor->d_flags &= ~DCACHE_EOD;
>> + else if (cursor->d_flags & DCACHE_EOD)
>> + return 0;
>> +
>> + if (offset_iterate_dir(d_inode(dir), ctx))
>> + cursor->d_flags |= DCACHE_EOD;
> 
> This is simply grotesque - "it's better to keep ->private_data constant,
> so we will allocate a dentry, just to store the one bit of data we need to
> keep track of; oh, and let's grab a bit out of ->d_flags, while we are at it;
> we will ignore the usual locking rules for ->d_flags modifications, 'cause
> it's all serialized on ->f_pos_lock".
> 
> No.  If nothing else, this is harder to follow than the original.

No argument from me.


> It's
> far easier to verify that these struct file instances only use ->private_data
> as a flag and these accesses are serialized on ->f_pos_lock as claimed
> than go through the accesses to ->d_flags, prove that the one above is
> the only one that can happen to such dentries (while they are live, that
> is - once they are in __dentry_kill(), there will be modifications of ->d_flags)
> and that it can't happen to any other instances.
> 
> NAKed-by: Al Viro <viro@zeniv.linux.org.uk>

Fair enough. Are you comfortable enough with v1 to Ack it, or do
you want me to continue looking for another mechanism for marking
the end-of-directory stream?


--
Chuck Lever
diff mbox series

Patch

diff --git a/fs/libfs.c b/fs/libfs.c
index e9440d55073c..d4df19b1b666 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -428,7 +428,7 @@  static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 			  inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 }
 
-static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+static bool offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 {
 	struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
 	XA_STATE(xas, &so_ctx->xa, ctx->pos);
@@ -437,7 +437,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 	while (true) {
 		dentry = offset_find_next(&xas);
 		if (!dentry)
-			break;
+			return true;
 
 		if (!offset_dir_emit(ctx, dentry)) {
 			dput(dentry);
@@ -447,6 +447,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 		dput(dentry);
 		ctx->pos = xas.xa_index + 1;
 	}
+	return false;
 }
 
 /**
@@ -472,6 +473,7 @@  static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
  */
 static int offset_readdir(struct file *file, struct dir_context *ctx)
 {
+	struct dentry *cursor = file->private_data;
 	struct dentry *dir = file->f_path.dentry;
 
 	lockdep_assert_held(&d_inode(dir)->i_rwsem);
@@ -479,11 +481,19 @@  static int offset_readdir(struct file *file, struct dir_context *ctx)
 	if (!dir_emit_dots(file, ctx))
 		return 0;
 
-	offset_iterate_dir(d_inode(dir), ctx);
+	if (ctx->pos == 2)
+		cursor->d_flags &= ~DCACHE_EOD;
+	else if (cursor->d_flags & DCACHE_EOD)
+		return 0;
+
+	if (offset_iterate_dir(d_inode(dir), ctx))
+		cursor->d_flags |= DCACHE_EOD;
 	return 0;
 }
 
 const struct file_operations simple_offset_dir_operations = {
+	.open		= dcache_dir_open,
+	.release	= dcache_dir_close,
 	.llseek		= offset_dir_llseek,
 	.iterate_shared	= offset_readdir,
 	.read		= generic_read_dir,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3da2f0545d5d..ee1757001583 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -208,6 +208,7 @@  struct dentry_operations {
 #define DCACHE_FALLTHRU			0x01000000 /* Fall through to lower layer */
 #define DCACHE_NOKEY_NAME		0x02000000 /* Encrypted name encoded without key */
 #define DCACHE_OP_REAL			0x04000000
+#define DCACHE_EOD			0x08000000 /* Reached end-of-directory */
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
 #define DCACHE_DENTRY_CURSOR		0x20000000