diff mbox series

[v2,2/2] fat: always use dir_emit_dots and ignore . and .. entries

Message ID 20240625175133.922758-3-cascardo@igalia.com (mailing list archive)
State New
Headers show
Series fat: add support for directories without . and .. entries | expand

Commit Message

Thadeu Lima de Souza Cascardo June 25, 2024, 5:51 p.m. UTC
Instead of only using dir_emit_dots for the root inode and explicitly
requiring the . and .. entries to emit them, use dir_emit_dots for all
directories.

That allows filesystems with directories without the . or .. entries to
still show them.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
---
 fs/fat/dir.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

Comments

OGAWA Hirofumi June 25, 2024, 9:47 p.m. UTC | #1
Thadeu Lima de Souza Cascardo <cascardo@igalia.com> writes:

> Instead of only using dir_emit_dots for the root inode and explicitly
> requiring the . and .. entries to emit them, use dir_emit_dots for all
> directories.
>
> That allows filesystems with directories without the . or .. entries to
> still show them.

It is unacceptable to change the correct behavior to accommodate a broken
format. It is unlikely to break userspace; however, this still causes a
user-visible change in the seek position.

Thanks.

> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
> ---
>  fs/fat/dir.c | 24 +++++++++---------------
>  1 file changed, 9 insertions(+), 15 deletions(-)
>
> diff --git a/fs/fat/dir.c b/fs/fat/dir.c
> index 4e4a359a1ea3..e70781569de5 100644
> --- a/fs/fat/dir.c
> +++ b/fs/fat/dir.c
> @@ -583,15 +583,14 @@ static int __fat_readdir(struct inode *inode, struct file *file,
>  	mutex_lock(&sbi->s_lock);
>  
>  	cpos = ctx->pos;
> -	/* Fake . and .. for the root directory. */
> -	if (inode->i_ino == MSDOS_ROOT_INO) {
> -		if (!dir_emit_dots(file, ctx))
> -			goto out;
> -		if (ctx->pos == 2) {
> -			fake_offset = 1;
> -			cpos = 0;
> -		}
> +
> +	if (!dir_emit_dots(file, ctx))
> +		goto out;
> +	if (ctx->pos == 2) {
> +		fake_offset = 1;
> +		cpos = 0;
>  	}
> +
>  	if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
>  		ret = -ENOENT;
>  		goto out;
> @@ -671,13 +670,8 @@ static int __fat_readdir(struct inode *inode, struct file *file,
>  	if (fake_offset && ctx->pos < 2)
>  		ctx->pos = 2;
>  
> -	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
> -		if (!dir_emit_dot(file, ctx))
> -			goto fill_failed;
> -	} else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
> -		if (!dir_emit_dotdot(file, ctx))
> -			goto fill_failed;
> -	} else {
> +	if (memcmp(de->name, MSDOS_DOT, MSDOS_NAME) &&
> +	    memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
>  		unsigned long inum;
>  		loff_t i_pos = fat_make_i_pos(sb, bh, de);
>  		struct inode *tmp = fat_iget(sb, i_pos);
Thadeu Lima de Souza Cascardo June 26, 2024, 7:46 p.m. UTC | #2
On Wed, Jun 26, 2024 at 06:47:15AM +0900, OGAWA Hirofumi wrote:
> Thadeu Lima de Souza Cascardo <cascardo@igalia.com> writes:
> 
> > Instead of only using dir_emit_dots for the root inode and explicitly
> > requiring the . and .. entries to emit them, use dir_emit_dots for all
> > directories.
> >
> > That allows filesystems with directories without the . or .. entries to
> > still show them.
> 
> Unacceptable to change the correct behavior to broken format. And
> unlikely break the userspace, however this still has the user visible
> change of seek pos.
> 
> Thanks.
> 

I agree that if this breaks userspace with a good filesystem or regresses
in a way that real applications would break, that this needs to be redone.

However, I spent a few hours doing some extra testing (I had already run
some xfstests that include directory testing) and I failed to find any
issues with this fix.

If this would break, it would have broken the root directory. In the case
of a directory including the . and .. entries, the d_off for the .. entry
will be set for the first non-dot-or-dotdot entry. For ., it will be set as
1, which, if used by telldir (or llseek), will emit the .. entry, as
expected.

For the case where both . and .. are absent, the first real entry will have
d_off as 2, and it will just work.

So everything seems to work as expected. Do you see any user visible change
that would break any applications?

Thanks.
Cascardo.

> > Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
> > ---
> >  fs/fat/dir.c | 24 +++++++++---------------
> >  1 file changed, 9 insertions(+), 15 deletions(-)
> >
> > diff --git a/fs/fat/dir.c b/fs/fat/dir.c
> > index 4e4a359a1ea3..e70781569de5 100644
> > --- a/fs/fat/dir.c
> > +++ b/fs/fat/dir.c
> > @@ -583,15 +583,14 @@ static int __fat_readdir(struct inode *inode, struct file *file,
> >  	mutex_lock(&sbi->s_lock);
> >  
> >  	cpos = ctx->pos;
> > -	/* Fake . and .. for the root directory. */
> > -	if (inode->i_ino == MSDOS_ROOT_INO) {
> > -		if (!dir_emit_dots(file, ctx))
> > -			goto out;
> > -		if (ctx->pos == 2) {
> > -			fake_offset = 1;
> > -			cpos = 0;
> > -		}
> > +
> > +	if (!dir_emit_dots(file, ctx))
> > +		goto out;
> > +	if (ctx->pos == 2) {
> > +		fake_offset = 1;
> > +		cpos = 0;
> >  	}
> > +
> >  	if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
> >  		ret = -ENOENT;
> >  		goto out;
> > @@ -671,13 +670,8 @@ static int __fat_readdir(struct inode *inode, struct file *file,
> >  	if (fake_offset && ctx->pos < 2)
> >  		ctx->pos = 2;
> >  
> > -	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
> > -		if (!dir_emit_dot(file, ctx))
> > -			goto fill_failed;
> > -	} else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
> > -		if (!dir_emit_dotdot(file, ctx))
> > -			goto fill_failed;
> > -	} else {
> > +	if (memcmp(de->name, MSDOS_DOT, MSDOS_NAME) &&
> > +	    memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
> >  		unsigned long inum;
> >  		loff_t i_pos = fat_make_i_pos(sb, bh, de);
> >  		struct inode *tmp = fat_iget(sb, i_pos);
> 
> -- 
> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
OGAWA Hirofumi June 26, 2024, 8:10 p.m. UTC | #3
Thadeu Lima de Souza Cascardo <cascardo@igalia.com> writes:

>> Unacceptable to change the correct behavior to broken format. And
>> unlikely break the userspace, however this still has the user visible
>> change of seek pos.
>> 
>> Thanks.
>> 
>
> I agree that if this breaks userspace with a good filesystem or regresses
> in a way that real applications would break, that this needs to be redone.
>
> However, I spent a few hours doing some extra testing (I had already run
> some xfstests that include directory testing) and I failed to find any
> issues with this fix.
>
> If this would break, it would have broken the root directory. In the case
> of a directory including the . and .. entries, the d_off for the .. entry
> will be set for the first non-dot-or-dotdot entry. For ., it will be set as
> 1, which, if used by telldir (or llseek), will emit the .. entry, as
> expected.
>
> For the case where both . and .. are absent, the first real entry will have
> d_off as 2, and it will just work.
>
> So everything seems to work as expected. Do you see any user visible change
> that would break any applications?

First of all, I don't consider this a fix; I consider it a workaround for
a broken formatter (Windows's fsck also treats it as broken). So supporting
it is very low priority.

As said, I also think low chance to break the userspace. However it
changes real offset to pseudo offset. So if userspace saved it to
persistent space, breaks userspace. Unlikely, but I think there is no
value to change the behavior for workaround.

Thanks.
Thadeu Lima de Souza Cascardo June 27, 2024, 12:51 p.m. UTC | #4
On Thu, Jun 27, 2024 at 05:10:44AM +0900, OGAWA Hirofumi wrote:
> Thadeu Lima de Souza Cascardo <cascardo@igalia.com> writes:
> 
> >> Unacceptable to change the correct behavior to broken format. And
> >> unlikely break the userspace, however this still has the user visible
> >> change of seek pos.
> >> 
> >> Thanks.
> >> 
> >
> > I agree that if this breaks userspace with a good filesystem or regresses
> > in a way that real applications would break, that this needs to be redone.
> >
> > However, I spent a few hours doing some extra testing (I had already run
> > some xfstests that include directory testing) and I failed to find any
> > issues with this fix.
> >
> > If this would break, it would have broken the root directory. In the case
> > of a directory including the . and .. entries, the d_off for the .. entry
> > will be set for the first non-dot-or-dotdot entry. For ., it will be set as
> > 1, which, if used by telldir (or llseek), will emit the .. entry, as
> > expected.
> >
> > For the case where both . and .. are absent, the first real entry will have
> > d_off as 2, and it will just work.
> >
> > So everything seems to work as expected. Do you see any user visible change
> > that would break any applications?
> 
> First of all, I'm not thinking this is the fix, I'm thinking this as the
> workaround of broken formatter (because the windows's fsck also think it
> as broken). So very low priority to support.
> 
> As said, I also think low chance to break the userspace. However it
> changes real offset to pseudo offset. So if userspace saved it to
> persistent space, breaks userspace. Unlikely, but I think there is no
> value to change the behavior for workaround.
> 
> Thanks.
> -- 
> OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>

I looked at that perspective, but still wanted to allow users to use such
filesystems, even if they needed to fsck it first.

But there is the issue that when such filesystems are mounted, they are
further corrupted, preventing such fsck from correctly fixing and allowing
access to the data.

So I started doing some investigation and that led me to the following
code from fs/fat/inode.c:

static void fat_evict_inode(struct inode *inode)
{
	truncate_inode_pages_final(&inode->i_data);
	if (!inode->i_nlink) {
		inode->i_size = 0;
		fat_truncate_blocks(inode, 0);
	} else
		fat_free_eofblocks(inode);
[...]

That is, since the directory has no links, once it is evicted (which
happens right after reading the number of subdirectories and failing
verification), it is truncated. That means all clusters are marked as FREE.
Then, later, if trying to fsck or mount this filesystem again, the
directory entry is removed or further errors show up (as an EOF is
expected, not a FREE cluster).

And that is caused by attributing a number of 0 links. I looked it up on
how other filesystems handle this situation and I found out that exfat adds
2 to the number of subdirectories, just as I am suggesting. When
enumerating the directories (at its readdir), it also relies on
dir_emit_dots for all cases.

As for programs persisting the offset, the manpage for telldir has on its
NOTES section:

"""
Application programs should treat this strictly as an opaque value, making
no assumptions about its contents.
"""

I know this doesn't refer to persisting or not that opaque value, but any
other changes to the directory would change the offset of its current
subdirectories and given those values are opaque, no assumptions should be
made. And unless we find such programs in the wild, the same argument could
be made that there may be programs that expect . and .. to be at offset 0
and 1, like every filesystem that uses dir_emit_dots does.

I understand the cautiousness to prevent regressions, but I did the work
here to test and understand the changes that are being proposed. I even
looked into another way of preventing the further corruption, but that
convinced me even more that the right fix is to assign a minimum number of
links to directories and I found precedent for this.

Thanks.
Cascardo.
OGAWA Hirofumi June 27, 2024, 3:28 p.m. UTC | #5
Thadeu Lima de Souza Cascardo <cascardo@igalia.com> writes:

>> First of all, I'm not thinking this is the fix, I'm thinking this as the
>> workaround of broken formatter (because the windows's fsck also think it
>> as broken). So very low priority to support.
>> 
>> As said, I also think low chance to break the userspace. However it
>> changes real offset to pseudo offset. So if userspace saved it to
>> persistent space, breaks userspace. Unlikely, but I think there is no
>> value to change the behavior for workaround.
>
> So I started doing some investigation and that led me to the following
> code from fs/fat/inode.c:
>
> static void fat_evict_inode(struct inode *inode)
> {
> 	truncate_inode_pages_final(&inode->i_data);
> 	if (!inode->i_nlink) {
> 		inode->i_size = 0;
> 		fat_truncate_blocks(inode, 0);
> 	} else
> 		fat_free_eofblocks(inode);
> [...]
>
> That is, since the directory has no links, once it is evicted (which
> happens right after reading the number of subdirectories and failing
> verification), it is truncated. That means all clusters are marked as FREE.
> Then, later, if trying to fsck or mount this filesystem again, the
> directory entry is removed or further errors show up (as an EOF is
> expected, not a FREE cluster).
>
> And that is caused by attributing a number of 0 links. I looked it up on
> how other filesystems handle this situation and I found out that exfat adds
> 2 to the number of subdirectories, just as I am suggesting. When
> enumerating the directories (at its readdir), it also relies on
> dir_emit_dots for all cases.

Because exfat doesn't have "."/".." always, IIRC.

> As for programs persisting the offset, the manpage for telldir has on its
> NOTES section:
>
> """
> Application programs should treat this strictly as an opaque value, making
> no assumptions about its contents.
> """
>
> I know this doesn't refer to persisting or not that opaque value, but any
> other changes to the directory would change the offset of its current
> subdirectories and given those values are opaque, no assumptions should be
> made. And unless we find such programs in the wild, the same argument could
> be made that there may be programs that expect . and .. to be at offset 0
> and 1, like every filesystem that uses dir_emit_dots does.
>
> I understand the cautiousness to prevent regressions, but I did the work
> here to test and understand the changes that are being proposed. I even
> looked into another way of preventing the further corruption, but that
> convinced me even more that the right fix is to assign a minimum number of
> links to directories and I found precedent for this.

I seriously recommend to change app that make this, or changing the fsck
to fix this. Because this looks like broken as FAT.

Honestly I'm not accepting willingly though, the way to add the
workaround for this would be, detect this breakage and warn it, then
mark the dir inode as broken. And add the workaround codes only for
broken dir inode, and make it work for all operations (just make
mountable and readable is not enough, at least write must not corrupt fs
or panic etc.), without changing the behavior of correct inodes.

Thanks.
diff mbox series

Patch

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 4e4a359a1ea3..e70781569de5 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -583,15 +583,14 @@  static int __fat_readdir(struct inode *inode, struct file *file,
 	mutex_lock(&sbi->s_lock);
 
 	cpos = ctx->pos;
-	/* Fake . and .. for the root directory. */
-	if (inode->i_ino == MSDOS_ROOT_INO) {
-		if (!dir_emit_dots(file, ctx))
-			goto out;
-		if (ctx->pos == 2) {
-			fake_offset = 1;
-			cpos = 0;
-		}
+
+	if (!dir_emit_dots(file, ctx))
+		goto out;
+	if (ctx->pos == 2) {
+		fake_offset = 1;
+		cpos = 0;
 	}
+
 	if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
 		ret = -ENOENT;
 		goto out;
@@ -671,13 +670,8 @@  static int __fat_readdir(struct inode *inode, struct file *file,
 	if (fake_offset && ctx->pos < 2)
 		ctx->pos = 2;
 
-	if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) {
-		if (!dir_emit_dot(file, ctx))
-			goto fill_failed;
-	} else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
-		if (!dir_emit_dotdot(file, ctx))
-			goto fill_failed;
-	} else {
+	if (memcmp(de->name, MSDOS_DOT, MSDOS_NAME) &&
+	    memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
 		unsigned long inum;
 		loff_t i_pos = fat_make_i_pos(sb, bh, de);
 		struct inode *tmp = fat_iget(sb, i_pos);