diff mbox series

[1/2] vfs: Convert debugfs to use the new mount API

Message ID 49d1f108-46e3-443f-85a3-6dd730c5d076@redhat.com (mailing list archive)
State New, archived
Headers show
Series vfs: convert debugfs & tracefs to the new mount API | expand

Commit Message

Eric Sandeen March 5, 2024, 11:08 p.m. UTC
From: David Howells <dhowells@redhat.com>

Convert the debugfs filesystem to the new internal mount API as the old
one will be obsoleted and removed.  This allows greater flexibility in
communication of mount parameters between userspace, the VFS and the
filesystem.

See Documentation/filesystems/mount_api.txt for more information.

Signed-off-by: David Howells <dhowells@redhat.com>
Co-developed-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
[sandeen: forward port to modern kernel, fix remounting]
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: "Rafael J. Wysocki" <rafael@kernel.org>
---
 fs/debugfs/inode.c | 198 +++++++++++++++++++++------------------------
 1 file changed, 93 insertions(+), 105 deletions(-)

Comments

Christian Brauner March 6, 2024, 10:50 a.m. UTC | #1
On Tue, Mar 05, 2024 at 05:08:39PM -0600, Eric Sandeen wrote:
> From: David Howells <dhowells@redhat.com>
> 
> Convert the debugfs filesystem to the new internal mount API as the old
> one will be obsoleted and removed.  This allows greater flexibility in
> communication of mount parameters between userspace, the VFS and the
> filesystem.
> 
> See Documentation/filesystems/mount_api.txt for more information.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> Co-developed-by: Eric Sandeen <sandeen@redhat.com>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> [sandeen: forward port to modern kernel, fix remounting]
> cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> cc: "Rafael J. Wysocki" <rafael@kernel.org>
> ---
>  fs/debugfs/inode.c | 198 +++++++++++++++++++++------------------------
>  1 file changed, 93 insertions(+), 105 deletions(-)
> 
> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
> index 034a617cb1a5..c2adfb272da8 100644
> --- a/fs/debugfs/inode.c
> +++ b/fs/debugfs/inode.c
> @@ -14,7 +14,8 @@
>  
>  #include <linux/module.h>
>  #include <linux/fs.h>
> -#include <linux/mount.h>
> +#include <linux/fs_context.h>
> +#include <linux/fs_parser.h>
>  #include <linux/pagemap.h>
>  #include <linux/init.h>
>  #include <linux/kobject.h>
> @@ -23,7 +24,6 @@
>  #include <linux/fsnotify.h>
>  #include <linux/string.h>
>  #include <linux/seq_file.h>
> -#include <linux/parser.h>
>  #include <linux/magic.h>
>  #include <linux/slab.h>
>  #include <linux/security.h>
> @@ -77,7 +77,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
>  	return inode;
>  }
>  
> -struct debugfs_mount_opts {
> +struct debugfs_fs_info {
>  	kuid_t uid;
>  	kgid_t gid;
>  	umode_t mode;
> @@ -89,68 +89,51 @@ enum {
>  	Opt_uid,
>  	Opt_gid,
>  	Opt_mode,
> -	Opt_err
>  };
>  
> -static const match_table_t tokens = {
> -	{Opt_uid, "uid=%u"},
> -	{Opt_gid, "gid=%u"},
> -	{Opt_mode, "mode=%o"},
> -	{Opt_err, NULL}
> +static const struct fs_parameter_spec debugfs_param_specs[] = {
> +	fsparam_u32	("gid",		Opt_gid),
> +	fsparam_u32oct	("mode",	Opt_mode),
> +	fsparam_u32	("uid",		Opt_uid),
> +	{}
>  };
>  
> -struct debugfs_fs_info {
> -	struct debugfs_mount_opts mount_opts;
> -};
> -
> -static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
> +static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
>  {
> -	substring_t args[MAX_OPT_ARGS];
> -	int option;
> -	int token;
> +	struct debugfs_fs_info *opts = fc->s_fs_info;
> +	struct fs_parse_result result;
>  	kuid_t uid;
>  	kgid_t gid;
> -	char *p;
> -
> -	opts->opts = 0;
> -	opts->mode = DEBUGFS_DEFAULT_MODE;
> -
> -	while ((p = strsep(&data, ",")) != NULL) {
> -		if (!*p)
> -			continue;
> -
> -		token = match_token(p, tokens, args);
> -		switch (token) {
> -		case Opt_uid:
> -			if (match_int(&args[0], &option))
> -				return -EINVAL;
> -			uid = make_kuid(current_user_ns(), option);
> -			if (!uid_valid(uid))
> -				return -EINVAL;
> -			opts->uid = uid;
> -			break;
> -		case Opt_gid:
> -			if (match_int(&args[0], &option))
> -				return -EINVAL;
> -			gid = make_kgid(current_user_ns(), option);
> -			if (!gid_valid(gid))
> -				return -EINVAL;
> -			opts->gid = gid;
> -			break;
> -		case Opt_mode:
> -			if (match_octal(&args[0], &option))
> -				return -EINVAL;
> -			opts->mode = option & S_IALLUGO;
> -			break;
> -		/*
> -		 * We might like to report bad mount options here;
> -		 * but traditionally debugfs has ignored all mount options
> -		 */
> -		}
> -
> -		opts->opts |= BIT(token);
> +	int opt;
> +
> +	opt = fs_parse(fc, debugfs_param_specs, param, &result);
> +	if (opt < 0)
> +		return opt;
> +
> +	switch (opt) {
> +	case Opt_uid:
> +		uid = make_kuid(current_user_ns(), result.uint_32);
> +		if (!uid_valid(uid))
> +			return invalf(fc, "Unknown uid");

Fwiw, Opt_{g,u}d I would like to see that either moved completely to the
VFS or we need to provide standardized helpers.

The issue is that for a userns mountable filesytems the validation done
here isn't enough and that's easy to miss (Obviously, debugfs isn't
relevant as it's not userns mountable but still.). For example, for
in tmpfs I recently fixed a bug where validation was wrong:

        case Opt_uid:
                kuid = make_kuid(current_user_ns(), result.uint_32);
                if (!uid_valid(kuid))
                        goto bad_value;

                /*
                 * The requested uid must be representable in the
                 * filesystem's idmapping.
                 */
                if (!kuid_has_mapping(fc->user_ns, kuid))
                        goto bad_value;

                ctx->uid = kuid;
                break;

The crucial step where the {g,u}id must also be representable in the
superblock's namespace not just in the caller's was missing. So really
we should have a generic helper that we can reycle for all Opt_{g,u}id
mount options or move that Opt_{g,u}id to the VFS itself. There was some
nastiness involved in this when I last looked at this though. And all
that invalfc() reporting should then also be identical across
filesystems.

So that's a ToDo for the future.

> +		opts->uid = uid;
> +		break;
> +	case Opt_gid:
> +		gid = make_kgid(current_user_ns(), result.uint_32);
> +		if (!gid_valid(gid))
> +			return invalf(fc, "Unknown gid");
> +		opts->gid = gid;
> +		break;
> +	case Opt_mode:
> +		opts->mode = result.uint_32 & S_IALLUGO;
> +		break;
> +	/*
> +	 * We might like to report bad mount options here;
> +	 * but traditionally debugfs has ignored all mount options
> +	 */
>  	}

We can actually differentiate this. During superblock creation and
remount we're now setting fc->oldapi e.g., what I've done for btrfs in
fs/btrfs/super.c:btrfs_reconfigure_for_mount() or what I did for
fs/overlayfs/params.c:ovl_parse_param().

There's a tiny wrinkle though. We currently have no way of letting
userspace know whether a filesystem supports the new mount API or not
(see that mount option probing systemd does we recently discussed). So
if say mount(8) remounts debugfs with mount options that were ignored in
the old mount api that are now rejected in the new mount api users now
see failures they didn't see before.

For the user it's completely intransparent why that failure happens. For
them nothing changed from util-linux's perspective. So really, we should
probably continue to ignore old mount options for backward compatibility.

>  
> +	opts->opts |= BIT(opt);
> +
>  	return 0;
>  }
>  
> @@ -158,23 +141,22 @@ static void _debugfs_apply_options(struct super_block *sb, bool remount)
>  {
>  	struct debugfs_fs_info *fsi = sb->s_fs_info;
>  	struct inode *inode = d_inode(sb->s_root);
> -	struct debugfs_mount_opts *opts = &fsi->mount_opts;
>  
>  	/*
>  	 * On remount, only reset mode/uid/gid if they were provided as mount
>  	 * options.
>  	 */
>  
> -	if (!remount || opts->opts & BIT(Opt_mode)) {
> +	if (!remount || fsi->opts & BIT(Opt_mode)) {
>  		inode->i_mode &= ~S_IALLUGO;
> -		inode->i_mode |= opts->mode;
> +		inode->i_mode |= fsi->mode;
>  	}
>  
> -	if (!remount || opts->opts & BIT(Opt_uid))
> -		inode->i_uid = opts->uid;
> +	if (!remount || fsi->opts & BIT(Opt_uid))
> +		inode->i_uid = fsi->uid;
>  
> -	if (!remount || opts->opts & BIT(Opt_gid))
> -		inode->i_gid = opts->gid;
> +	if (!remount || fsi->opts & BIT(Opt_gid))
> +		inode->i_gid = fsi->gid;
>  }
>  
>  static void debugfs_apply_options(struct super_block *sb)
> @@ -187,35 +169,33 @@ static void debugfs_apply_options_remount(struct super_block *sb)
>  	_debugfs_apply_options(sb, true);
>  }
>  
> -static int debugfs_remount(struct super_block *sb, int *flags, char *data)
> +static int debugfs_reconfigure(struct fs_context *fc)
>  {
> -	int err;
> -	struct debugfs_fs_info *fsi = sb->s_fs_info;
> +	struct super_block *sb = fc->root->d_sb;
> +	struct debugfs_fs_info *sb_opts = sb->s_fs_info;
> +	struct debugfs_fs_info *new_opts = fc->s_fs_info;
>  
>  	sync_filesystem(sb);
> -	err = debugfs_parse_options(data, &fsi->mount_opts);
> -	if (err)
> -		goto fail;
>  
> +	/* structure copy of new mount options to sb */
> +	*sb_opts = *new_opts;
>  	debugfs_apply_options_remount(sb);
>  
> -fail:
> -	return err;
> +	return 0;
>  }
>  
>  static int debugfs_show_options(struct seq_file *m, struct dentry *root)
>  {
>  	struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
> -	struct debugfs_mount_opts *opts = &fsi->mount_opts;
>  
> -	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
> +	if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID))
>  		seq_printf(m, ",uid=%u",
> -			   from_kuid_munged(&init_user_ns, opts->uid));
> -	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
> +			   from_kuid_munged(&init_user_ns, fsi->uid));
> +	if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID))
>  		seq_printf(m, ",gid=%u",
> -			   from_kgid_munged(&init_user_ns, opts->gid));
> -	if (opts->mode != DEBUGFS_DEFAULT_MODE)
> -		seq_printf(m, ",mode=%o", opts->mode);
> +			   from_kgid_munged(&init_user_ns, fsi->gid));
> +	if (fsi->mode != DEBUGFS_DEFAULT_MODE)
> +		seq_printf(m, ",mode=%o", fsi->mode);
>  
>  	return 0;
>  }
> @@ -229,7 +209,6 @@ static void debugfs_free_inode(struct inode *inode)
>  
>  static const struct super_operations debugfs_super_operations = {
>  	.statfs		= simple_statfs,
> -	.remount_fs	= debugfs_remount,
>  	.show_options	= debugfs_show_options,
>  	.free_inode	= debugfs_free_inode,
>  };
> @@ -263,26 +242,14 @@ static const struct dentry_operations debugfs_dops = {
>  	.d_automount = debugfs_automount,
>  };
>  
> -static int debug_fill_super(struct super_block *sb, void *data, int silent)
> +static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
>  {
>  	static const struct tree_descr debug_files[] = {{""}};
> -	struct debugfs_fs_info *fsi;
>  	int err;
>  
> -	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
> -	sb->s_fs_info = fsi;
> -	if (!fsi) {
> -		err = -ENOMEM;
> -		goto fail;
> -	}
> -
> -	err = debugfs_parse_options(data, &fsi->mount_opts);
> +	err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
>  	if (err)
> -		goto fail;
> -
> -	err  =  simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
> -	if (err)
> -		goto fail;
> +		return err;
>  
>  	sb->s_op = &debugfs_super_operations;
>  	sb->s_d_op = &debugfs_dops;
> @@ -290,27 +257,48 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
>  	debugfs_apply_options(sb);
>  
>  	return 0;
> -
> -fail:
> -	kfree(fsi);
> -	sb->s_fs_info = NULL;
> -	return err;
>  }
>  
> -static struct dentry *debug_mount(struct file_system_type *fs_type,
> -			int flags, const char *dev_name,
> -			void *data)
> +static int debugfs_get_tree(struct fs_context *fc)
>  {
>  	if (!(debugfs_allow & DEBUGFS_ALLOW_API))
> -		return ERR_PTR(-EPERM);
> +		return -EPERM;
> +
> +	return get_tree_single(fc, debugfs_fill_super);
> +}
> +
> +static void debugfs_free_fc(struct fs_context *fc)
> +{
> +	kfree(fc->s_fs_info);
> +}
>  
> -	return mount_single(fs_type, flags, data, debug_fill_super);
> +static const struct fs_context_operations debugfs_context_ops = {
> +	.free		= debugfs_free_fc,
> +	.parse_param	= debugfs_parse_param,
> +	.get_tree	= debugfs_get_tree,
> +	.reconfigure	= debugfs_reconfigure,
> +};
> +
> +static int debugfs_init_fs_context(struct fs_context *fc)
> +{
> +	struct debugfs_fs_info *fsi;
> +
> +	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
> +	if (!fsi)
> +		return -ENOMEM;
> +
> +	fsi->mode = DEBUGFS_DEFAULT_MODE;
> +
> +	fc->s_fs_info = fsi;
> +	fc->ops = &debugfs_context_ops;
> +	return 0;
>  }
>  
>  static struct file_system_type debug_fs_type = {
>  	.owner =	THIS_MODULE,
>  	.name =		"debugfs",
> -	.mount =	debug_mount,
> +	.init_fs_context = debugfs_init_fs_context,
> +	.parameters =	debugfs_param_specs,
>  	.kill_sb =	kill_litter_super,
>  };
>  MODULE_ALIAS_FS("debugfs");
> -- 
> 2.43.0
> 
>
Miklos Szeredi March 6, 2024, 12:13 p.m. UTC | #2
On Wed, 6 Mar 2024 at 11:57, Christian Brauner <brauner@kernel.org> wrote:

> There's a tiny wrinkle though. We currently have no way of letting
> userspace know whether a filesystem supports the new mount API or not
> (see that mount option probing systemd does we recently discussed). So
> if say mount(8) remounts debugfs with mount options that were ignored in
> the old mount api that are now rejected in the new mount api users now
> see failures they didn't see before.
>
> For the user it's completely intransparent why that failure happens. For
> them nothing changed from util-linux's perspective. So really, we should
> probably continue to ignore old mount options for backward compatibility.

The reject behavior could be made conditional on e.g. an fsopen() flag.

I.e. FSOPEN_REJECT_UNKNOWN would make unknown options be always
rejected.  Without this flag fsconfig(2) would behave identically
before/after the conversion.

Thanks,
Miklos
Christian Brauner March 6, 2024, 12:17 p.m. UTC | #3
On Wed, Mar 06, 2024 at 01:13:05PM +0100, Miklos Szeredi wrote:
> On Wed, 6 Mar 2024 at 11:57, Christian Brauner <brauner@kernel.org> wrote:
> 
> > There's a tiny wrinkle though. We currently have no way of letting
> > userspace know whether a filesystem supports the new mount API or not
> > (see that mount option probing systemd does we recently discussed). So
> > if say mount(8) remounts debugfs with mount options that were ignored in
> > the old mount api that are now rejected in the new mount api users now
> > see failures they didn't see before.
> >
> > For the user it's completely intransparent why that failure happens. For
> > them nothing changed from util-linux's perspective. So really, we should
> > probably continue to ignore old mount options for backward compatibility.
> 
> The reject behavior could be made conditional on e.g. an fsopen() flag.

and fspick() which I think is more relevant.

> 
> I.e. FSOPEN_REJECT_UNKNOWN would make unknown options be always
> rejected.  Without this flag fsconfig(2) would behave identically
> before/after the conversion.

Yeah, that would work. That would only make sense if we make all
filesystems reject unknown mount options by default when they're
switched to the new mount api imho. When we recognize the request comes
from the old mount api fc->oldapi we continue ignoring as we did before.
If it comes from the new mount api we reject unless
FSOPEN/FSPICK_REJECT_UKNOWN was specified.
Eric Sandeen March 6, 2024, 4:35 p.m. UTC | #4
On 3/6/24 6:17 AM, Christian Brauner wrote:
> On Wed, Mar 06, 2024 at 01:13:05PM +0100, Miklos Szeredi wrote:
>> On Wed, 6 Mar 2024 at 11:57, Christian Brauner <brauner@kernel.org> wrote:
>>
>>> There's a tiny wrinkle though. We currently have no way of letting
>>> userspace know whether a filesystem supports the new mount API or not
>>> (see that mount option probing systemd does we recently discussed). So
>>> if say mount(8) remounts debugfs with mount options that were ignored in
>>> the old mount api that are now rejected in the new mount api users now
>>> see failures they didn't see before.

Oh, right - the problem is the new mount API rejects unknown options
internally, right?

>>> For the user it's completely intransparent why that failure happens. For
>>> them nothing changed from util-linux's perspective. So really, we should
>>> probably continue to ignore old mount options for backward compatibility.
>>
>> The reject behavior could be made conditional on e.g. an fsopen() flag.
> 
> and fspick() which I think is more relevant.
> 
>>
>> I.e. FSOPEN_REJECT_UNKNOWN would make unknown options be always
>> rejected.  Without this flag fsconfig(2) would behave identically
>> before/after the conversion.
> 
> Yeah, that would work. That would only make sense if we make all
> filesystems reject unknown mount options by default when they're
> switched to the new mount api imho. When we recognize the request comes
> from the old mount api fc->oldapi we continue ignoring as we did before.
> If it comes from the new mount api we reject unless
> FSOPEN/FSPICK_REJECT_UKNOWN was specified.

Ok, good point. Just thinking out loud, I guess an fsopen/fspick flag does
make more sense than i.e. each filesystem deciding whether it should reject
unknown options in its ->init_fs_context(), for consistency?

Right now it looks like the majority of filesystems do reject unknown
options internally, already.

(To muddy the waters more, other inconsistencies I've thought about are
re: how the fileystem handles remount. For example, which options are
remountable and which are not, and should non-remountable options fail?
Also whether the filesystem internally preserves the original set of
options and applies the new set as a delta, or whether it treats the
new set as the exact set of options requested post-remount, but that's
probably a topic for another day.)

-Eric
Christian Brauner March 7, 2024, 12:04 p.m. UTC | #5
On Wed, Mar 06, 2024 at 10:35:02AM -0600, Eric Sandeen wrote:
> On 3/6/24 6:17 AM, Christian Brauner wrote:
> > On Wed, Mar 06, 2024 at 01:13:05PM +0100, Miklos Szeredi wrote:
> >> On Wed, 6 Mar 2024 at 11:57, Christian Brauner <brauner@kernel.org> wrote:
> >>
> >>> There's a tiny wrinkle though. We currently have no way of letting
> >>> userspace know whether a filesystem supports the new mount API or not
> >>> (see that mount option probing systemd does we recently discussed). So
> >>> if say mount(8) remounts debugfs with mount options that were ignored in
> >>> the old mount api that are now rejected in the new mount api users now
> >>> see failures they didn't see before.
> 
> Oh, right - the problem is the new mount API rejects unknown options
> internally, right?
> 
> >>> For the user it's completely intransparent why that failure happens. For
> >>> them nothing changed from util-linux's perspective. So really, we should
> >>> probably continue to ignore old mount options for backward compatibility.
> >>
> >> The reject behavior could be made conditional on e.g. an fsopen() flag.
> > 
> > and fspick() which I think is more relevant.
> > 
> >>
> >> I.e. FSOPEN_REJECT_UNKNOWN would make unknown options be always
> >> rejected.  Without this flag fsconfig(2) would behave identically
> >> before/after the conversion.
> > 
> > Yeah, that would work. That would only make sense if we make all
> > filesystems reject unknown mount options by default when they're
> > switched to the new mount api imho. When we recognize the request comes
> > from the old mount api fc->oldapi we continue ignoring as we did before.
> > If it comes from the new mount api we reject unless
> > FSOPEN/FSPICK_REJECT_UKNOWN was specified.

I actually did misparse that I now realize. I read that as "ignore
unknown" instead of "reject unknown".

> 
> Ok, good point. Just thinking out loud, I guess an fsopen/fspick flag does
> make more sense than i.e. each filesystem deciding whether it should reject
> unknown options in its ->init_fs_context(), for consistency?

Yes, I think so. The interesting case for util-linux according to Karel
was remounting where mount(8) wants to gather all options from fstab and
mountinfo, add new options from the command line and send it to the
kernel without having to care about filesystems specific options that
cannot be changed on remount.

However, other users that do use the api programatically do care about
this. They want to get an error when changing a mount property doesn't
work.

I think doing this on a per-fs basis just leads to more inconsistency.
I'd rather have this be something we enforce on a higher level if we do
it at all.

> 
> Right now it looks like the majority of filesystems do reject unknown
> options internally, already.

Yeah, it's mostly pseudo fses that don't, I reckon.

> 
> (To muddy the waters more, other inconsistencies I've thought about are
> re: how the fileystem handles remount. For example, which options are
> remountable and which are not, and should non-remountable options fail?

Yes, they should but similar to fsopen() we should have an fspick()
flag. This was what I mentioned earlier in my response to Miklos.

But I'm not yet clear whether FSOPEN/FSPICK_IGNORE_UNKNOWN wouldn't make
more sense than FSOPEN/FSPICK_REJECT_UNKNOWN. IOW, invert the logic.

Because as you said most filesystems do already reject unknown mount
options and it's a few that don't. So I think we should focus on the
remount case and for that I think we want FSOPEN_IGNORE_UNKNOWN
otherwise default to rejecting unknown options if coming from the new
mount api? I'm not sure.

> Also whether the filesystem internally preserves the original set of
> options and applies the new set as a delta, or whether it treats the
> new set as the exact set of options requested post-remount, but that's
> probably a topic for another day.)

For vfs mount properties it's a delta in the new mount api. The old
mount api didn't have a concept of add or subtract. If you had a
read-only mount and you wanted to also make it noexec then you'd have to
specify "ro" again otherwise the mount would be made rw. mount(8) hides
that behavior by retrieving the current mountflags from mountinfo and
adding it to the remount call if the old mount api is used.
mount_setattr() does that directly in the kernel and always does a
delta.

For filesystem specific properties it's probably irrelevant because
remount already is effectively a delta for most filesystems. IOW, you
don't suddenly get "usrquota" unset because you've changed the "dax"
property on your xfs filesystem which would be worrisome. :)

The thing is though that changing one mount option might implicitly
change other mount options. But that's something that only the
filesystem should decide. So I don't think this is something we need to
worry about?

The way I see mount(8) currently doing it is to change:

(1) filesystem specific mount properties via fspick()+fsconfig()
(2) generic mount properties via mount_setattr()

during a remount call.
Greg KH March 7, 2024, 9:10 p.m. UTC | #6
On Tue, Mar 05, 2024 at 05:08:39PM -0600, Eric Sandeen wrote:
> From: David Howells <dhowells@redhat.com>
> 
> Convert the debugfs filesystem to the new internal mount API as the old
> one will be obsoleted and removed.  This allows greater flexibility in
> communication of mount parameters between userspace, the VFS and the
> filesystem.
> 
> See Documentation/filesystems/mount_api.txt for more information.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> Co-developed-by: Eric Sandeen <sandeen@redhat.com>
> Signed-off-by: Eric Sandeen <sandeen@redhat.com>
> [sandeen: forward port to modern kernel, fix remounting]
> cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> cc: "Rafael J. Wysocki" <rafael@kernel.org>
> ---
>  fs/debugfs/inode.c | 198 +++++++++++++++++++++------------------------
>  1 file changed, 93 insertions(+), 105 deletions(-)

If the vfs maintainers are ok with the conversion, it's fine with me:

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Miklos Szeredi March 8, 2024, 2:54 p.m. UTC | #7
On Thu, 7 Mar 2024 at 13:04, Christian Brauner <brauner@kernel.org> wrote:

> But I'm not yet clear whether FSOPEN/FSPICK_IGNORE_UNKNOWN wouldn't make
> more sense than FSOPEN/FSPICK_REJECT_UNKNOWN. IOW, invert the logic.

I think there needs to be a mode for fsopen/fspick/fconfig API that
allows implementing full backward compatibility with the old behavior
of mount(8), both in case of new mount and remount.  By old I mean
before any of the API conversions started.  If some filesystems
rejected unknown options and some ignored them, then that is what this
mode should continue to do.  This is what we currently have, so
without additional flags this is what the API should continue to
support.

And I think there needs to be a new "strict" mode for fsopen/fspick
that has clear rules for how filesystems should handle options, which
as you say most filesystem already do.   Since this is a new mode, I
think it needs a new flag, that is rejected if the API or the fs
doesn't support this mode.  Filesystems which already have sane
behavior need not care, they would work the same in both modes.
Filesystems that are currently inconsistent would have to implement
both modes.

Thanks,
Miklos
Eric Sandeen May 30, 2024, 2:21 p.m. UTC | #8
On 3/6/24 4:50 AM, Christian Brauner wrote:
> Fwiw, Opt_{g,u}d I would like to see that either moved completely to the
> VFS or we need to provide standardized helpers.
> 
> The issue is that for a userns mountable filesytems the validation done
> here isn't enough and that's easy to miss (Obviously, debugfs isn't
> relevant as it's not userns mountable but still.). For example, for
> in tmpfs I recently fixed a bug where validation was wrong:
> 
>         case Opt_uid:
>                 kuid = make_kuid(current_user_ns(), result.uint_32);
>                 if (!uid_valid(kuid))
>                         goto bad_value;
> 
>                 /*
>                  * The requested uid must be representable in the
>                  * filesystem's idmapping.
>                  */
>                 if (!kuid_has_mapping(fc->user_ns, kuid))
>                         goto bad_value;
> 
>                 ctx->uid = kuid;
>                 break;
> 
> The crucial step where the {g,u}id must also be representable in the
> superblock's namespace not just in the caller's was missing. So really
> we should have a generic helper that we can reycle for all Opt_{g,u}id
> mount options or move that Opt_{g,u}id to the VFS itself. There was some
> nastiness involved in this when I last looked at this though. And all
> that invalfc() reporting should then also be identical across
> filesystems.
> 
> So that's a ToDo for the future.

Just FWIW, I started looking at this, and making i.e. an fsparam_uid() and
moving all the checking into the parser seemed like a nice idea, and it
removed a lot of boilerplate from several filesystems. Option parsing was
then looking like simply:

        fsparam_uid     ("uid",         Opt_uid),

...

        case Opt_uid:
               opts->uid = result.uid;
               break;

which is pretty nice, I think.

But unfortunately after 7f5d38141e30 ("new primitive: __fs_parse()"), parsers
no longer get an *fc, which means we can't get to fc->user_ns while parsing,
which I guess we need for proper validation.

It seems that 7f5d38141e30 was done for the benefit of rbd (?) - I'll have
to look more closely at that and see if we can resurrect access to fc, unless
there are other ways around this.

-Eric
diff mbox series

Patch

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 034a617cb1a5..c2adfb272da8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -14,7 +14,8 @@ 
 
 #include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/mount.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/kobject.h>
@@ -23,7 +24,6 @@ 
 #include <linux/fsnotify.h>
 #include <linux/string.h>
 #include <linux/seq_file.h>
-#include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/security.h>
@@ -77,7 +77,7 @@  static struct inode *debugfs_get_inode(struct super_block *sb)
 	return inode;
 }
 
-struct debugfs_mount_opts {
+struct debugfs_fs_info {
 	kuid_t uid;
 	kgid_t gid;
 	umode_t mode;
@@ -89,68 +89,51 @@  enum {
 	Opt_uid,
 	Opt_gid,
 	Opt_mode,
-	Opt_err
 };
 
-static const match_table_t tokens = {
-	{Opt_uid, "uid=%u"},
-	{Opt_gid, "gid=%u"},
-	{Opt_mode, "mode=%o"},
-	{Opt_err, NULL}
+static const struct fs_parameter_spec debugfs_param_specs[] = {
+	fsparam_u32	("gid",		Opt_gid),
+	fsparam_u32oct	("mode",	Opt_mode),
+	fsparam_u32	("uid",		Opt_uid),
+	{}
 };
 
-struct debugfs_fs_info {
-	struct debugfs_mount_opts mount_opts;
-};
-
-static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
+static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-	substring_t args[MAX_OPT_ARGS];
-	int option;
-	int token;
+	struct debugfs_fs_info *opts = fc->s_fs_info;
+	struct fs_parse_result result;
 	kuid_t uid;
 	kgid_t gid;
-	char *p;
-
-	opts->opts = 0;
-	opts->mode = DEBUGFS_DEFAULT_MODE;
-
-	while ((p = strsep(&data, ",")) != NULL) {
-		if (!*p)
-			continue;
-
-		token = match_token(p, tokens, args);
-		switch (token) {
-		case Opt_uid:
-			if (match_int(&args[0], &option))
-				return -EINVAL;
-			uid = make_kuid(current_user_ns(), option);
-			if (!uid_valid(uid))
-				return -EINVAL;
-			opts->uid = uid;
-			break;
-		case Opt_gid:
-			if (match_int(&args[0], &option))
-				return -EINVAL;
-			gid = make_kgid(current_user_ns(), option);
-			if (!gid_valid(gid))
-				return -EINVAL;
-			opts->gid = gid;
-			break;
-		case Opt_mode:
-			if (match_octal(&args[0], &option))
-				return -EINVAL;
-			opts->mode = option & S_IALLUGO;
-			break;
-		/*
-		 * We might like to report bad mount options here;
-		 * but traditionally debugfs has ignored all mount options
-		 */
-		}
-
-		opts->opts |= BIT(token);
+	int opt;
+
+	opt = fs_parse(fc, debugfs_param_specs, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_uid:
+		uid = make_kuid(current_user_ns(), result.uint_32);
+		if (!uid_valid(uid))
+			return invalf(fc, "Unknown uid");
+		opts->uid = uid;
+		break;
+	case Opt_gid:
+		gid = make_kgid(current_user_ns(), result.uint_32);
+		if (!gid_valid(gid))
+			return invalf(fc, "Unknown gid");
+		opts->gid = gid;
+		break;
+	case Opt_mode:
+		opts->mode = result.uint_32 & S_IALLUGO;
+		break;
+	/*
+	 * We might like to report bad mount options here;
+	 * but traditionally debugfs has ignored all mount options
+	 */
 	}
 
+	opts->opts |= BIT(opt);
+
 	return 0;
 }
 
@@ -158,23 +141,22 @@  static void _debugfs_apply_options(struct super_block *sb, bool remount)
 {
 	struct debugfs_fs_info *fsi = sb->s_fs_info;
 	struct inode *inode = d_inode(sb->s_root);
-	struct debugfs_mount_opts *opts = &fsi->mount_opts;
 
 	/*
 	 * On remount, only reset mode/uid/gid if they were provided as mount
 	 * options.
 	 */
 
-	if (!remount || opts->opts & BIT(Opt_mode)) {
+	if (!remount || fsi->opts & BIT(Opt_mode)) {
 		inode->i_mode &= ~S_IALLUGO;
-		inode->i_mode |= opts->mode;
+		inode->i_mode |= fsi->mode;
 	}
 
-	if (!remount || opts->opts & BIT(Opt_uid))
-		inode->i_uid = opts->uid;
+	if (!remount || fsi->opts & BIT(Opt_uid))
+		inode->i_uid = fsi->uid;
 
-	if (!remount || opts->opts & BIT(Opt_gid))
-		inode->i_gid = opts->gid;
+	if (!remount || fsi->opts & BIT(Opt_gid))
+		inode->i_gid = fsi->gid;
 }
 
 static void debugfs_apply_options(struct super_block *sb)
@@ -187,35 +169,33 @@  static void debugfs_apply_options_remount(struct super_block *sb)
 	_debugfs_apply_options(sb, true);
 }
 
-static int debugfs_remount(struct super_block *sb, int *flags, char *data)
+static int debugfs_reconfigure(struct fs_context *fc)
 {
-	int err;
-	struct debugfs_fs_info *fsi = sb->s_fs_info;
+	struct super_block *sb = fc->root->d_sb;
+	struct debugfs_fs_info *sb_opts = sb->s_fs_info;
+	struct debugfs_fs_info *new_opts = fc->s_fs_info;
 
 	sync_filesystem(sb);
-	err = debugfs_parse_options(data, &fsi->mount_opts);
-	if (err)
-		goto fail;
 
+	/* structure copy of new mount options to sb */
+	*sb_opts = *new_opts;
 	debugfs_apply_options_remount(sb);
 
-fail:
-	return err;
+	return 0;
 }
 
 static int debugfs_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
-	struct debugfs_mount_opts *opts = &fsi->mount_opts;
 
-	if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+	if (!uid_eq(fsi->uid, GLOBAL_ROOT_UID))
 		seq_printf(m, ",uid=%u",
-			   from_kuid_munged(&init_user_ns, opts->uid));
-	if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+			   from_kuid_munged(&init_user_ns, fsi->uid));
+	if (!gid_eq(fsi->gid, GLOBAL_ROOT_GID))
 		seq_printf(m, ",gid=%u",
-			   from_kgid_munged(&init_user_ns, opts->gid));
-	if (opts->mode != DEBUGFS_DEFAULT_MODE)
-		seq_printf(m, ",mode=%o", opts->mode);
+			   from_kgid_munged(&init_user_ns, fsi->gid));
+	if (fsi->mode != DEBUGFS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", fsi->mode);
 
 	return 0;
 }
@@ -229,7 +209,6 @@  static void debugfs_free_inode(struct inode *inode)
 
 static const struct super_operations debugfs_super_operations = {
 	.statfs		= simple_statfs,
-	.remount_fs	= debugfs_remount,
 	.show_options	= debugfs_show_options,
 	.free_inode	= debugfs_free_inode,
 };
@@ -263,26 +242,14 @@  static const struct dentry_operations debugfs_dops = {
 	.d_automount = debugfs_automount,
 };
 
-static int debug_fill_super(struct super_block *sb, void *data, int silent)
+static int debugfs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
 	static const struct tree_descr debug_files[] = {{""}};
-	struct debugfs_fs_info *fsi;
 	int err;
 
-	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
-	sb->s_fs_info = fsi;
-	if (!fsi) {
-		err = -ENOMEM;
-		goto fail;
-	}
-
-	err = debugfs_parse_options(data, &fsi->mount_opts);
+	err = simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 	if (err)
-		goto fail;
-
-	err  =  simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
-	if (err)
-		goto fail;
+		return err;
 
 	sb->s_op = &debugfs_super_operations;
 	sb->s_d_op = &debugfs_dops;
@@ -290,27 +257,48 @@  static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	debugfs_apply_options(sb);
 
 	return 0;
-
-fail:
-	kfree(fsi);
-	sb->s_fs_info = NULL;
-	return err;
 }
 
-static struct dentry *debug_mount(struct file_system_type *fs_type,
-			int flags, const char *dev_name,
-			void *data)
+static int debugfs_get_tree(struct fs_context *fc)
 {
 	if (!(debugfs_allow & DEBUGFS_ALLOW_API))
-		return ERR_PTR(-EPERM);
+		return -EPERM;
+
+	return get_tree_single(fc, debugfs_fill_super);
+}
+
+static void debugfs_free_fc(struct fs_context *fc)
+{
+	kfree(fc->s_fs_info);
+}
 
-	return mount_single(fs_type, flags, data, debug_fill_super);
+static const struct fs_context_operations debugfs_context_ops = {
+	.free		= debugfs_free_fc,
+	.parse_param	= debugfs_parse_param,
+	.get_tree	= debugfs_get_tree,
+	.reconfigure	= debugfs_reconfigure,
+};
+
+static int debugfs_init_fs_context(struct fs_context *fc)
+{
+	struct debugfs_fs_info *fsi;
+
+	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
+	if (!fsi)
+		return -ENOMEM;
+
+	fsi->mode = DEBUGFS_DEFAULT_MODE;
+
+	fc->s_fs_info = fsi;
+	fc->ops = &debugfs_context_ops;
+	return 0;
 }
 
 static struct file_system_type debug_fs_type = {
 	.owner =	THIS_MODULE,
 	.name =		"debugfs",
-	.mount =	debug_mount,
+	.init_fs_context = debugfs_init_fs_context,
+	.parameters =	debugfs_param_specs,
 	.kill_sb =	kill_litter_super,
 };
 MODULE_ALIAS_FS("debugfs");