diff mbox series

[v2,01/18] vfs: add miscattr ops

Message ID 20210322144916.137245-2-mszeredi@redhat.com (mailing list archive)
State New, archived
Headers show
Series new kAPI for FS_IOC_[GS]ETFLAGS/FS_IOC_FS[GS]ETXATTR | expand

Commit Message

Miklos Szeredi March 22, 2021, 2:48 p.m. UTC
There's a substantial amount of boilerplate in filesystems handling
FS_IOC_[GS]ETFLAGS/ FS_IOC_FS[GS]ETXATTR ioctls.

Also due to userspace buffers being involved in the ioctl API this is
difficult to stack, as shown by overlayfs issues related to these ioctls.

Introduce a new internal API named "miscattr" (fsxattr can be confused with
xattr, xflags is inappropriate, since this is more than just flags).

There's significant overlap between flags and xflags and this API handles
the conversions automatically, so filesystems may choose which one to use.

In ->miscattr_get() a hint is provided to the filesystem whether flags or
xattr are being requested by userspace, but in this series this hint is
ignored by all filesystems, since generating all the attributes is cheap.

If a filesystem doesn't implemement the miscattr API, just fall back to
f_op->ioctl().  When all filesystems are converted, the fallback can be
removed.

32bit compat ioctls are now handled by the generic code as well.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/locking.rst |   5 +
 Documentation/filesystems/vfs.rst     |  15 ++
 fs/ioctl.c                            | 329 ++++++++++++++++++++++++++
 include/linux/fs.h                    |   4 +
 include/linux/miscattr.h              |  53 +++++
 5 files changed, 406 insertions(+)
 create mode 100644 include/linux/miscattr.h

Comments

Darrick J. Wong March 22, 2021, 10:33 p.m. UTC | #1
On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
> There's a substantial amount of boilerplate in filesystems handling
> FS_IOC_[GS]ETFLAGS/ FS_IOC_FS[GS]ETXATTR ioctls.
> 
> Also due to userspace buffers being involved in the ioctl API this is
> difficult to stack, as shown by overlayfs issues related to these ioctls.
> 
> Introduce a new internal API named "miscattr" (fsxattr can be confused with
> xattr, xflags is inappropriate, since this is more than just flags).
> 
> There's significant overlap between flags and xflags and this API handles
> the conversions automatically, so filesystems may choose which one to use.
> 
> In ->miscattr_get() a hint is provided to the filesystem whether flags or
> xattr are being requested by userspace, but in this series this hint is
> ignored by all filesystems, since generating all the attributes is cheap.
> 
> If a filesystem doesn't implemement the miscattr API, just fall back to
> f_op->ioctl().  When all filesystems are converted, the fallback can be
> removed.
> 
> 32bit compat ioctls are now handled by the generic code as well.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  Documentation/filesystems/locking.rst |   5 +
>  Documentation/filesystems/vfs.rst     |  15 ++
>  fs/ioctl.c                            | 329 ++++++++++++++++++++++++++
>  include/linux/fs.h                    |   4 +
>  include/linux/miscattr.h              |  53 +++++
>  5 files changed, 406 insertions(+)
>  create mode 100644 include/linux/miscattr.h
> 
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index b7dcc86c92a4..a5aa2046d48f 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -80,6 +80,9 @@ prototypes::
>  				struct file *, unsigned open_flag,
>  				umode_t create_mode);
>  	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
> +	int (*miscattr_set)(struct user_namespace *mnt_userns,
> +			    struct dentry *dentry, struct miscattr *ma);
> +	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
>  
>  locking rules:
>  	all may block
> @@ -107,6 +110,8 @@ fiemap:		no
>  update_time:	no
>  atomic_open:	shared (exclusive if O_CREAT is set in open flags)
>  tmpfile:	no
> +miscattr_get:	no or exclusive
> +miscattr_set:	exclusive
>  ============	=============================================
>  
>  
> diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
> index 2049bbf5e388..f125ce6c3b47 100644
> --- a/Documentation/filesystems/vfs.rst
> +++ b/Documentation/filesystems/vfs.rst
> @@ -441,6 +441,9 @@ As of kernel 2.6.22, the following members are defined:
>  				   unsigned open_flag, umode_t create_mode);
>  		int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
>  	        int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
> +		int (*miscattr_set)(struct user_namespace *mnt_userns,
> +				    struct dentry *dentry, struct miscattr *ma);
> +		int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
>  	};
>  
>  Again, all methods are called without any locks being held, unless
> @@ -588,6 +591,18 @@ otherwise noted.
>  	atomically creating, opening and unlinking a file in given
>  	directory.
>  
> +``miscattr_get``

I wish this wasn't named "misc" because miscellaneous is vague.

fileattr_get, perhaps?

(FWIW I'm not /that/ passionate about starting a naming bikeshed, feel
free to ignore.)

> +	called on ioctl(FS_IOC_GETFLAGS) and ioctl(FS_IOC_FSGETXATTR) to
> +	retrieve miscellaneous filesystem flags and attributes.  Also

"...miscellaneous *file* flags and attributes."

> +	called before the relevant SET operation to check what is being
> +	changed (in this case with i_rwsem locked exclusive).  If unset,
> +	then fall back to f_op->ioctl().
> +
> +``miscattr_set``
> +	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
> +	change miscellaneous filesystem flags and attributes.  Callers hold

Same here.

> +	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
> +
>  
>  The Address Space Object
>  ========================
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 4e6cc0a7d69c..e5f3820809a4 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -19,6 +19,9 @@
>  #include <linux/falloc.h>
>  #include <linux/sched/signal.h>
>  #include <linux/fiemap.h>
> +#include <linux/mount.h>
> +#include <linux/fscrypt.h>
> +#include <linux/miscattr.h>
>  
>  #include "internal.h"
>  
> @@ -657,6 +660,311 @@ static int ioctl_file_dedupe_range(struct file *file,
>  	return ret;
>  }
>  
> +/**
> + * miscattr_fill_xflags - initialize miscattr with xflags
> + * @ma:		miscattr pointer
> + * @xflags:	FS_XFLAG_* flags
> + *
> + * Set ->fsx_xflags, ->xattr_valid and ->flags (translated xflags).  All
> + * other fields are zeroed.
> + */
> +void miscattr_fill_xflags(struct miscattr *ma, u32 xflags)
> +{
> +	memset(ma, 0, sizeof(*ma));
> +	ma->xattr_valid = true;
> +	ma->fsx_xflags = xflags;
> +	if (ma->fsx_xflags & FS_XFLAG_IMMUTABLE)
> +		ma->flags |= FS_IMMUTABLE_FL;

I wonder if maintaining redundant sets of flags in the same structure is
going to bite us some day.

> +	if (ma->fsx_xflags & FS_XFLAG_APPEND)
> +		ma->flags |= FS_APPEND_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_SYNC)
> +		ma->flags |= FS_SYNC_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_NOATIME)
> +		ma->flags |= FS_NOATIME_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_NODUMP)
> +		ma->flags |= FS_NODUMP_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_DAX)
> +		ma->flags |= FS_DAX_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_PROJINHERIT)
> +		ma->flags |= FS_PROJINHERIT_FL;
> +}
> +EXPORT_SYMBOL(miscattr_fill_xflags);
> +
> +/**
> + * miscattr_fill_flags - initialize miscattr with flags
> + * @ma:		miscattr pointer
> + * @flags:	FS_*_FL flags
> + *
> + * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
> + * All other fields are zeroed.
> + */
> +void miscattr_fill_flags(struct miscattr *ma, u32 flags)
> +{
> +	memset(ma, 0, sizeof(*ma));
> +	ma->flags_valid = true;
> +	ma->flags = flags;
> +	if (ma->flags & FS_SYNC_FL)
> +		ma->fsx_xflags |= FS_XFLAG_SYNC;
> +	if (ma->flags & FS_IMMUTABLE_FL)
> +		ma->fsx_xflags |= FS_XFLAG_IMMUTABLE;
> +	if (ma->flags & FS_APPEND_FL)
> +		ma->fsx_xflags |= FS_XFLAG_APPEND;
> +	if (ma->flags & FS_NODUMP_FL)
> +		ma->fsx_xflags |= FS_XFLAG_NODUMP;
> +	if (ma->flags & FS_NOATIME_FL)
> +		ma->fsx_xflags |= FS_XFLAG_NOATIME;
> +	if (ma->flags & FS_DAX_FL)
> +		ma->fsx_xflags |= FS_XFLAG_DAX;
> +	if (ma->flags & FS_PROJINHERIT_FL)
> +		ma->fsx_xflags |= FS_XFLAG_PROJINHERIT;
> +}
> +EXPORT_SYMBOL(miscattr_fill_flags);
> +
> +/**
> + * vfs_miscattr_get - retrieve miscellaneous inode attributes
> + * @dentry:	the object to retrieve from
> + * @ma:		miscattr pointer
> + *
> + * Call i_op->miscattr_get() callback, if exists.
> + *
> + * Returns 0 on success, or a negative error on failure.
> + */
> +int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma)
> +{
> +	struct inode *inode = d_inode(dentry);
> +
> +	if (d_is_special(dentry))
> +		return -ENOTTY;
> +
> +	if (!inode->i_op->miscattr_get)
> +		return -ENOIOCTLCMD;
> +
> +	return inode->i_op->miscattr_get(dentry, ma);
> +}
> +EXPORT_SYMBOL(vfs_miscattr_get);
> +
> +/**
> + * fsxattr_copy_to_user - copy fsxattr to userspace.
> + * @ma:		miscattr pointer
> + * @ufa:	fsxattr user pointer
> + *
> + * Returns 0 on success, or -EFAULT on failure.
> + */
> +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> +{
> +	struct fsxattr fa = {
> +		.fsx_xflags	= ma->fsx_xflags,
> +		.fsx_extsize	= ma->fsx_extsize,
> +		.fsx_nextents	= ma->fsx_nextents,
> +		.fsx_projid	= ma->fsx_projid,
> +		.fsx_cowextsize	= ma->fsx_cowextsize,
> +	};
> +
> +	if (copy_to_user(ufa, &fa, sizeof(fa)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(fsxattr_copy_to_user);
> +
> +static int fsxattr_copy_from_user(struct miscattr *ma,
> +				  struct fsxattr __user *ufa)
> +{
> +	struct fsxattr fa;
> +
> +	if (copy_from_user(&fa, ufa, sizeof(fa)))
> +		return -EFAULT;
> +
> +	miscattr_fill_xflags(ma, fa.fsx_xflags);
> +	ma->fsx_extsize = fa.fsx_extsize;
> +	ma->fsx_nextents = fa.fsx_nextents;
> +	ma->fsx_projid = fa.fsx_projid;
> +	ma->fsx_cowextsize = fa.fsx_cowextsize;
> +
> +	return 0;
> +}
> +
> +/*
> + * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
> + * any invalid configurations.
> + *
> + * Note: must be called with inode lock held.
> + */
> +static int miscattr_set_prepare(struct inode *inode,
> +			      const struct miscattr *old_ma,
> +			      struct miscattr *ma)
> +{
> +	int err;
> +
> +	/*
> +	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> +	 * the relevant capability.
> +	 */
> +	if ((ma->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
> +	    !capable(CAP_LINUX_IMMUTABLE))
> +		return -EPERM;
> +
> +	err = fscrypt_prepare_setflags(inode, old_ma->flags, ma->flags);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * Project Quota ID state is only allowed to change from within the init
> +	 * namespace. Enforce that restriction only if we are trying to change
> +	 * the quota ID state. Everything else is allowed in user namespaces.
> +	 */
> +	if (current_user_ns() != &init_user_ns) {
> +		if (old_ma->fsx_projid != ma->fsx_projid)
> +			return -EINVAL;
> +		if ((old_ma->fsx_xflags ^ ma->fsx_xflags) &
> +				FS_XFLAG_PROJINHERIT)
> +			return -EINVAL;
> +	}
> +
> +	/* Check extent size hints. */
> +	if ((ma->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if ((ma->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
> +			!S_ISDIR(inode->i_mode))
> +		return -EINVAL;
> +
> +	if ((ma->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
> +	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
> +		return -EINVAL;
> +
> +	/*
> +	 * It is only valid to set the DAX flag on regular files and
> +	 * directories on filesystems.
> +	 */
> +	if ((ma->fsx_xflags & FS_XFLAG_DAX) &&
> +	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
> +		return -EINVAL;
> +
> +	/* Extent size hints of zero turn off the flags. */
> +	if (ma->fsx_extsize == 0)
> +		ma->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
> +	if (ma->fsx_cowextsize == 0)
> +		ma->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
> +
> +	return 0;
> +}
> +
> +/**
> + * vfs_miscattr_set - change miscellaneous inode attributes
> + * @dentry:	the object to change
> + * @ma:		miscattr pointer
> + *
> + * After verifying permissions, call i_op->miscattr_set() callback, if
> + * exists.
> + *
> + * Verifying attributes involves retrieving current attributes with
> + * i_op->miscattr_get(), this also allows initilaizing attributes that have
> + * not been set by the caller to current values.  Inode lock is held
> + * thoughout to prevent racing with another instance.
> + *
> + * Returns 0 on success, or a negative error on failure.
> + */
> +int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
> +		     struct miscattr *ma)
> +{
> +	struct inode *inode = d_inode(dentry);
> +	struct miscattr old_ma = {};
> +	int err;
> +
> +	if (d_is_special(dentry))
> +		return -ENOTTY;
> +
> +	if (!inode->i_op->miscattr_set)
> +		return -ENOIOCTLCMD;
> +
> +	if (!inode_owner_or_capable(mnt_userns, inode))
> +		return -EPERM;
> +
> +	inode_lock(inode);
> +	err = vfs_miscattr_get(dentry, &old_ma);
> +	if (!err) {
> +		/* initialize missing bits from old_ma */
> +		if (ma->flags_valid) {
> +			ma->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
> +			ma->fsx_extsize = old_ma.fsx_extsize;
> +			ma->fsx_nextents = old_ma.fsx_nextents;
> +			ma->fsx_projid = old_ma.fsx_projid;
> +			ma->fsx_cowextsize = old_ma.fsx_cowextsize;
> +		} else {
> +			ma->flags |= old_ma.flags & ~FS_COMMON_FL;
> +		}
> +		err = miscattr_set_prepare(inode, &old_ma, ma);
> +		if (!err)
> +			err = inode->i_op->miscattr_set(mnt_userns, dentry, ma);
> +	}
> +	inode_unlock(inode);
> +
> +	return err;
> +}
> +EXPORT_SYMBOL(vfs_miscattr_set);
> +
> +static int ioctl_getflags(struct file *file, void __user *argp)
> +{
> +	struct miscattr ma = { .flags_valid = true }; /* hint only */
> +	unsigned int flags;
> +	int err;
> +
> +	err = vfs_miscattr_get(file_dentry(file), &ma);
> +	if (!err) {
> +		flags = ma.flags;
> +		if (copy_to_user(argp, &flags, sizeof(flags)))
> +			err = -EFAULT;
> +	}
> +	return err;
> +}
> +
> +static int ioctl_setflags(struct file *file, void __user *argp)
> +{
> +	struct miscattr ma;
> +	unsigned int flags;
> +	int err;
> +
> +	if (copy_from_user(&flags, argp, sizeof(flags)))
> +		return -EFAULT;
> +
> +	err = mnt_want_write_file(file);
> +	if (!err) {
> +		miscattr_fill_flags(&ma, flags);
> +		err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
> +		mnt_drop_write_file(file);
> +	}
> +	return err;
> +}
> +
> +static int ioctl_fsgetxattr(struct file *file, void __user *argp)
> +{
> +	struct miscattr ma = { .xattr_valid = true }; /* hint only */
> +	int err;
> +
> +	err = vfs_miscattr_get(file_dentry(file), &ma);
> +	if (!err)
> +		err = fsxattr_copy_to_user(&ma, argp);
> +
> +	return err;
> +}
> +
> +static int ioctl_fssetxattr(struct file *file, void __user *argp)
> +{
> +	struct miscattr ma;
> +	int err;
> +
> +	err = fsxattr_copy_from_user(&ma, argp);
> +	if (!err) {
> +		err = mnt_want_write_file(file);
> +		if (!err) {
> +			err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
> +			mnt_drop_write_file(file);
> +		}
> +	}
> +	return err;
> +}
> +
>  /*
>   * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
>   * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> @@ -727,6 +1035,18 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
>  		return put_user(i_size_read(inode) - filp->f_pos,
>  				(int __user *)argp);
>  
> +	case FS_IOC_GETFLAGS:
> +		return ioctl_getflags(filp, argp);
> +
> +	case FS_IOC_SETFLAGS:
> +		return ioctl_setflags(filp, argp);
> +
> +	case FS_IOC_FSGETXATTR:
> +		return ioctl_fsgetxattr(filp, argp);
> +
> +	case FS_IOC_FSSETXATTR:
> +		return ioctl_fssetxattr(filp, argp);
> +
>  	default:
>  		if (S_ISREG(inode->i_mode))
>  			return file_ioctl(filp, cmd, argp);
> @@ -827,6 +1147,15 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
>  		break;
>  #endif
>  
> +	/*
> +	 * These access 32-bit values anyway so no further handling is
> +	 * necessary.
> +	 */
> +	case FS_IOC32_GETFLAGS:
> +	case FS_IOC32_SETFLAGS:
> +		cmd = (cmd == FS_IOC32_GETFLAGS) ?
> +			FS_IOC_GETFLAGS : FS_IOC_SETFLAGS;
> +		fallthrough;
>  	/*
>  	 * everything else in do_vfs_ioctl() takes either a compatible
>  	 * pointer argument or no argument -- call it with a modified
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ec8f3ddf4a6a..9e7f6a592a70 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -70,6 +70,7 @@ struct fsverity_info;
>  struct fsverity_operations;
>  struct fs_context;
>  struct fs_parameter_spec;
> +struct miscattr;
>  
>  extern void __init inode_init(void);
>  extern void __init inode_init_early(void);
> @@ -1963,6 +1964,9 @@ struct inode_operations {
>  			struct dentry *, umode_t);
>  	int (*set_acl)(struct user_namespace *, struct inode *,
>  		       struct posix_acl *, int);
> +	int (*miscattr_set)(struct user_namespace *mnt_userns,
> +			    struct dentry *dentry, struct miscattr *ma);
> +	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
>  } ____cacheline_aligned;
>  
>  static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
> diff --git a/include/linux/miscattr.h b/include/linux/miscattr.h
> new file mode 100644
> index 000000000000..13683eb6ac78
> --- /dev/null
> +++ b/include/linux/miscattr.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef _LINUX_MISCATTR_H
> +#define _LINUX_MISCATTR_H
> +
> +/* Flags shared betwen flags/xflags */
> +#define FS_COMMON_FL \
> +	(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
> +	 FS_NODUMP_FL |	FS_NOATIME_FL | FS_DAX_FL | \
> +	 FS_PROJINHERIT_FL)
> +
> +#define FS_XFLAG_COMMON \
> +	(FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \
> +	 FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \
> +	 FS_XFLAG_PROJINHERIT)
> +
> +struct miscattr {
> +	u32	flags;		/* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> +	/* struct fsxattr: */
> +	u32	fsx_xflags;	/* xflags field value (get/set) */

Hrmm... could we have /some/ note here that fsx_xflags comes from XFS
and "flags" comes from ext*?

> +	u32	fsx_extsize;	/* extsize field value (get/set)*/
> +	u32	fsx_nextents;	/* nextents field value (get)	*/
> +	u32	fsx_projid;	/* project identifier (get/set) */
> +	u32	fsx_cowextsize;	/* CoW extsize field value (get/set)*/
> +	/* selectors: */
> +	bool	flags_valid:1;
> +	bool	xattr_valid:1;

What does this have to do with extended attributes?

OH, it has nothing to do with xattrs; this flag means that the fsx_*
fields of miscattr have any significance.  Can this be named fsx_valid?

So what should XFS do here?  Set the fsx_* fields, clear flags_valid,
and set fsx_valid?

Oh, I guess there's an XFS conversion patch, so I'll wander off there
now.

--D

> +};
> +
> +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa);
> +
> +void miscattr_fill_xflags(struct miscattr *ma, u32 xflags);
> +void miscattr_fill_flags(struct miscattr *ma, u32 flags);
> +
> +/**
> + * miscattr_has_xattr - check for extentended flags/attributes
> + * @ma:		miscattr pointer
> + *
> + * Returns true if any attributes are present that are not represented in
> + * ->flags.
> + */
> +static inline bool miscattr_has_xattr(const struct miscattr *ma)
> +{
> +	return ma->xattr_valid &&
> +		((ma->fsx_xflags & ~FS_XFLAG_COMMON) || ma->fsx_extsize != 0 ||
> +		 ma->fsx_projid != 0 ||	ma->fsx_cowextsize != 0);
> +}
> +
> +int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma);
> +int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
> +		     struct miscattr *ma);
> +
> +#endif /* _LINUX_MISCATTR_H */
> -- 
> 2.30.2
>
Amir Goldstein March 23, 2021, 5:21 a.m. UTC | #2
> > +``miscattr_get``
>
> I wish this wasn't named "misc" because miscellaneous is vague.
>
> fileattr_get, perhaps?
>
> (FWIW I'm not /that/ passionate about starting a naming bikeshed, feel
> free to ignore.)
>

Eventual bikeshedding is hard to avoid in this case...

I don't feel strongly against "misc", but I do think the flags and
ioctl are already
known as "fsx" so it would be more friendly to go with that.

If you don't like "fsxflags" because it's not only flags and you think
"fsxattr" is too
close to "xattr" (FWIW I don't think it is going to be a source of
confusion), we
can simply go with get_fsx(), similar to get_acl(). It doesn't matter
what name we
use as long as everyone is clear on what it is.

"struct fsx" is not any more or any less clear than "struct statx" and
while "fsx"
it is a pretty arbitrary name, it is not much less arbitrary than "miscattr".

Thanks,
Amir.
David Sterba March 23, 2021, 11:22 a.m. UTC | #3
On Mon, Mar 22, 2021 at 03:33:38PM -0700, Darrick J. Wong wrote:
> On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
> > --- a/Documentation/filesystems/vfs.rst
> > +++ b/Documentation/filesystems/vfs.rst
> > @@ -441,6 +441,9 @@ As of kernel 2.6.22, the following members are defined:
> >  				   unsigned open_flag, umode_t create_mode);
> >  		int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
> >  	        int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
> > +		int (*miscattr_set)(struct user_namespace *mnt_userns,
> > +				    struct dentry *dentry, struct miscattr *ma);
> > +		int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
> >  	};
> >  
> >  Again, all methods are called without any locks being held, unless
> > @@ -588,6 +591,18 @@ otherwise noted.
> >  	atomically creating, opening and unlinking a file in given
> >  	directory.
> >  
> > +``miscattr_get``
> 
> I wish this wasn't named "misc" because miscellaneous is vague.

It also adds yet another way to name all the attributes (the "N + 1st
standard" problem). So I'd rather reuse a term that's already known and
understood by users. And this is 'file attributes', eg. as noted in
chattr manual page "change file attributes on a Linux file system".
For clarity avoid any 'x' in the name so we easily distinguish that from
the extended attributes aka xattrs.

We can perhaps live with miscattrs in code as anybody who has ever
touched the flags/attrs interfaces knows what it is referring to.

> fileattr_get, perhaps?

That sounds about right to me.
Al Viro March 24, 2021, 5:02 a.m. UTC | #4
On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:

minor nit: copy_fsxattr_{to,from}_user() might be better.

> +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> +{
> +	struct fsxattr fa = {
> +		.fsx_xflags	= ma->fsx_xflags,
> +		.fsx_extsize	= ma->fsx_extsize,
> +		.fsx_nextents	= ma->fsx_nextents,
> +		.fsx_projid	= ma->fsx_projid,
> +		.fsx_cowextsize	= ma->fsx_cowextsize,
> +	};

That wants a comment along the lines of "guaranteed to be gap-free",
since otherwise you'd need memset() to avoid an infoleak.

> +static int ioctl_getflags(struct file *file, void __user *argp)
> +{
> +	struct miscattr ma = { .flags_valid = true }; /* hint only */
> +	unsigned int flags;
> +	int err;
> +
> +	err = vfs_miscattr_get(file_dentry(file), &ma);

Umm...  Just to clarify - do we plan to have that ever called via
ovl_real_ioctl()?  IOW, is file_dentry() anything other than a way
to spell ->f_path.dentry here?

> +struct miscattr {
> +	u32	flags;		/* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> +	/* struct fsxattr: */
> +	u32	fsx_xflags;	/* xflags field value (get/set) */
> +	u32	fsx_extsize;	/* extsize field value (get/set)*/
> +	u32	fsx_nextents;	/* nextents field value (get)	*/
> +	u32	fsx_projid;	/* project identifier (get/set) */
> +	u32	fsx_cowextsize;	/* CoW extsize field value (get/set)*/
> +	/* selectors: */
> +	bool	flags_valid:1;
> +	bool	xattr_valid:1;
> +};

OK as long as it stays kernel-only, but if we ever expose that to userland, we'd
better remember to turn the last two into an u32 with explicit bitmasks.
Christian Brauner March 24, 2021, 8:06 a.m. UTC | #5
On Mon, Mar 22, 2021 at 03:33:38PM -0700, Darrick J. Wong wrote:
> On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
> > There's a substantial amount of boilerplate in filesystems handling
> > FS_IOC_[GS]ETFLAGS/ FS_IOC_FS[GS]ETXATTR ioctls.
> > 
> > Also due to userspace buffers being involved in the ioctl API this is
> > difficult to stack, as shown by overlayfs issues related to these ioctls.
> > 
> > Introduce a new internal API named "miscattr" (fsxattr can be confused with
> > xattr, xflags is inappropriate, since this is more than just flags).
> > 
> > There's significant overlap between flags and xflags and this API handles
> > the conversions automatically, so filesystems may choose which one to use.
> > 
> > In ->miscattr_get() a hint is provided to the filesystem whether flags or
> > xattr are being requested by userspace, but in this series this hint is
> > ignored by all filesystems, since generating all the attributes is cheap.
> > 
> > If a filesystem doesn't implemement the miscattr API, just fall back to
> > f_op->ioctl().  When all filesystems are converted, the fallback can be
> > removed.
> > 
> > 32bit compat ioctls are now handled by the generic code as well.
> > 
> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  Documentation/filesystems/locking.rst |   5 +
> >  Documentation/filesystems/vfs.rst     |  15 ++
> >  fs/ioctl.c                            | 329 ++++++++++++++++++++++++++
> >  include/linux/fs.h                    |   4 +
> >  include/linux/miscattr.h              |  53 +++++
> >  5 files changed, 406 insertions(+)
> >  create mode 100644 include/linux/miscattr.h
> > 
> > diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> > index b7dcc86c92a4..a5aa2046d48f 100644
> > --- a/Documentation/filesystems/locking.rst
> > +++ b/Documentation/filesystems/locking.rst
> > @@ -80,6 +80,9 @@ prototypes::
> >  				struct file *, unsigned open_flag,
> >  				umode_t create_mode);
> >  	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
> > +	int (*miscattr_set)(struct user_namespace *mnt_userns,
> > +			    struct dentry *dentry, struct miscattr *ma);
> > +	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
> >  
> >  locking rules:
> >  	all may block
> > @@ -107,6 +110,8 @@ fiemap:		no
> >  update_time:	no
> >  atomic_open:	shared (exclusive if O_CREAT is set in open flags)
> >  tmpfile:	no
> > +miscattr_get:	no or exclusive
> > +miscattr_set:	exclusive
> >  ============	=============================================
> >  
> >  
> > diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
> > index 2049bbf5e388..f125ce6c3b47 100644
> > --- a/Documentation/filesystems/vfs.rst
> > +++ b/Documentation/filesystems/vfs.rst
> > @@ -441,6 +441,9 @@ As of kernel 2.6.22, the following members are defined:
> >  				   unsigned open_flag, umode_t create_mode);
> >  		int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
> >  	        int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
> > +		int (*miscattr_set)(struct user_namespace *mnt_userns,
> > +				    struct dentry *dentry, struct miscattr *ma);
> > +		int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
> >  	};
> >  
> >  Again, all methods are called without any locks being held, unless
> > @@ -588,6 +591,18 @@ otherwise noted.
> >  	atomically creating, opening and unlinking a file in given
> >  	directory.
> >  
> > +``miscattr_get``
> 
> I wish this wasn't named "misc" because miscellaneous is vague.
> 
> fileattr_get, perhaps?
> 
> (FWIW I'm not /that/ passionate about starting a naming bikeshed, feel
> free to ignore.)
> 
> > +	called on ioctl(FS_IOC_GETFLAGS) and ioctl(FS_IOC_FSGETXATTR) to
> > +	retrieve miscellaneous filesystem flags and attributes.  Also
> 
> "...miscellaneous *file* flags and attributes."
> 
> > +	called before the relevant SET operation to check what is being
> > +	changed (in this case with i_rwsem locked exclusive).  If unset,
> > +	then fall back to f_op->ioctl().
> > +
> > +``miscattr_set``
> > +	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
> > +	change miscellaneous filesystem flags and attributes.  Callers hold
> 
> Same here.
> 
> > +	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
> > +
> >  
> >  The Address Space Object
> >  ========================
> > diff --git a/fs/ioctl.c b/fs/ioctl.c
> > index 4e6cc0a7d69c..e5f3820809a4 100644
> > --- a/fs/ioctl.c
> > +++ b/fs/ioctl.c
> > @@ -19,6 +19,9 @@
> >  #include <linux/falloc.h>
> >  #include <linux/sched/signal.h>
> >  #include <linux/fiemap.h>
> > +#include <linux/mount.h>
> > +#include <linux/fscrypt.h>
> > +#include <linux/miscattr.h>
> >  
> >  #include "internal.h"
> >  
> > @@ -657,6 +660,311 @@ static int ioctl_file_dedupe_range(struct file *file,
> >  	return ret;
> >  }
> >  
> > +/**
> > + * miscattr_fill_xflags - initialize miscattr with xflags
> > + * @ma:		miscattr pointer
> > + * @xflags:	FS_XFLAG_* flags
> > + *
> > + * Set ->fsx_xflags, ->xattr_valid and ->flags (translated xflags).  All
> > + * other fields are zeroed.
> > + */
> > +void miscattr_fill_xflags(struct miscattr *ma, u32 xflags)
> > +{
> > +	memset(ma, 0, sizeof(*ma));
> > +	ma->xattr_valid = true;
> > +	ma->fsx_xflags = xflags;
> > +	if (ma->fsx_xflags & FS_XFLAG_IMMUTABLE)
> > +		ma->flags |= FS_IMMUTABLE_FL;
> 
> I wonder if maintaining redundant sets of flags in the same structure is
> going to bite us some day.
> 
> > +	if (ma->fsx_xflags & FS_XFLAG_APPEND)
> > +		ma->flags |= FS_APPEND_FL;
> > +	if (ma->fsx_xflags & FS_XFLAG_SYNC)
> > +		ma->flags |= FS_SYNC_FL;
> > +	if (ma->fsx_xflags & FS_XFLAG_NOATIME)
> > +		ma->flags |= FS_NOATIME_FL;
> > +	if (ma->fsx_xflags & FS_XFLAG_NODUMP)
> > +		ma->flags |= FS_NODUMP_FL;
> > +	if (ma->fsx_xflags & FS_XFLAG_DAX)
> > +		ma->flags |= FS_DAX_FL;
> > +	if (ma->fsx_xflags & FS_XFLAG_PROJINHERIT)
> > +		ma->flags |= FS_PROJINHERIT_FL;
> > +}
> > +EXPORT_SYMBOL(miscattr_fill_xflags);
> > +
> > +/**
> > + * miscattr_fill_flags - initialize miscattr with flags
> > + * @ma:		miscattr pointer
> > + * @flags:	FS_*_FL flags
> > + *
> > + * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
> > + * All other fields are zeroed.
> > + */
> > +void miscattr_fill_flags(struct miscattr *ma, u32 flags)
> > +{
> > +	memset(ma, 0, sizeof(*ma));
> > +	ma->flags_valid = true;
> > +	ma->flags = flags;
> > +	if (ma->flags & FS_SYNC_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_SYNC;
> > +	if (ma->flags & FS_IMMUTABLE_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_IMMUTABLE;
> > +	if (ma->flags & FS_APPEND_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_APPEND;
> > +	if (ma->flags & FS_NODUMP_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_NODUMP;
> > +	if (ma->flags & FS_NOATIME_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_NOATIME;
> > +	if (ma->flags & FS_DAX_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_DAX;
> > +	if (ma->flags & FS_PROJINHERIT_FL)
> > +		ma->fsx_xflags |= FS_XFLAG_PROJINHERIT;
> > +}
> > +EXPORT_SYMBOL(miscattr_fill_flags);
> > +
> > +/**
> > + * vfs_miscattr_get - retrieve miscellaneous inode attributes
> > + * @dentry:	the object to retrieve from
> > + * @ma:		miscattr pointer
> > + *
> > + * Call i_op->miscattr_get() callback, if exists.
> > + *
> > + * Returns 0 on success, or a negative error on failure.
> > + */
> > +int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma)
> > +{
> > +	struct inode *inode = d_inode(dentry);
> > +
> > +	if (d_is_special(dentry))
> > +		return -ENOTTY;
> > +
> > +	if (!inode->i_op->miscattr_get)
> > +		return -ENOIOCTLCMD;
> > +
> > +	return inode->i_op->miscattr_get(dentry, ma);
> > +}
> > +EXPORT_SYMBOL(vfs_miscattr_get);
> > +
> > +/**
> > + * fsxattr_copy_to_user - copy fsxattr to userspace.
> > + * @ma:		miscattr pointer
> > + * @ufa:	fsxattr user pointer
> > + *
> > + * Returns 0 on success, or -EFAULT on failure.
> > + */
> > +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> > +{
> > +	struct fsxattr fa = {
> > +		.fsx_xflags	= ma->fsx_xflags,
> > +		.fsx_extsize	= ma->fsx_extsize,
> > +		.fsx_nextents	= ma->fsx_nextents,
> > +		.fsx_projid	= ma->fsx_projid,
> > +		.fsx_cowextsize	= ma->fsx_cowextsize,
> > +	};
> > +
> > +	if (copy_to_user(ufa, &fa, sizeof(fa)))
> > +		return -EFAULT;
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL(fsxattr_copy_to_user);
> > +
> > +static int fsxattr_copy_from_user(struct miscattr *ma,
> > +				  struct fsxattr __user *ufa)
> > +{
> > +	struct fsxattr fa;
> > +
> > +	if (copy_from_user(&fa, ufa, sizeof(fa)))
> > +		return -EFAULT;
> > +
> > +	miscattr_fill_xflags(ma, fa.fsx_xflags);
> > +	ma->fsx_extsize = fa.fsx_extsize;
> > +	ma->fsx_nextents = fa.fsx_nextents;
> > +	ma->fsx_projid = fa.fsx_projid;
> > +	ma->fsx_cowextsize = fa.fsx_cowextsize;
> > +
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
> > + * any invalid configurations.
> > + *
> > + * Note: must be called with inode lock held.
> > + */
> > +static int miscattr_set_prepare(struct inode *inode,
> > +			      const struct miscattr *old_ma,
> > +			      struct miscattr *ma)
> > +{
> > +	int err;
> > +
> > +	/*
> > +	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> > +	 * the relevant capability.
> > +	 */
> > +	if ((ma->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
> > +	    !capable(CAP_LINUX_IMMUTABLE))
> > +		return -EPERM;
> > +
> > +	err = fscrypt_prepare_setflags(inode, old_ma->flags, ma->flags);
> > +	if (err)
> > +		return err;
> > +
> > +	/*
> > +	 * Project Quota ID state is only allowed to change from within the init
> > +	 * namespace. Enforce that restriction only if we are trying to change
> > +	 * the quota ID state. Everything else is allowed in user namespaces.
> > +	 */
> > +	if (current_user_ns() != &init_user_ns) {
> > +		if (old_ma->fsx_projid != ma->fsx_projid)
> > +			return -EINVAL;
> > +		if ((old_ma->fsx_xflags ^ ma->fsx_xflags) &
> > +				FS_XFLAG_PROJINHERIT)
> > +			return -EINVAL;
> > +	}
> > +
> > +	/* Check extent size hints. */
> > +	if ((ma->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
> > +		return -EINVAL;
> > +
> > +	if ((ma->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
> > +			!S_ISDIR(inode->i_mode))
> > +		return -EINVAL;
> > +
> > +	if ((ma->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
> > +	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * It is only valid to set the DAX flag on regular files and
> > +	 * directories on filesystems.
> > +	 */
> > +	if ((ma->fsx_xflags & FS_XFLAG_DAX) &&
> > +	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
> > +		return -EINVAL;
> > +
> > +	/* Extent size hints of zero turn off the flags. */
> > +	if (ma->fsx_extsize == 0)
> > +		ma->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
> > +	if (ma->fsx_cowextsize == 0)
> > +		ma->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * vfs_miscattr_set - change miscellaneous inode attributes
> > + * @dentry:	the object to change
> > + * @ma:		miscattr pointer
> > + *
> > + * After verifying permissions, call i_op->miscattr_set() callback, if
> > + * exists.
> > + *
> > + * Verifying attributes involves retrieving current attributes with
> > + * i_op->miscattr_get(), this also allows initilaizing attributes that have
> > + * not been set by the caller to current values.  Inode lock is held
> > + * thoughout to prevent racing with another instance.
> > + *
> > + * Returns 0 on success, or a negative error on failure.
> > + */
> > +int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
> > +		     struct miscattr *ma)
> > +{
> > +	struct inode *inode = d_inode(dentry);
> > +	struct miscattr old_ma = {};
> > +	int err;
> > +
> > +	if (d_is_special(dentry))
> > +		return -ENOTTY;
> > +
> > +	if (!inode->i_op->miscattr_set)
> > +		return -ENOIOCTLCMD;
> > +
> > +	if (!inode_owner_or_capable(mnt_userns, inode))
> > +		return -EPERM;
> > +
> > +	inode_lock(inode);
> > +	err = vfs_miscattr_get(dentry, &old_ma);
> > +	if (!err) {
> > +		/* initialize missing bits from old_ma */
> > +		if (ma->flags_valid) {
> > +			ma->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
> > +			ma->fsx_extsize = old_ma.fsx_extsize;
> > +			ma->fsx_nextents = old_ma.fsx_nextents;
> > +			ma->fsx_projid = old_ma.fsx_projid;
> > +			ma->fsx_cowextsize = old_ma.fsx_cowextsize;
> > +		} else {
> > +			ma->flags |= old_ma.flags & ~FS_COMMON_FL;
> > +		}
> > +		err = miscattr_set_prepare(inode, &old_ma, ma);
> > +		if (!err)
> > +			err = inode->i_op->miscattr_set(mnt_userns, dentry, ma);
> > +	}
> > +	inode_unlock(inode);
> > +
> > +	return err;
> > +}
> > +EXPORT_SYMBOL(vfs_miscattr_set);
> > +
> > +static int ioctl_getflags(struct file *file, void __user *argp)
> > +{
> > +	struct miscattr ma = { .flags_valid = true }; /* hint only */
> > +	unsigned int flags;
> > +	int err;
> > +
> > +	err = vfs_miscattr_get(file_dentry(file), &ma);
> > +	if (!err) {
> > +		flags = ma.flags;
> > +		if (copy_to_user(argp, &flags, sizeof(flags)))
> > +			err = -EFAULT;
> > +	}
> > +	return err;
> > +}
> > +
> > +static int ioctl_setflags(struct file *file, void __user *argp)
> > +{
> > +	struct miscattr ma;
> > +	unsigned int flags;
> > +	int err;
> > +
> > +	if (copy_from_user(&flags, argp, sizeof(flags)))
> > +		return -EFAULT;
> > +
> > +	err = mnt_want_write_file(file);
> > +	if (!err) {
> > +		miscattr_fill_flags(&ma, flags);
> > +		err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
> > +		mnt_drop_write_file(file);
> > +	}
> > +	return err;
> > +}
> > +
> > +static int ioctl_fsgetxattr(struct file *file, void __user *argp)
> > +{
> > +	struct miscattr ma = { .xattr_valid = true }; /* hint only */
> > +	int err;
> > +
> > +	err = vfs_miscattr_get(file_dentry(file), &ma);
> > +	if (!err)
> > +		err = fsxattr_copy_to_user(&ma, argp);
> > +
> > +	return err;
> > +}
> > +
> > +static int ioctl_fssetxattr(struct file *file, void __user *argp)
> > +{
> > +	struct miscattr ma;
> > +	int err;
> > +
> > +	err = fsxattr_copy_from_user(&ma, argp);
> > +	if (!err) {
> > +		err = mnt_want_write_file(file);
> > +		if (!err) {
> > +			err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
> > +			mnt_drop_write_file(file);
> > +		}
> > +	}
> > +	return err;
> > +}
> > +
> >  /*
> >   * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
> >   * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
> > @@ -727,6 +1035,18 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
> >  		return put_user(i_size_read(inode) - filp->f_pos,
> >  				(int __user *)argp);
> >  
> > +	case FS_IOC_GETFLAGS:
> > +		return ioctl_getflags(filp, argp);
> > +
> > +	case FS_IOC_SETFLAGS:
> > +		return ioctl_setflags(filp, argp);
> > +
> > +	case FS_IOC_FSGETXATTR:
> > +		return ioctl_fsgetxattr(filp, argp);
> > +
> > +	case FS_IOC_FSSETXATTR:
> > +		return ioctl_fssetxattr(filp, argp);
> > +
> >  	default:
> >  		if (S_ISREG(inode->i_mode))
> >  			return file_ioctl(filp, cmd, argp);
> > @@ -827,6 +1147,15 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
> >  		break;
> >  #endif
> >  
> > +	/*
> > +	 * These access 32-bit values anyway so no further handling is
> > +	 * necessary.
> > +	 */
> > +	case FS_IOC32_GETFLAGS:
> > +	case FS_IOC32_SETFLAGS:
> > +		cmd = (cmd == FS_IOC32_GETFLAGS) ?
> > +			FS_IOC_GETFLAGS : FS_IOC_SETFLAGS;
> > +		fallthrough;
> >  	/*
> >  	 * everything else in do_vfs_ioctl() takes either a compatible
> >  	 * pointer argument or no argument -- call it with a modified
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index ec8f3ddf4a6a..9e7f6a592a70 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -70,6 +70,7 @@ struct fsverity_info;
> >  struct fsverity_operations;
> >  struct fs_context;
> >  struct fs_parameter_spec;
> > +struct miscattr;
> >  
> >  extern void __init inode_init(void);
> >  extern void __init inode_init_early(void);
> > @@ -1963,6 +1964,9 @@ struct inode_operations {
> >  			struct dentry *, umode_t);
> >  	int (*set_acl)(struct user_namespace *, struct inode *,
> >  		       struct posix_acl *, int);
> > +	int (*miscattr_set)(struct user_namespace *mnt_userns,
> > +			    struct dentry *dentry, struct miscattr *ma);
> > +	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
> >  } ____cacheline_aligned;
> >  
> >  static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
> > diff --git a/include/linux/miscattr.h b/include/linux/miscattr.h
> > new file mode 100644
> > index 000000000000..13683eb6ac78
> > --- /dev/null
> > +++ b/include/linux/miscattr.h
> > @@ -0,0 +1,53 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#ifndef _LINUX_MISCATTR_H
> > +#define _LINUX_MISCATTR_H
> > +
> > +/* Flags shared betwen flags/xflags */
> > +#define FS_COMMON_FL \
> > +	(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
> > +	 FS_NODUMP_FL |	FS_NOATIME_FL | FS_DAX_FL | \
> > +	 FS_PROJINHERIT_FL)
> > +
> > +#define FS_XFLAG_COMMON \
> > +	(FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \
> > +	 FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \
> > +	 FS_XFLAG_PROJINHERIT)
> > +
> > +struct miscattr {
> > +	u32	flags;		/* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
> > +	/* struct fsxattr: */
> > +	u32	fsx_xflags;	/* xflags field value (get/set) */
> 
> Hrmm... could we have /some/ note here that fsx_xflags comes from XFS
> and "flags" comes from ext*?
> 
> > +	u32	fsx_extsize;	/* extsize field value (get/set)*/
> > +	u32	fsx_nextents;	/* nextents field value (get)	*/
> > +	u32	fsx_projid;	/* project identifier (get/set) */
> > +	u32	fsx_cowextsize;	/* CoW extsize field value (get/set)*/
> > +	/* selectors: */
> > +	bool	flags_valid:1;
> > +	bool	xattr_valid:1;
> 
> What does this have to do with extended attributes?
> 
> OH, it has nothing to do with xattrs; this flag means that the fsx_*
> fields of miscattr have any significance.  Can this be named fsx_valid?

Yeah, that confused me too. If this is really related to fsx then naming
it fsx_valid would be potentially nicer.

Christian
Christian Brauner March 24, 2021, 8:21 a.m. UTC | #6
On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
> There's a substantial amount of boilerplate in filesystems handling
> FS_IOC_[GS]ETFLAGS/ FS_IOC_FS[GS]ETXATTR ioctls.
> 
> Also due to userspace buffers being involved in the ioctl API this is
> difficult to stack, as shown by overlayfs issues related to these ioctls.
> 
> Introduce a new internal API named "miscattr" (fsxattr can be confused with
> xattr, xflags is inappropriate, since this is more than just flags).
> 
> There's significant overlap between flags and xflags and this API handles
> the conversions automatically, so filesystems may choose which one to use.
> 
> In ->miscattr_get() a hint is provided to the filesystem whether flags or
> xattr are being requested by userspace, but in this series this hint is
> ignored by all filesystems, since generating all the attributes is cheap.
> 
> If a filesystem doesn't implemement the miscattr API, just fall back to
> f_op->ioctl().  When all filesystems are converted, the fallback can be
> removed.
> 
> 32bit compat ioctls are now handled by the generic code as well.
> 
> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---

Fwiw, I think this is a good cleanup. Changing something like the
miscattr_set() method to take a mnt_userns would've been less
churn then having to audit all ioctls individually.

(Only one small comment below.)

>  Documentation/filesystems/locking.rst |   5 +
>  Documentation/filesystems/vfs.rst     |  15 ++
>  fs/ioctl.c                            | 329 ++++++++++++++++++++++++++
>  include/linux/fs.h                    |   4 +
>  include/linux/miscattr.h              |  53 +++++
>  5 files changed, 406 insertions(+)
>  create mode 100644 include/linux/miscattr.h
> 
> diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
> index b7dcc86c92a4..a5aa2046d48f 100644
> --- a/Documentation/filesystems/locking.rst
> +++ b/Documentation/filesystems/locking.rst
> @@ -80,6 +80,9 @@ prototypes::
>  				struct file *, unsigned open_flag,
>  				umode_t create_mode);
>  	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
> +	int (*miscattr_set)(struct user_namespace *mnt_userns,
> +			    struct dentry *dentry, struct miscattr *ma);
> +	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
>  
>  locking rules:
>  	all may block
> @@ -107,6 +110,8 @@ fiemap:		no
>  update_time:	no
>  atomic_open:	shared (exclusive if O_CREAT is set in open flags)
>  tmpfile:	no
> +miscattr_get:	no or exclusive
> +miscattr_set:	exclusive
>  ============	=============================================
>  
>  
> diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
> index 2049bbf5e388..f125ce6c3b47 100644
> --- a/Documentation/filesystems/vfs.rst
> +++ b/Documentation/filesystems/vfs.rst
> @@ -441,6 +441,9 @@ As of kernel 2.6.22, the following members are defined:
>  				   unsigned open_flag, umode_t create_mode);
>  		int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
>  	        int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
> +		int (*miscattr_set)(struct user_namespace *mnt_userns,
> +				    struct dentry *dentry, struct miscattr *ma);
> +		int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
>  	};
>  
>  Again, all methods are called without any locks being held, unless
> @@ -588,6 +591,18 @@ otherwise noted.
>  	atomically creating, opening and unlinking a file in given
>  	directory.
>  
> +``miscattr_get``
> +	called on ioctl(FS_IOC_GETFLAGS) and ioctl(FS_IOC_FSGETXATTR) to
> +	retrieve miscellaneous filesystem flags and attributes.  Also
> +	called before the relevant SET operation to check what is being
> +	changed (in this case with i_rwsem locked exclusive).  If unset,
> +	then fall back to f_op->ioctl().
> +
> +``miscattr_set``
> +	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
> +	change miscellaneous filesystem flags and attributes.  Callers hold
> +	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
> +
>  
>  The Address Space Object
>  ========================
> diff --git a/fs/ioctl.c b/fs/ioctl.c
> index 4e6cc0a7d69c..e5f3820809a4 100644
> --- a/fs/ioctl.c
> +++ b/fs/ioctl.c
> @@ -19,6 +19,9 @@
>  #include <linux/falloc.h>
>  #include <linux/sched/signal.h>
>  #include <linux/fiemap.h>
> +#include <linux/mount.h>
> +#include <linux/fscrypt.h>
> +#include <linux/miscattr.h>
>  
>  #include "internal.h"
>  
> @@ -657,6 +660,311 @@ static int ioctl_file_dedupe_range(struct file *file,
>  	return ret;
>  }
>  
> +/**
> + * miscattr_fill_xflags - initialize miscattr with xflags
> + * @ma:		miscattr pointer
> + * @xflags:	FS_XFLAG_* flags
> + *
> + * Set ->fsx_xflags, ->xattr_valid and ->flags (translated xflags).  All
> + * other fields are zeroed.
> + */
> +void miscattr_fill_xflags(struct miscattr *ma, u32 xflags)
> +{
> +	memset(ma, 0, sizeof(*ma));
> +	ma->xattr_valid = true;
> +	ma->fsx_xflags = xflags;
> +	if (ma->fsx_xflags & FS_XFLAG_IMMUTABLE)
> +		ma->flags |= FS_IMMUTABLE_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_APPEND)
> +		ma->flags |= FS_APPEND_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_SYNC)
> +		ma->flags |= FS_SYNC_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_NOATIME)
> +		ma->flags |= FS_NOATIME_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_NODUMP)
> +		ma->flags |= FS_NODUMP_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_DAX)
> +		ma->flags |= FS_DAX_FL;
> +	if (ma->fsx_xflags & FS_XFLAG_PROJINHERIT)
> +		ma->flags |= FS_PROJINHERIT_FL;
> +}
> +EXPORT_SYMBOL(miscattr_fill_xflags);
> +
> +/**
> + * miscattr_fill_flags - initialize miscattr with flags
> + * @ma:		miscattr pointer
> + * @flags:	FS_*_FL flags
> + *
> + * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
> + * All other fields are zeroed.
> + */
> +void miscattr_fill_flags(struct miscattr *ma, u32 flags)
> +{
> +	memset(ma, 0, sizeof(*ma));
> +	ma->flags_valid = true;
> +	ma->flags = flags;
> +	if (ma->flags & FS_SYNC_FL)
> +		ma->fsx_xflags |= FS_XFLAG_SYNC;
> +	if (ma->flags & FS_IMMUTABLE_FL)
> +		ma->fsx_xflags |= FS_XFLAG_IMMUTABLE;
> +	if (ma->flags & FS_APPEND_FL)
> +		ma->fsx_xflags |= FS_XFLAG_APPEND;
> +	if (ma->flags & FS_NODUMP_FL)
> +		ma->fsx_xflags |= FS_XFLAG_NODUMP;
> +	if (ma->flags & FS_NOATIME_FL)
> +		ma->fsx_xflags |= FS_XFLAG_NOATIME;
> +	if (ma->flags & FS_DAX_FL)
> +		ma->fsx_xflags |= FS_XFLAG_DAX;
> +	if (ma->flags & FS_PROJINHERIT_FL)
> +		ma->fsx_xflags |= FS_XFLAG_PROJINHERIT;
> +}
> +EXPORT_SYMBOL(miscattr_fill_flags);
> +
> +/**
> + * vfs_miscattr_get - retrieve miscellaneous inode attributes
> + * @dentry:	the object to retrieve from
> + * @ma:		miscattr pointer
> + *
> + * Call i_op->miscattr_get() callback, if exists.
> + *
> + * Returns 0 on success, or a negative error on failure.
> + */
> +int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma)
> +{
> +	struct inode *inode = d_inode(dentry);
> +
> +	if (d_is_special(dentry))
> +		return -ENOTTY;
> +
> +	if (!inode->i_op->miscattr_get)
> +		return -ENOIOCTLCMD;
> +
> +	return inode->i_op->miscattr_get(dentry, ma);
> +}
> +EXPORT_SYMBOL(vfs_miscattr_get);
> +
> +/**
> + * fsxattr_copy_to_user - copy fsxattr to userspace.
> + * @ma:		miscattr pointer
> + * @ufa:	fsxattr user pointer
> + *
> + * Returns 0 on success, or -EFAULT on failure.
> + */
> +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> +{
> +	struct fsxattr fa = {
> +		.fsx_xflags	= ma->fsx_xflags,
> +		.fsx_extsize	= ma->fsx_extsize,
> +		.fsx_nextents	= ma->fsx_nextents,
> +		.fsx_projid	= ma->fsx_projid,
> +		.fsx_cowextsize	= ma->fsx_cowextsize,
> +	};
> +
> +	if (copy_to_user(ufa, &fa, sizeof(fa)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(fsxattr_copy_to_user);
> +
> +static int fsxattr_copy_from_user(struct miscattr *ma,
> +				  struct fsxattr __user *ufa)
> +{
> +	struct fsxattr fa;
> +
> +	if (copy_from_user(&fa, ufa, sizeof(fa)))
> +		return -EFAULT;
> +
> +	miscattr_fill_xflags(ma, fa.fsx_xflags);
> +	ma->fsx_extsize = fa.fsx_extsize;
> +	ma->fsx_nextents = fa.fsx_nextents;
> +	ma->fsx_projid = fa.fsx_projid;
> +	ma->fsx_cowextsize = fa.fsx_cowextsize;
> +
> +	return 0;
> +}
> +
> +/*
> + * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
> + * any invalid configurations.
> + *
> + * Note: must be called with inode lock held.
> + */
> +static int miscattr_set_prepare(struct inode *inode,
> +			      const struct miscattr *old_ma,
> +			      struct miscattr *ma)
> +{
> +	int err;
> +
> +	/*
> +	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> +	 * the relevant capability.
> +	 */
> +	if ((ma->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
> +	    !capable(CAP_LINUX_IMMUTABLE))
> +		return -EPERM;
> +
> +	err = fscrypt_prepare_setflags(inode, old_ma->flags, ma->flags);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * Project Quota ID state is only allowed to change from within the init
> +	 * namespace. Enforce that restriction only if we are trying to change
> +	 * the quota ID state. Everything else is allowed in user namespaces.
> +	 */
> +	if (current_user_ns() != &init_user_ns) {
> +		if (old_ma->fsx_projid != ma->fsx_projid)
> +			return -EINVAL;
> +		if ((old_ma->fsx_xflags ^ ma->fsx_xflags) &
> +				FS_XFLAG_PROJINHERIT)
> +			return -EINVAL;
> +	}
> +
> +	/* Check extent size hints. */
> +	if ((ma->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	if ((ma->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
> +			!S_ISDIR(inode->i_mode))
> +		return -EINVAL;
> +
> +	if ((ma->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
> +	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
> +		return -EINVAL;
> +
> +	/*
> +	 * It is only valid to set the DAX flag on regular files and
> +	 * directories on filesystems.
> +	 */
> +	if ((ma->fsx_xflags & FS_XFLAG_DAX) &&
> +	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
> +		return -EINVAL;
> +
> +	/* Extent size hints of zero turn off the flags. */
> +	if (ma->fsx_extsize == 0)
> +		ma->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
> +	if (ma->fsx_cowextsize == 0)
> +		ma->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
> +
> +	return 0;
> +}
> +
> +/**
> + * vfs_miscattr_set - change miscellaneous inode attributes

I think this misses to document the @mnt_userns argument probably just
caused by the rebase.

> + * @dentry:	the object to change
> + * @ma:		miscattr pointer

> + *
> + * After verifying permissions, call i_op->miscattr_set() callback, if
> + * exists.
> + *
> + * Verifying attributes involves retrieving current attributes with
> + * i_op->miscattr_get(), this also allows initilaizing attributes that have
> + * not been set by the caller to current values.  Inode lock is held
> + * thoughout to prevent racing with another instance.
> + *
> + * Returns 0 on success, or a negative error on failure.

Fwiw, just because Willy made me aware of this, adding a ":" after the
Return will make kernel-doc generate a separate return value section. It
might also complain otherwise.

Christian

> + */
> +int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
> +		     struct miscattr *ma)
> +{
> +	struct inode *inode = d_inode(dentry);
> +	struct miscattr old_ma = {};
> +	int err;
> +
> +	if (d_is_special(dentry))
> +		return -ENOTTY;
> +
> +	if (!inode->i_op->miscattr_set)
> +		return -ENOIOCTLCMD;
> +
> +	if (!inode_owner_or_capable(mnt_userns, inode))
> +		return -EPERM;
> +
> +	inode_lock(inode);
> +	err = vfs_miscattr_get(dentry, &old_ma);
> +	if (!err) {
> +		/* initialize missing bits from old_ma */
> +		if (ma->flags_valid) {
> +			ma->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
> +			ma->fsx_extsize = old_ma.fsx_extsize;
> +			ma->fsx_nextents = old_ma.fsx_nextents;
> +			ma->fsx_projid = old_ma.fsx_projid;
> +			ma->fsx_cowextsize = old_ma.fsx_cowextsize;
> +		} else {
> +			ma->flags |= old_ma.flags & ~FS_COMMON_FL;
> +		}
> +		err = miscattr_set_prepare(inode, &old_ma, ma);
> +		if (!err)
> +			err = inode->i_op->miscattr_set(mnt_userns, dentry, ma);
> +	}
> +	inode_unlock(inode);
> +
> +	return err;
> +}
> +EXPORT_SYMBOL(vfs_miscattr_set);
Miklos Szeredi March 24, 2021, 8:45 a.m. UTC | #7
On Wed, Mar 24, 2021 at 6:03 AM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
>
> minor nit: copy_fsxattr_{to,from}_user() might be better.
>
> > +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> > +{
> > +     struct fsxattr fa = {
> > +             .fsx_xflags     = ma->fsx_xflags,
> > +             .fsx_extsize    = ma->fsx_extsize,
> > +             .fsx_nextents   = ma->fsx_nextents,
> > +             .fsx_projid     = ma->fsx_projid,
> > +             .fsx_cowextsize = ma->fsx_cowextsize,
> > +     };
>
> That wants a comment along the lines of "guaranteed to be gap-free",
> since otherwise you'd need memset() to avoid an infoleak.

Isn't structure initialization supposed to zero everything not
explicitly initialized?

>
> > +static int ioctl_getflags(struct file *file, void __user *argp)
> > +{
> > +     struct miscattr ma = { .flags_valid = true }; /* hint only */
> > +     unsigned int flags;
> > +     int err;
> > +
> > +     err = vfs_miscattr_get(file_dentry(file), &ma);
>
> Umm...  Just to clarify - do we plan to have that ever called via
> ovl_real_ioctl()?  IOW, is file_dentry() anything other than a way
> to spell ->f_path.dentry here?

Indeed, file_dentry() only makes sense when called from a layer inside
overlayfs.

The one in io_uring() seems wrong also, as a beast needing
file_dentry() should never get out of overlayfs and into io_uring:

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9297,7 +9297,7 @@ static void __io_uring_show_fdinfo(struct
io_ring_ctx *ctx, struct seq_file *m)
                struct file *f = *io_fixed_file_slot(ctx->file_data, i);

                if (f)
-                       seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
+                       seq_printf(m, "%5u: %pD\n", i, f);
                else
                        seq_printf(m, "%5u: <none>\n", i);
        }


Thanks,
Miklos
Al Viro March 24, 2021, 12:26 p.m. UTC | #8
On Wed, Mar 24, 2021 at 09:45:02AM +0100, Miklos Szeredi wrote:
> On Wed, Mar 24, 2021 at 6:03 AM Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > On Mon, Mar 22, 2021 at 03:48:59PM +0100, Miklos Szeredi wrote:
> >
> > minor nit: copy_fsxattr_{to,from}_user() might be better.
> >
> > > +int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
> > > +{
> > > +     struct fsxattr fa = {
> > > +             .fsx_xflags     = ma->fsx_xflags,
> > > +             .fsx_extsize    = ma->fsx_extsize,
> > > +             .fsx_nextents   = ma->fsx_nextents,
> > > +             .fsx_projid     = ma->fsx_projid,
> > > +             .fsx_cowextsize = ma->fsx_cowextsize,
> > > +     };
> >
> > That wants a comment along the lines of "guaranteed to be gap-free",
> > since otherwise you'd need memset() to avoid an infoleak.
> 
> Isn't structure initialization supposed to zero everything not
> explicitly initialized?

All fields, but not the padding...

> The one in io_uring() seems wrong also, as a beast needing
> file_dentry() should never get out of overlayfs and into io_uring:

That one would be wrong in overlayfs as well - we'd better had the
same names in all layers...

> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -9297,7 +9297,7 @@ static void __io_uring_show_fdinfo(struct
> io_ring_ctx *ctx, struct seq_file *m)
>                 struct file *f = *io_fixed_file_slot(ctx->file_data, i);
> 
>                 if (f)
> -                       seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname);
> +                       seq_printf(m, "%5u: %pD\n", i, f);
>                 else
>                         seq_printf(m, "%5u: <none>\n", i);
>         }
> 
> 
> Thanks,
> Miklos
Miklos Szeredi March 24, 2021, 1:45 p.m. UTC | #9
On Wed, Mar 24, 2021 at 1:28 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Wed, Mar 24, 2021 at 09:45:02AM +0100, Miklos Szeredi wrote:

> > Isn't structure initialization supposed to zero everything not
> > explicitly initialized?
>
> All fields, but not the padding...

Ah...  while the structure is unlikely to change, I'll switch to
memset to be safe.

>
> > The one in io_uring() seems wrong also, as a beast needing
> > file_dentry() should never get out of overlayfs and into io_uring:
>
> That one would be wrong in overlayfs as well - we'd better had the
> same names in all layers...

Right.

Thanks,
Miklos
diff mbox series

Patch

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index b7dcc86c92a4..a5aa2046d48f 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -80,6 +80,9 @@  prototypes::
 				struct file *, unsigned open_flag,
 				umode_t create_mode);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+	int (*miscattr_set)(struct user_namespace *mnt_userns,
+			    struct dentry *dentry, struct miscattr *ma);
+	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
 
 locking rules:
 	all may block
@@ -107,6 +110,8 @@  fiemap:		no
 update_time:	no
 atomic_open:	shared (exclusive if O_CREAT is set in open flags)
 tmpfile:	no
+miscattr_get:	no or exclusive
+miscattr_set:	exclusive
 ============	=============================================
 
 
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 2049bbf5e388..f125ce6c3b47 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -441,6 +441,9 @@  As of kernel 2.6.22, the following members are defined:
 				   unsigned open_flag, umode_t create_mode);
 		int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
 	        int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
+		int (*miscattr_set)(struct user_namespace *mnt_userns,
+				    struct dentry *dentry, struct miscattr *ma);
+		int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
 	};
 
 Again, all methods are called without any locks being held, unless
@@ -588,6 +591,18 @@  otherwise noted.
 	atomically creating, opening and unlinking a file in given
 	directory.
 
+``miscattr_get``
+	called on ioctl(FS_IOC_GETFLAGS) and ioctl(FS_IOC_FSGETXATTR) to
+	retrieve miscellaneous filesystem flags and attributes.  Also
+	called before the relevant SET operation to check what is being
+	changed (in this case with i_rwsem locked exclusive).  If unset,
+	then fall back to f_op->ioctl().
+
+``miscattr_set``
+	called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
+	change miscellaneous filesystem flags and attributes.  Callers hold
+	i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
+
 
 The Address Space Object
 ========================
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 4e6cc0a7d69c..e5f3820809a4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -19,6 +19,9 @@ 
 #include <linux/falloc.h>
 #include <linux/sched/signal.h>
 #include <linux/fiemap.h>
+#include <linux/mount.h>
+#include <linux/fscrypt.h>
+#include <linux/miscattr.h>
 
 #include "internal.h"
 
@@ -657,6 +660,311 @@  static int ioctl_file_dedupe_range(struct file *file,
 	return ret;
 }
 
+/**
+ * miscattr_fill_xflags - initialize miscattr with xflags
+ * @ma:		miscattr pointer
+ * @xflags:	FS_XFLAG_* flags
+ *
+ * Set ->fsx_xflags, ->xattr_valid and ->flags (translated xflags).  All
+ * other fields are zeroed.
+ */
+void miscattr_fill_xflags(struct miscattr *ma, u32 xflags)
+{
+	memset(ma, 0, sizeof(*ma));
+	ma->xattr_valid = true;
+	ma->fsx_xflags = xflags;
+	if (ma->fsx_xflags & FS_XFLAG_IMMUTABLE)
+		ma->flags |= FS_IMMUTABLE_FL;
+	if (ma->fsx_xflags & FS_XFLAG_APPEND)
+		ma->flags |= FS_APPEND_FL;
+	if (ma->fsx_xflags & FS_XFLAG_SYNC)
+		ma->flags |= FS_SYNC_FL;
+	if (ma->fsx_xflags & FS_XFLAG_NOATIME)
+		ma->flags |= FS_NOATIME_FL;
+	if (ma->fsx_xflags & FS_XFLAG_NODUMP)
+		ma->flags |= FS_NODUMP_FL;
+	if (ma->fsx_xflags & FS_XFLAG_DAX)
+		ma->flags |= FS_DAX_FL;
+	if (ma->fsx_xflags & FS_XFLAG_PROJINHERIT)
+		ma->flags |= FS_PROJINHERIT_FL;
+}
+EXPORT_SYMBOL(miscattr_fill_xflags);
+
+/**
+ * miscattr_fill_flags - initialize miscattr with flags
+ * @ma:		miscattr pointer
+ * @flags:	FS_*_FL flags
+ *
+ * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags).
+ * All other fields are zeroed.
+ */
+void miscattr_fill_flags(struct miscattr *ma, u32 flags)
+{
+	memset(ma, 0, sizeof(*ma));
+	ma->flags_valid = true;
+	ma->flags = flags;
+	if (ma->flags & FS_SYNC_FL)
+		ma->fsx_xflags |= FS_XFLAG_SYNC;
+	if (ma->flags & FS_IMMUTABLE_FL)
+		ma->fsx_xflags |= FS_XFLAG_IMMUTABLE;
+	if (ma->flags & FS_APPEND_FL)
+		ma->fsx_xflags |= FS_XFLAG_APPEND;
+	if (ma->flags & FS_NODUMP_FL)
+		ma->fsx_xflags |= FS_XFLAG_NODUMP;
+	if (ma->flags & FS_NOATIME_FL)
+		ma->fsx_xflags |= FS_XFLAG_NOATIME;
+	if (ma->flags & FS_DAX_FL)
+		ma->fsx_xflags |= FS_XFLAG_DAX;
+	if (ma->flags & FS_PROJINHERIT_FL)
+		ma->fsx_xflags |= FS_XFLAG_PROJINHERIT;
+}
+EXPORT_SYMBOL(miscattr_fill_flags);
+
+/**
+ * vfs_miscattr_get - retrieve miscellaneous inode attributes
+ * @dentry:	the object to retrieve from
+ * @ma:		miscattr pointer
+ *
+ * Call i_op->miscattr_get() callback, if exists.
+ *
+ * Returns 0 on success, or a negative error on failure.
+ */
+int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (d_is_special(dentry))
+		return -ENOTTY;
+
+	if (!inode->i_op->miscattr_get)
+		return -ENOIOCTLCMD;
+
+	return inode->i_op->miscattr_get(dentry, ma);
+}
+EXPORT_SYMBOL(vfs_miscattr_get);
+
+/**
+ * fsxattr_copy_to_user - copy fsxattr to userspace.
+ * @ma:		miscattr pointer
+ * @ufa:	fsxattr user pointer
+ *
+ * Returns 0 on success, or -EFAULT on failure.
+ */
+int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa)
+{
+	struct fsxattr fa = {
+		.fsx_xflags	= ma->fsx_xflags,
+		.fsx_extsize	= ma->fsx_extsize,
+		.fsx_nextents	= ma->fsx_nextents,
+		.fsx_projid	= ma->fsx_projid,
+		.fsx_cowextsize	= ma->fsx_cowextsize,
+	};
+
+	if (copy_to_user(ufa, &fa, sizeof(fa)))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(fsxattr_copy_to_user);
+
+static int fsxattr_copy_from_user(struct miscattr *ma,
+				  struct fsxattr __user *ufa)
+{
+	struct fsxattr fa;
+
+	if (copy_from_user(&fa, ufa, sizeof(fa)))
+		return -EFAULT;
+
+	miscattr_fill_xflags(ma, fa.fsx_xflags);
+	ma->fsx_extsize = fa.fsx_extsize;
+	ma->fsx_nextents = fa.fsx_nextents;
+	ma->fsx_projid = fa.fsx_projid;
+	ma->fsx_cowextsize = fa.fsx_cowextsize;
+
+	return 0;
+}
+
+/*
+ * Generic function to check FS_IOC_FSSETXATTR/FS_IOC_SETFLAGS values and reject
+ * any invalid configurations.
+ *
+ * Note: must be called with inode lock held.
+ */
+static int miscattr_set_prepare(struct inode *inode,
+			      const struct miscattr *old_ma,
+			      struct miscattr *ma)
+{
+	int err;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 */
+	if ((ma->flags ^ old_ma->flags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
+	    !capable(CAP_LINUX_IMMUTABLE))
+		return -EPERM;
+
+	err = fscrypt_prepare_setflags(inode, old_ma->flags, ma->flags);
+	if (err)
+		return err;
+
+	/*
+	 * Project Quota ID state is only allowed to change from within the init
+	 * namespace. Enforce that restriction only if we are trying to change
+	 * the quota ID state. Everything else is allowed in user namespaces.
+	 */
+	if (current_user_ns() != &init_user_ns) {
+		if (old_ma->fsx_projid != ma->fsx_projid)
+			return -EINVAL;
+		if ((old_ma->fsx_xflags ^ ma->fsx_xflags) &
+				FS_XFLAG_PROJINHERIT)
+			return -EINVAL;
+	}
+
+	/* Check extent size hints. */
+	if ((ma->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	if ((ma->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
+			!S_ISDIR(inode->i_mode))
+		return -EINVAL;
+
+	if ((ma->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
+	    !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+		return -EINVAL;
+
+	/*
+	 * It is only valid to set the DAX flag on regular files and
+	 * directories on filesystems.
+	 */
+	if ((ma->fsx_xflags & FS_XFLAG_DAX) &&
+	    !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+		return -EINVAL;
+
+	/* Extent size hints of zero turn off the flags. */
+	if (ma->fsx_extsize == 0)
+		ma->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+	if (ma->fsx_cowextsize == 0)
+		ma->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+
+	return 0;
+}
+
+/**
+ * vfs_miscattr_set - change miscellaneous inode attributes
+ * @dentry:	the object to change
+ * @ma:		miscattr pointer
+ *
+ * After verifying permissions, call i_op->miscattr_set() callback, if
+ * exists.
+ *
+ * Verifying attributes involves retrieving current attributes with
+ * i_op->miscattr_get(), this also allows initilaizing attributes that have
+ * not been set by the caller to current values.  Inode lock is held
+ * thoughout to prevent racing with another instance.
+ *
+ * Returns 0 on success, or a negative error on failure.
+ */
+int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
+		     struct miscattr *ma)
+{
+	struct inode *inode = d_inode(dentry);
+	struct miscattr old_ma = {};
+	int err;
+
+	if (d_is_special(dentry))
+		return -ENOTTY;
+
+	if (!inode->i_op->miscattr_set)
+		return -ENOIOCTLCMD;
+
+	if (!inode_owner_or_capable(mnt_userns, inode))
+		return -EPERM;
+
+	inode_lock(inode);
+	err = vfs_miscattr_get(dentry, &old_ma);
+	if (!err) {
+		/* initialize missing bits from old_ma */
+		if (ma->flags_valid) {
+			ma->fsx_xflags |= old_ma.fsx_xflags & ~FS_XFLAG_COMMON;
+			ma->fsx_extsize = old_ma.fsx_extsize;
+			ma->fsx_nextents = old_ma.fsx_nextents;
+			ma->fsx_projid = old_ma.fsx_projid;
+			ma->fsx_cowextsize = old_ma.fsx_cowextsize;
+		} else {
+			ma->flags |= old_ma.flags & ~FS_COMMON_FL;
+		}
+		err = miscattr_set_prepare(inode, &old_ma, ma);
+		if (!err)
+			err = inode->i_op->miscattr_set(mnt_userns, dentry, ma);
+	}
+	inode_unlock(inode);
+
+	return err;
+}
+EXPORT_SYMBOL(vfs_miscattr_set);
+
+static int ioctl_getflags(struct file *file, void __user *argp)
+{
+	struct miscattr ma = { .flags_valid = true }; /* hint only */
+	unsigned int flags;
+	int err;
+
+	err = vfs_miscattr_get(file_dentry(file), &ma);
+	if (!err) {
+		flags = ma.flags;
+		if (copy_to_user(argp, &flags, sizeof(flags)))
+			err = -EFAULT;
+	}
+	return err;
+}
+
+static int ioctl_setflags(struct file *file, void __user *argp)
+{
+	struct miscattr ma;
+	unsigned int flags;
+	int err;
+
+	if (copy_from_user(&flags, argp, sizeof(flags)))
+		return -EFAULT;
+
+	err = mnt_want_write_file(file);
+	if (!err) {
+		miscattr_fill_flags(&ma, flags);
+		err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
+		mnt_drop_write_file(file);
+	}
+	return err;
+}
+
+static int ioctl_fsgetxattr(struct file *file, void __user *argp)
+{
+	struct miscattr ma = { .xattr_valid = true }; /* hint only */
+	int err;
+
+	err = vfs_miscattr_get(file_dentry(file), &ma);
+	if (!err)
+		err = fsxattr_copy_to_user(&ma, argp);
+
+	return err;
+}
+
+static int ioctl_fssetxattr(struct file *file, void __user *argp)
+{
+	struct miscattr ma;
+	int err;
+
+	err = fsxattr_copy_from_user(&ma, argp);
+	if (!err) {
+		err = mnt_want_write_file(file);
+		if (!err) {
+			err = vfs_miscattr_set(file_mnt_user_ns(file), file_dentry(file), &ma);
+			mnt_drop_write_file(file);
+		}
+	}
+	return err;
+}
+
 /*
  * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
  * It's just a simple helper for sys_ioctl and compat_sys_ioctl.
@@ -727,6 +1035,18 @@  static int do_vfs_ioctl(struct file *filp, unsigned int fd,
 		return put_user(i_size_read(inode) - filp->f_pos,
 				(int __user *)argp);
 
+	case FS_IOC_GETFLAGS:
+		return ioctl_getflags(filp, argp);
+
+	case FS_IOC_SETFLAGS:
+		return ioctl_setflags(filp, argp);
+
+	case FS_IOC_FSGETXATTR:
+		return ioctl_fsgetxattr(filp, argp);
+
+	case FS_IOC_FSSETXATTR:
+		return ioctl_fssetxattr(filp, argp);
+
 	default:
 		if (S_ISREG(inode->i_mode))
 			return file_ioctl(filp, cmd, argp);
@@ -827,6 +1147,15 @@  COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
 		break;
 #endif
 
+	/*
+	 * These access 32-bit values anyway so no further handling is
+	 * necessary.
+	 */
+	case FS_IOC32_GETFLAGS:
+	case FS_IOC32_SETFLAGS:
+		cmd = (cmd == FS_IOC32_GETFLAGS) ?
+			FS_IOC_GETFLAGS : FS_IOC_SETFLAGS;
+		fallthrough;
 	/*
 	 * everything else in do_vfs_ioctl() takes either a compatible
 	 * pointer argument or no argument -- call it with a modified
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ec8f3ddf4a6a..9e7f6a592a70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,6 +70,7 @@  struct fsverity_info;
 struct fsverity_operations;
 struct fs_context;
 struct fs_parameter_spec;
+struct miscattr;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -1963,6 +1964,9 @@  struct inode_operations {
 			struct dentry *, umode_t);
 	int (*set_acl)(struct user_namespace *, struct inode *,
 		       struct posix_acl *, int);
+	int (*miscattr_set)(struct user_namespace *mnt_userns,
+			    struct dentry *dentry, struct miscattr *ma);
+	int (*miscattr_get)(struct dentry *dentry, struct miscattr *ma);
 } ____cacheline_aligned;
 
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
diff --git a/include/linux/miscattr.h b/include/linux/miscattr.h
new file mode 100644
index 000000000000..13683eb6ac78
--- /dev/null
+++ b/include/linux/miscattr.h
@@ -0,0 +1,53 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_MISCATTR_H
+#define _LINUX_MISCATTR_H
+
+/* Flags shared betwen flags/xflags */
+#define FS_COMMON_FL \
+	(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
+	 FS_NODUMP_FL |	FS_NOATIME_FL | FS_DAX_FL | \
+	 FS_PROJINHERIT_FL)
+
+#define FS_XFLAG_COMMON \
+	(FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND | \
+	 FS_XFLAG_NODUMP | FS_XFLAG_NOATIME | FS_XFLAG_DAX | \
+	 FS_XFLAG_PROJINHERIT)
+
+struct miscattr {
+	u32	flags;		/* flags (FS_IOC_GETFLAGS/FS_IOC_SETFLAGS) */
+	/* struct fsxattr: */
+	u32	fsx_xflags;	/* xflags field value (get/set) */
+	u32	fsx_extsize;	/* extsize field value (get/set)*/
+	u32	fsx_nextents;	/* nextents field value (get)	*/
+	u32	fsx_projid;	/* project identifier (get/set) */
+	u32	fsx_cowextsize;	/* CoW extsize field value (get/set)*/
+	/* selectors: */
+	bool	flags_valid:1;
+	bool	xattr_valid:1;
+};
+
+int fsxattr_copy_to_user(const struct miscattr *ma, struct fsxattr __user *ufa);
+
+void miscattr_fill_xflags(struct miscattr *ma, u32 xflags);
+void miscattr_fill_flags(struct miscattr *ma, u32 flags);
+
+/**
+ * miscattr_has_xattr - check for extentended flags/attributes
+ * @ma:		miscattr pointer
+ *
+ * Returns true if any attributes are present that are not represented in
+ * ->flags.
+ */
+static inline bool miscattr_has_xattr(const struct miscattr *ma)
+{
+	return ma->xattr_valid &&
+		((ma->fsx_xflags & ~FS_XFLAG_COMMON) || ma->fsx_extsize != 0 ||
+		 ma->fsx_projid != 0 ||	ma->fsx_cowextsize != 0);
+}
+
+int vfs_miscattr_get(struct dentry *dentry, struct miscattr *ma);
+int vfs_miscattr_set(struct user_namespace *mnt_userns, struct dentry *dentry,
+		     struct miscattr *ma);
+
+#endif /* _LINUX_MISCATTR_H */