@@ -31,6 +31,7 @@
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
+#include <linux/mnt_idmapping.h>
#include "pnode.h"
#include "internal.h"
@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mn
struct user_namespace *mnt_userns;
mnt_userns = mnt_user_ns(&mnt->mnt);
- if (mnt_userns != &init_user_ns)
+ if (!initial_idmapping(mnt_userns))
put_user_ns(mnt_userns);
kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struc
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
struct mount *mnt;
+ struct user_namespace *fs_userns;
if (!fc->root)
return ERR_PTR(-EINVAL);
@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
+ fs_userns = mnt->mnt.mnt_sb->s_user_ns;
+ if (!initial_idmapping(fs_userns))
+ mnt->mnt.mnt_userns = get_user_ns(fs_userns);
+
lock_mount_hash();
list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
unlock_mount_hash();
@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mo
atomic_inc(&sb->s_active);
mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
- if (mnt->mnt.mnt_userns != &init_user_ns)
+ if (!initial_idmapping(mnt->mnt.mnt_userns))
mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
@@ -3927,11 +3933,19 @@ static unsigned int recalc_flags(struct
static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
struct vfsmount *m = &mnt->mnt;
+ struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
if (!kattr->mnt_userns)
return 0;
/*
+ * Creating an idmapped mount with the filesystem-wide idmapping
+ * doesn't make sense, so reject it. We don't allow ambiguous semantics.
+ */
+ if (kattr->mnt_userns == fs_userns)
+ return -EINVAL;
+
+ /*
* Once a mount has been idmapped we don't allow it to change its
* mapping. It makes things simpler and callers can just create
* another bind-mount they can idmap if they want to.
@@ -3943,12 +3957,8 @@ static int can_idmap_mount(const struct
if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
return -EINVAL;
- /* Don't yet support filesystem mountable in user namespaces. */
- if (m->mnt_sb->s_user_ns != &init_user_ns)
- return -EINVAL;
-
/* We're not controlling the superblock. */
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
return -EPERM;
/* Mount has already been visible in the filesystem hierarchy. */
@@ -4002,14 +4012,27 @@ out:
static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
- struct user_namespace *mnt_userns;
+ struct user_namespace *mnt_userns, *old_mnt_userns;
if (!kattr->mnt_userns)
return;
+ /*
+ * We're the only ones able to change the mount's idmapping. So
+ * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
+ */
+ old_mnt_userns = mnt->mnt.mnt_userns;
+
mnt_userns = get_user_ns(kattr->mnt_userns);
/* Pairs with smp_load_acquire() in mnt_user_ns(). */
smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
+
+ /*
+ * If this is an idmapped filesystem, drop the reference taken when the
+ * mount was created in vfs_create_mount() or clone_mnt().
+ */
+ if (!initial_idmapping(old_mnt_userns))
+ put_user_ns(old_mnt_userns);
}
static void mount_setattr_commit(struct mount_kattr *kattr,
@@ -4133,13 +4156,15 @@ static int build_mount_idmapped(const st
}
/*
- * The init_user_ns is used to indicate that a vfsmount is not idmapped.
- * This is simpler than just having to treat NULL as unmapped. Users
- * wanting to idmap a mount to init_user_ns can just use a namespace
- * with an identity mapping.
+ * The initial idmapping cannot be used to create an idmapped
+ * mount. We use the initial idmapping as an indicator of a mount
+ * that is not idmapped. It can simply be passed into helpers that
+ * are aware of idmapped mounts as a convenient shortcut. A user
+ * can just create a dedicated identity mapping to achieve the same
+ * result.
*/
mnt_userns = container_of(ns, struct user_namespace, ns);
- if (mnt_userns == &init_user_ns) {
+ if (initial_idmapping(mnt_userns)) {
err = -EPERM;
goto out_fput;
}
@@ -641,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user
int chown_common(const struct path *path, uid_t user, gid_t group)
{
- struct user_namespace *mnt_userns;
+ struct user_namespace *mnt_userns, *fs_userns;
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
int error;
@@ -653,8 +653,9 @@ int chown_common(const struct path *path
gid = make_kgid(current_user_ns(), group);
mnt_userns = mnt_user_ns(path->mnt);
- uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
- gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
+ fs_userns = i_user_ns(inode);
+ uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
+ gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
@@ -377,8 +377,8 @@ posix_acl_permission(struct user_namespa
break;
case ACL_USER:
uid = mapped_kuid_fs(mnt_userns,
- &init_user_ns,
- pa->e_uid);
+ i_user_ns(inode),
+ pa->e_uid);
if (uid_eq(uid, current_fsuid()))
goto mask;
break;
@@ -392,8 +392,8 @@ posix_acl_permission(struct user_namespa
break;
case ACL_GROUP:
gid = mapped_kgid_fs(mnt_userns,
- &init_user_ns,
- pa->e_gid);
+ i_user_ns(inode),
+ pa->e_gid);
if (in_group_p(gid)) {
found = 1;
if ((pa->e_perm & want) == want)
@@ -1643,7 +1643,7 @@ static inline void i_gid_write(struct in
static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kuid_fs(mnt_userns, &init_user_ns, inode->i_uid);
+ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
}
/**
@@ -1657,7 +1657,7 @@ static inline kuid_t i_uid_into_mnt(stru
static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return mapped_kgid_fs(mnt_userns, &init_user_ns, inode->i_gid);
+ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
}
/**
@@ -1671,7 +1671,7 @@ static inline kgid_t i_gid_into_mnt(stru
static inline void inode_fsuid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_uid = mapped_fsuid(mnt_userns, &init_user_ns);
+ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
}
/**
@@ -1685,7 +1685,7 @@ static inline void inode_fsuid_set(struc
static inline void inode_fsgid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_gid = mapped_fsgid(mnt_userns, &init_user_ns);
+ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
}
/**
@@ -1706,10 +1706,10 @@ static inline bool fsuidgid_has_mapping(
kuid_t kuid;
kgid_t kgid;
- kuid = mapped_fsuid(mnt_userns, &init_user_ns);
+ kuid = mapped_fsuid(mnt_userns, fs_userns);
if (!uid_valid(kuid))
return false;
- kgid = mapped_fsgid(mnt_userns, &init_user_ns);
+ kgid = mapped_fsgid(mnt_userns, fs_userns);
if (!gid_valid(kgid))
return false;
return kuid_has_mapping(fs_userns, kuid) &&
@@ -2655,13 +2655,14 @@ static inline struct user_namespace *fil
* is_idmapped_mnt - check whether a mount is mapped
* @mnt: the mount to check
*
- * If @mnt has an idmapping attached to it @mnt is mapped.
+ * If @mnt has an idmapping attached that differs from the
+ * filesystem's idmapping then @mnt is mapped.
*
* Return: true if mount is mapped, false if not.
*/
static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
{
- return mnt_user_ns(mnt) != &init_user_ns;
+ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
}
extern long vfs_truncate(const struct path *, loff_t);
@@ -419,7 +419,7 @@ int cap_inode_getsecurity(struct user_na
kroot = make_kuid(fs_ns, root);
/* If this is an idmapped mount shift the kuid. */
- kroot = mapped_kuid_fs(mnt_userns, &init_user_ns, kroot);
+ kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
/* If the root kuid maps to a valid uid in current ns, then return
* this as a nscap. */
@@ -556,13 +556,12 @@ int cap_convert_nscap(struct user_namesp
return -EINVAL;
if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
return -EPERM;
- if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
+ if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
/* user is privileged, just write the v2 */
return size;
- rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns,
- &init_user_ns);
+ rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
if (!uid_valid(rootid))
return -EINVAL;
@@ -703,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_n
/* Limit the caps to the mounter of the filesystem
* or the more limited uid specified in the xattr.
*/
- rootkuid = mapped_kuid_fs(mnt_userns, &init_user_ns, rootkuid);
+ rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
if (!rootid_owns_currentns(rootkuid))
return -ENODATA;