Message ID | 1462372014-3786-4-git-send-email-tixxdz@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Djalal Harouni (tixxdz@gmail.com): > If a process gets access to a mount from a different user > namespace, that process should not be able to take advantage of > setuid files or selinux entrypoints from that filesystem. Prevent > this by treating mounts from other mount namespaces and those not > owned by current_user_ns() or an ancestor as nosuid. > > This patch was just adapted from the original one that was written > by Andy Lutomirski <luto@amacapital.net> > https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html I'm not sure that this makes sense given what you're doing. In the case of Seth's set, a filesystem is mounted specifically (and privately) in a user namespace. We don't want for instance the initial user ns to find a link to a setuid-root exploit left in the container-mounted filesystem. But you are having a parent user namespace mount the fs so that its children can all access the fs, uid-shifted for convenience. Not allowing the child namespaces to make use of setuid-root does not seem applicable here. > Signed-off-by: Djalal Harouni <tixxdz@opendz.org> > --- > fs/exec.c | 2 +- > fs/namespace.c | 15 +++++++++++++++ > include/linux/mount.h | 1 + > include/linux/user_namespace.h | 8 ++++++++ > kernel/user_namespace.c | 13 +++++++++++++ > security/commoncap.c | 2 +- > security/selinux/hooks.c | 2 +- > 7 files changed, 40 insertions(+), 3 deletions(-) > > diff --git a/fs/exec.c b/fs/exec.c > index c4010b8..706088d 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1391,7 +1391,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) > bprm->cred->euid = current_euid(); > bprm->cred->egid = current_egid(); > > - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) > + if (!mnt_may_suid(bprm->file->f_path.mnt)) > return; > > if (task_no_new_privs(current)) > diff --git a/fs/namespace.c b/fs/namespace.c > index de02b39..a8820fb 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -3374,6 +3374,21 @@ found: > return visible; > } > > +bool mnt_may_suid(struct vfsmount *mnt) > +{ > + struct mount *m = real_mount(mnt); > + > + /* > + * Foreign mounts (accessed via fchdir or through /proc > + * symlinks) are always treated as if they are nosuid. This > + * prevents namespaces from trusting potentially unsafe > + * suid/sgid bits, file caps, or security labels that originate > + * in other namespaces. > + */ > + return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(m) && > + in_userns(current_user_ns(), m->mnt_ns->user_ns); > +} > + > static struct ns_common *mntns_get(struct task_struct *task) > { > struct ns_common *ns = NULL; > diff --git a/include/linux/mount.h b/include/linux/mount.h > index f822c3c..54a594d 100644 > --- a/include/linux/mount.h > +++ b/include/linux/mount.h > @@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt); > extern struct vfsmount *mntget(struct vfsmount *mnt); > extern struct vfsmount *mnt_clone_internal(struct path *path); > extern int __mnt_is_readonly(struct vfsmount *mnt); > +extern bool mnt_may_suid(struct vfsmount *mnt); > > struct path; > extern struct vfsmount *clone_private_mount(struct path *path); > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index 8297e5b..a43faa7 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -72,6 +72,8 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, > extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); > extern int proc_setgroups_show(struct seq_file *m, void *v); > extern bool userns_may_setgroups(const struct user_namespace *ns); > +extern bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns); > #else > > static inline struct user_namespace *get_user_ns(struct user_namespace *ns) > @@ -100,6 +102,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) > { > return true; > } > + > +static inline bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns) > +{ > + return true; > +} > #endif > > #endif /* _LINUX_USER_H */ > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 9bafc21..9a496a8 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -938,6 +938,19 @@ bool userns_may_setgroups(const struct user_namespace *ns) > return allowed; > } > > +/* > + * Returns true if @ns is the same namespace as or a descendant of > + * @target_ns. > + */ > +bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns) > +{ > + for (; ns; ns = ns->parent) { > + if (ns == target_ns) > + return true; > + } > +} > + > static inline struct user_namespace *to_user_ns(struct ns_common *ns) > { > return container_of(ns, struct user_namespace, ns); > diff --git a/security/commoncap.c b/security/commoncap.c > index 48071ed..6c082d2 100644 > --- a/security/commoncap.c > +++ b/security/commoncap.c > @@ -453,7 +453,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c > if (!file_caps_enabled) > return 0; > > - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) > + if (!mnt_may_suid(bprm->file->f_path.mnt)) > return 0; > > rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c > index 912deee..1350167 100644 > --- a/security/selinux/hooks.c > +++ b/security/selinux/hooks.c > @@ -2234,7 +2234,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, > const struct task_security_struct *new_tsec) > { > int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS); > - int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID); > + int nosuid = !mnt_may_suid(bprm->file->f_path.mnt); > int rc; > > if (!nnp && !nosuid) > -- > 2.5.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 04, 2016 at 11:19:04PM +0000, Serge Hallyn wrote: > Quoting Djalal Harouni (tixxdz@gmail.com): > > If a process gets access to a mount from a different user > > namespace, that process should not be able to take advantage of > > setuid files or selinux entrypoints from that filesystem. Prevent > > this by treating mounts from other mount namespaces and those not > > owned by current_user_ns() or an ancestor as nosuid. > > > > This patch was just adapted from the original one that was written > > by Andy Lutomirski <luto@amacapital.net> > > https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html > > I'm not sure that this makes sense given what you're doing. In the > case of Seth's set, a filesystem is mounted specifically (and privately) > in a user namespace. We don't want for instance the initial user ns > to find a link to a setuid-root exploit left in the container-mounted > filesystem. > > But you are having a parent user namespace mount the fs so that its > children can all access the fs, uid-shifted for convenience. Not > allowing the child namespaces to make use of setuid-root does not > seem applicable here. Right, the problem addressed by this patch probably isn't relevant to this sort of uid shifting. But I think there's another problem that needs to be addressed. bprm_fill_uid() still gets the ids for sxid files unshifted from the inode. We already protect against sxid to any user not in bprm->cred->user_ns, so it will just ignore the sxid instead of e.g. suid as global root from the id shifted mount, which is good. What would be wanted though is to use the shifted ids so that something like suid-root ping in the container rootfs would work. Seth -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi, On Thu, May 05, 2016 at 08:05:08AM -0500, Seth Forshee wrote: > On Wed, May 04, 2016 at 11:19:04PM +0000, Serge Hallyn wrote: > > Quoting Djalal Harouni (tixxdz@gmail.com): > > > If a process gets access to a mount from a different user > > > namespace, that process should not be able to take advantage of > > > setuid files or selinux entrypoints from that filesystem. Prevent > > > this by treating mounts from other mount namespaces and those not > > > owned by current_user_ns() or an ancestor as nosuid. > > > > > > This patch was just adapted from the original one that was written > > > by Andy Lutomirski <luto@amacapital.net> > > > https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html > > > > I'm not sure that this makes sense given what you're doing. In the > > case of Seth's set, a filesystem is mounted specifically (and privately) > > in a user namespace. We don't want for instance the initial user ns > > to find a link to a setuid-root exploit left in the container-mounted > > filesystem. > > > > But you are having a parent user namespace mount the fs so that its > > children can all access the fs, uid-shifted for convenience. Not > > allowing the child namespaces to make use of setuid-root does not > > seem applicable here. > > Right, the problem addressed by this patch probably isn't relevant to > this sort of uid shifting. I'll have another deep look into it, yes the aim when I ported this, is I was not sure about setns(), or if you get a handle to a mount namespace through /proc or anything else... then you call into it from an external user namespace. > But I think there's another problem that needs to be addressed. > bprm_fill_uid() still gets the ids for sxid files unshifted from the > inode. We already protect against sxid to any user not in > bprm->cred->user_ns, so it will just ignore the sxid instead of e.g. > suid as global root from the id shifted mount, which is good. What would > be wanted though is to use the shifted ids so that something like > suid-root ping in the container rootfs would work. > > Seth Ok thank you Seth! I'll note it and try to fix it.
diff --git a/fs/exec.c b/fs/exec.c index c4010b8..706088d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1391,7 +1391,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) bprm->cred->euid = current_euid(); bprm->cred->egid = current_egid(); - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + if (!mnt_may_suid(bprm->file->f_path.mnt)) return; if (task_no_new_privs(current)) diff --git a/fs/namespace.c b/fs/namespace.c index de02b39..a8820fb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3374,6 +3374,21 @@ found: return visible; } +bool mnt_may_suid(struct vfsmount *mnt) +{ + struct mount *m = real_mount(mnt); + + /* + * Foreign mounts (accessed via fchdir or through /proc + * symlinks) are always treated as if they are nosuid. This + * prevents namespaces from trusting potentially unsafe + * suid/sgid bits, file caps, or security labels that originate + * in other namespaces. + */ + return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(m) && + in_userns(current_user_ns(), m->mnt_ns->user_ns); +} + static struct ns_common *mntns_get(struct task_struct *task) { struct ns_common *ns = NULL; diff --git a/include/linux/mount.h b/include/linux/mount.h index f822c3c..54a594d 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(struct path *path); extern int __mnt_is_readonly(struct vfsmount *mnt); +extern bool mnt_may_suid(struct vfsmount *mnt); struct path; extern struct vfsmount *clone_private_mount(struct path *path); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 8297e5b..a43faa7 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,6 +72,8 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); +extern bool in_userns(const struct user_namespace *ns, + const struct user_namespace *target_ns); #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -100,6 +102,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) { return true; } + +static inline bool in_userns(const struct user_namespace *ns, + const struct user_namespace *target_ns) +{ + return true; +} #endif #endif /* _LINUX_USER_H */ diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9bafc21..9a496a8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -938,6 +938,19 @@ bool userns_may_setgroups(const struct user_namespace *ns) return allowed; } +/* + * Returns true if @ns is the same namespace as or a descendant of + * @target_ns. + */ +bool in_userns(const struct user_namespace *ns, + const struct user_namespace *target_ns) +{ + for (; ns; ns = ns->parent) { + if (ns == target_ns) + return true; + } +} + static inline struct user_namespace *to_user_ns(struct ns_common *ns) { return container_of(ns, struct user_namespace, ns); diff --git a/security/commoncap.c b/security/commoncap.c index 48071ed..6c082d2 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -453,7 +453,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c if (!file_caps_enabled) return 0; - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + if (!mnt_may_suid(bprm->file->f_path.mnt)) return 0; rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 912deee..1350167 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2234,7 +2234,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, const struct task_security_struct *new_tsec) { int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS); - int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID); + int nosuid = !mnt_may_suid(bprm->file->f_path.mnt); int rc; if (!nnp && !nosuid)
If a process gets access to a mount from a different user namespace, that process should not be able to take advantage of setuid files or selinux entrypoints from that filesystem. Prevent this by treating mounts from other mount namespaces and those not owned by current_user_ns() or an ancestor as nosuid. This patch was just adapted from the original one that was written by Andy Lutomirski <luto@amacapital.net> https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html Signed-off-by: Djalal Harouni <tixxdz@opendz.org> --- fs/exec.c | 2 +- fs/namespace.c | 15 +++++++++++++++ include/linux/mount.h | 1 + include/linux/user_namespace.h | 8 ++++++++ kernel/user_namespace.c | 13 +++++++++++++ security/commoncap.c | 2 +- security/selinux/hooks.c | 2 +- 7 files changed, 40 insertions(+), 3 deletions(-)