Message ID | 87r1yeqxbp.fsf_-_@x220.int.ebiederm.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | proc: Actually honor the mount options | expand |
On Fri, Feb 28, 2020 at 02:19:22PM -0600, Eric W. Biederman wrote: > > There remains no more code in the kernel using pids_ns->proc_mnt, > therefore remove it from the kernel. > > The big benefit of this change is that one of the most error prone and > tricky parts of the pid namespace implementation, maintaining kernel > mounts of proc is removed. > > In addition removing the unnecessary complexity of the kernel mount > fixes a regression that caused the proc mount options to be ignored. > Now that the initial mount of proc comes from userspace, those mount > options are again honored. This fixes Android's usage of the proc > hidepid option. > > Reported-by: Alistair Strachan <astrachan@google.com> > Fixes: e94591d0d90c ("proc: Convert proc_mount to use mount_ns.") > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- > fs/proc/root.c | 36 ----------------------------------- What about proc_flush_task()? Or is this on top of your other series? > include/linux/pid_namespace.h | 2 -- > include/linux/proc_ns.h | 5 ----- > kernel/pid.c | 8 -------- > kernel/pid_namespace.c | 7 ------- > 5 files changed, 58 deletions(-) > > diff --git a/fs/proc/root.c b/fs/proc/root.c > index 608233dfd29c..2633f10446c3 100644 > --- a/fs/proc/root.c > +++ b/fs/proc/root.c > @@ -292,39 +292,3 @@ struct proc_dir_entry proc_root = { > .subdir = RB_ROOT, > .name = "/proc", > }; > - > -int pid_ns_prepare_proc(struct pid_namespace *ns) > -{ > - struct proc_fs_context *ctx; > - struct fs_context *fc; > - struct vfsmount *mnt; > - > - fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); > - if (IS_ERR(fc)) > - return PTR_ERR(fc); > - > - if (fc->user_ns != ns->user_ns) { > - put_user_ns(fc->user_ns); > - fc->user_ns = get_user_ns(ns->user_ns); > - } > - > - ctx = fc->fs_private; > - if (ctx->pid_ns != ns) { > - put_pid_ns(ctx->pid_ns); > - get_pid_ns(ns); > - ctx->pid_ns = ns; > - } > - > - mnt = fc_mount(fc); > - put_fs_context(fc); > - if (IS_ERR(mnt)) > - return PTR_ERR(mnt); > - > - ns->proc_mnt = mnt; > - return 0; > -} > - > -void pid_ns_release_proc(struct pid_namespace *ns) > -{ > - kern_unmount(ns->proc_mnt); > -} > diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h > index 2ed6af88794b..4956e362e55e 100644 > --- a/include/linux/pid_namespace.h > +++ b/include/linux/pid_namespace.h > @@ -33,7 +33,6 @@ struct pid_namespace { > unsigned int level; > struct pid_namespace *parent; > #ifdef CONFIG_PROC_FS > - struct vfsmount *proc_mnt; > struct dentry *proc_self; > struct dentry *proc_thread_self; > #endif > @@ -42,7 +41,6 @@ struct pid_namespace { > #endif > struct user_namespace *user_ns; > struct ucounts *ucounts; > - struct work_struct proc_work; > kgid_t pid_gid; > int hide_pid; > int reboot; /* group exit code if this pidns was rebooted */ > diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h > index 4626b1ac3b6c..e1106a077c1a 100644 > --- a/include/linux/proc_ns.h > +++ b/include/linux/proc_ns.h > @@ -50,16 +50,11 @@ enum { > > #ifdef CONFIG_PROC_FS > > -extern int pid_ns_prepare_proc(struct pid_namespace *ns); > -extern void pid_ns_release_proc(struct pid_namespace *ns); > extern int proc_alloc_inum(unsigned int *pino); > extern void proc_free_inum(unsigned int inum); > > #else /* CONFIG_PROC_FS */ > > -static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } > -static inline void pid_ns_release_proc(struct pid_namespace *ns) {} > - > static inline int proc_alloc_inum(unsigned int *inum) > { > *inum = 1; > diff --git a/kernel/pid.c b/kernel/pid.c > index ca08d6a3aa77..60820e72634c 100644 > --- a/kernel/pid.c > +++ b/kernel/pid.c > @@ -144,9 +144,6 @@ void free_pid(struct pid *pid) > /* Handle a fork failure of the first process */ > WARN_ON(ns->child_reaper); > ns->pid_allocated = 0; > - /* fall through */ > - case 0: > - schedule_work(&ns->proc_work); > break; > } > > @@ -247,11 +244,6 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, > tmp = tmp->parent; > } > > - if (unlikely(is_child_reaper(pid))) { > - if (pid_ns_prepare_proc(ns)) > - goto out_free; > - } > - > get_pid_ns(ns); > refcount_set(&pid->count, 1); > for (type = 0; type < PIDTYPE_MAX; ++type) > diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c > index d40017e79ebe..318fcc6ba301 100644 > --- a/kernel/pid_namespace.c > +++ b/kernel/pid_namespace.c > @@ -57,12 +57,6 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) > return READ_ONCE(*pkc); > } > > -static void proc_cleanup_work(struct work_struct *work) There's a comment in kernel/pid_namespace.c that references proc_cleanup_work(). Can you please remove that as well? > -{ > - struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); > - pid_ns_release_proc(ns); > -} > - > static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) > { > return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES); > @@ -114,7 +108,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns > ns->user_ns = get_user_ns(user_ns); > ns->ucounts = ucounts; > ns->pid_allocated = PIDNS_ADDING; > - INIT_WORK(&ns->proc_work, proc_cleanup_work); > > return ns; > > -- > 2.25.0 >
Christian Brauner <christian.brauner@ubuntu.com> writes: > On Fri, Feb 28, 2020 at 02:19:22PM -0600, Eric W. Biederman wrote: >> >> There remains no more code in the kernel using pids_ns->proc_mnt, >> therefore remove it from the kernel. >> >> The big benefit of this change is that one of the most error prone and >> tricky parts of the pid namespace implementation, maintaining kernel >> mounts of proc is removed. >> >> In addition removing the unnecessary complexity of the kernel mount >> fixes a regression that caused the proc mount options to be ignored. >> Now that the initial mount of proc comes from userspace, those mount >> options are again honored. This fixes Android's usage of the proc >> hidepid option. >> >> Reported-by: Alistair Strachan <astrachan@google.com> >> Fixes: e94591d0d90c ("proc: Convert proc_mount to use mount_ns.") >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >> --- >> fs/proc/root.c | 36 ----------------------------------- > > What about proc_flush_task()? Or is this on top of your other series? On top of the other series. Which is why it is a reply to it. That I have pushed into linux-next earlier today. No one seems worried enough about that patchset to comment on v2 so I am just going with it. >> include/linux/pid_namespace.h | 2 -- >> include/linux/proc_ns.h | 5 ----- >> kernel/pid.c | 8 -------- >> kernel/pid_namespace.c | 7 ------- >> 5 files changed, 58 deletions(-) >> >> diff --git a/fs/proc/root.c b/fs/proc/root.c >> index 608233dfd29c..2633f10446c3 100644 >> --- a/fs/proc/root.c >> +++ b/fs/proc/root.c >> @@ -292,39 +292,3 @@ struct proc_dir_entry proc_root = { >> .subdir = RB_ROOT, >> .name = "/proc", >> }; >> - >> -int pid_ns_prepare_proc(struct pid_namespace *ns) >> -{ >> - struct proc_fs_context *ctx; >> - struct fs_context *fc; >> - struct vfsmount *mnt; >> - >> - fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); >> - if (IS_ERR(fc)) >> - return PTR_ERR(fc); >> - >> - if (fc->user_ns != ns->user_ns) { >> - put_user_ns(fc->user_ns); >> - fc->user_ns = get_user_ns(ns->user_ns); >> - } >> - >> - ctx = fc->fs_private; >> - if (ctx->pid_ns != ns) { >> - put_pid_ns(ctx->pid_ns); >> - get_pid_ns(ns); >> - ctx->pid_ns = ns; >> - } >> - >> - mnt = fc_mount(fc); >> - put_fs_context(fc); >> - if (IS_ERR(mnt)) >> - return PTR_ERR(mnt); >> - >> - ns->proc_mnt = mnt; >> - return 0; >> -} >> - >> -void pid_ns_release_proc(struct pid_namespace *ns) >> -{ >> - kern_unmount(ns->proc_mnt); >> -} >> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h >> index 2ed6af88794b..4956e362e55e 100644 >> --- a/include/linux/pid_namespace.h >> +++ b/include/linux/pid_namespace.h >> @@ -33,7 +33,6 @@ struct pid_namespace { >> unsigned int level; >> struct pid_namespace *parent; >> #ifdef CONFIG_PROC_FS >> - struct vfsmount *proc_mnt; >> struct dentry *proc_self; >> struct dentry *proc_thread_self; >> #endif >> @@ -42,7 +41,6 @@ struct pid_namespace { >> #endif >> struct user_namespace *user_ns; >> struct ucounts *ucounts; >> - struct work_struct proc_work; >> kgid_t pid_gid; >> int hide_pid; >> int reboot; /* group exit code if this pidns was rebooted */ >> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h >> index 4626b1ac3b6c..e1106a077c1a 100644 >> --- a/include/linux/proc_ns.h >> +++ b/include/linux/proc_ns.h >> @@ -50,16 +50,11 @@ enum { >> >> #ifdef CONFIG_PROC_FS >> >> -extern int pid_ns_prepare_proc(struct pid_namespace *ns); >> -extern void pid_ns_release_proc(struct pid_namespace *ns); >> extern int proc_alloc_inum(unsigned int *pino); >> extern void proc_free_inum(unsigned int inum); >> >> #else /* CONFIG_PROC_FS */ >> >> -static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } >> -static inline void pid_ns_release_proc(struct pid_namespace *ns) {} >> - >> static inline int proc_alloc_inum(unsigned int *inum) >> { >> *inum = 1; >> diff --git a/kernel/pid.c b/kernel/pid.c >> index ca08d6a3aa77..60820e72634c 100644 >> --- a/kernel/pid.c >> +++ b/kernel/pid.c >> @@ -144,9 +144,6 @@ void free_pid(struct pid *pid) >> /* Handle a fork failure of the first process */ >> WARN_ON(ns->child_reaper); >> ns->pid_allocated = 0; >> - /* fall through */ >> - case 0: >> - schedule_work(&ns->proc_work); >> break; >> } >> >> @@ -247,11 +244,6 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, >> tmp = tmp->parent; >> } >> >> - if (unlikely(is_child_reaper(pid))) { >> - if (pid_ns_prepare_proc(ns)) >> - goto out_free; >> - } >> - >> get_pid_ns(ns); >> refcount_set(&pid->count, 1); >> for (type = 0; type < PIDTYPE_MAX; ++type) >> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c >> index d40017e79ebe..318fcc6ba301 100644 >> --- a/kernel/pid_namespace.c >> +++ b/kernel/pid_namespace.c >> @@ -57,12 +57,6 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) >> return READ_ONCE(*pkc); >> } >> >> -static void proc_cleanup_work(struct work_struct *work) > > There's a comment in kernel/pid_namespace.c that references > proc_cleanup_work(). Can you please remove that as well? Good catch. It isn't immediately obvious to me how that sentence needs to be updated but it should be done. Eric >> -{ >> - struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); >> - pid_ns_release_proc(ns); >> -} >> - >> static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) >> { >> return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES); >> @@ -114,7 +108,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns >> ns->user_ns = get_user_ns(user_ns); >> ns->ucounts = ucounts; >> ns->pid_allocated = PIDNS_ADDING; >> - INIT_WORK(&ns->proc_work, proc_cleanup_work); >> >> return ns; >> >> -- >> 2.25.0 >>
diff --git a/fs/proc/root.c b/fs/proc/root.c index 608233dfd29c..2633f10446c3 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -292,39 +292,3 @@ struct proc_dir_entry proc_root = { .subdir = RB_ROOT, .name = "/proc", }; - -int pid_ns_prepare_proc(struct pid_namespace *ns) -{ - struct proc_fs_context *ctx; - struct fs_context *fc; - struct vfsmount *mnt; - - fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT); - if (IS_ERR(fc)) - return PTR_ERR(fc); - - if (fc->user_ns != ns->user_ns) { - put_user_ns(fc->user_ns); - fc->user_ns = get_user_ns(ns->user_ns); - } - - ctx = fc->fs_private; - if (ctx->pid_ns != ns) { - put_pid_ns(ctx->pid_ns); - get_pid_ns(ns); - ctx->pid_ns = ns; - } - - mnt = fc_mount(fc); - put_fs_context(fc); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - - ns->proc_mnt = mnt; - return 0; -} - -void pid_ns_release_proc(struct pid_namespace *ns) -{ - kern_unmount(ns->proc_mnt); -} diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 2ed6af88794b..4956e362e55e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -33,7 +33,6 @@ struct pid_namespace { unsigned int level; struct pid_namespace *parent; #ifdef CONFIG_PROC_FS - struct vfsmount *proc_mnt; struct dentry *proc_self; struct dentry *proc_thread_self; #endif @@ -42,7 +41,6 @@ struct pid_namespace { #endif struct user_namespace *user_ns; struct ucounts *ucounts; - struct work_struct proc_work; kgid_t pid_gid; int hide_pid; int reboot; /* group exit code if this pidns was rebooted */ diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 4626b1ac3b6c..e1106a077c1a 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -50,16 +50,11 @@ enum { #ifdef CONFIG_PROC_FS -extern int pid_ns_prepare_proc(struct pid_namespace *ns); -extern void pid_ns_release_proc(struct pid_namespace *ns); extern int proc_alloc_inum(unsigned int *pino); extern void proc_free_inum(unsigned int inum); #else /* CONFIG_PROC_FS */ -static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; } -static inline void pid_ns_release_proc(struct pid_namespace *ns) {} - static inline int proc_alloc_inum(unsigned int *inum) { *inum = 1; diff --git a/kernel/pid.c b/kernel/pid.c index ca08d6a3aa77..60820e72634c 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -144,9 +144,6 @@ void free_pid(struct pid *pid) /* Handle a fork failure of the first process */ WARN_ON(ns->child_reaper); ns->pid_allocated = 0; - /* fall through */ - case 0: - schedule_work(&ns->proc_work); break; } @@ -247,11 +244,6 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tmp = tmp->parent; } - if (unlikely(is_child_reaper(pid))) { - if (pid_ns_prepare_proc(ns)) - goto out_free; - } - get_pid_ns(ns); refcount_set(&pid->count, 1); for (type = 0; type < PIDTYPE_MAX; ++type) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index d40017e79ebe..318fcc6ba301 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -57,12 +57,6 @@ static struct kmem_cache *create_pid_cachep(unsigned int level) return READ_ONCE(*pkc); } -static void proc_cleanup_work(struct work_struct *work) -{ - struct pid_namespace *ns = container_of(work, struct pid_namespace, proc_work); - pid_ns_release_proc(ns); -} - static struct ucounts *inc_pid_namespaces(struct user_namespace *ns) { return inc_ucount(ns, current_euid(), UCOUNT_PID_NAMESPACES); @@ -114,7 +108,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; - INIT_WORK(&ns->proc_work, proc_cleanup_work); return ns;
There remains no more code in the kernel using pids_ns->proc_mnt, therefore remove it from the kernel. The big benefit of this change is that one of the most error prone and tricky parts of the pid namespace implementation, maintaining kernel mounts of proc is removed. In addition removing the unnecessary complexity of the kernel mount fixes a regression that caused the proc mount options to be ignored. Now that the initial mount of proc comes from userspace, those mount options are again honored. This fixes Android's usage of the proc hidepid option. Reported-by: Alistair Strachan <astrachan@google.com> Fixes: e94591d0d90c ("proc: Convert proc_mount to use mount_ns.") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- fs/proc/root.c | 36 ----------------------------------- include/linux/pid_namespace.h | 2 -- include/linux/proc_ns.h | 5 ----- kernel/pid.c | 8 -------- kernel/pid_namespace.c | 7 ------- 5 files changed, 58 deletions(-)