diff mbox series

[11/23] fs: Add /proc/namespaces/ directory

Message ID 159611041929.535980.14513096920129728440.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show
Series proc: Introduce /proc/namespaces/ directory to expose namespaces linearly | expand

Commit Message

Kirill Tkhai July 30, 2020, noon UTC
This is a new directory that shows all namespaces which can be
accessed with the credentials of this /proc's tasks.

Every /proc is related to a pid_namespace, and the pid_namespace
is related to a user_namespace. The items we show in this
/proc/namespaces/ directory are the namespaces
whose user_namespaces are the same as /proc's user_namespace,
or are descendants of it.

Say /proc has pid_ns->user_ns; then in /proc/namespaces/ we show
only a ns for which in_userns(pid_ns->user_ns, ns->user_ns) is true.

The final result is like below:

# ls /proc/namespaces/ -l
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'

Every namespace may be opened like an ordinary file in /proc/[pid]/ns.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 fs/nsfs.c               |    2 
 fs/proc/Makefile        |    1 
 fs/proc/internal.h      |   16 ++
 fs/proc/namespaces.c    |  314 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/root.c          |   17 ++-
 include/linux/proc_fs.h |    1 
 6 files changed, 345 insertions(+), 6 deletions(-)
 create mode 100644 fs/proc/namespaces.c

Comments

Alexey Dobriyan July 30, 2020, 12:18 p.m. UTC | #1
On Thu, Jul 30, 2020 at 03:00:19PM +0300, Kirill Tkhai wrote:

> # ls /proc/namespaces/ -l
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'

I'd say make it '%s-%llu'. The brackets don't carry any information.
And ':' forces quoting with recent coreutils.

> +static int parse_namespace_dentry_name(const struct dentry *dentry,
> +		const char **type, unsigned int *type_len, unsigned int *inum)
> +{
> +	const char *p, *name;
> +	int count;
> +
> +	*type = name = dentry->d_name.name;
> +	p = strchr(name, ':');
> +	*type_len = p - name;
> +	if (!p || p == name)
> +		return -ENOENT;
> +
> +	p += 1;
> +	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
> +	    *inum < PROC_NS_MIN_INO)
> +		return -ENOENT;

sscanf is banned from lookup code due to lax whitespace rules.
See

	commit ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4
	proc: fix /proc/*/map_files lookup

Of course someone sneaked in 1 instance, yikes.

	$ grep -e scanf -n -r fs/proc/
	fs/proc/base.c:1596:            err = sscanf(pos, "%9s %lld %lu", clock,

> +static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)

> +		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);

[] -- no need.
Kirill Tkhai July 30, 2020, 1:22 p.m. UTC | #2
On 30.07.2020 15:18, Alexey Dobriyan wrote:
> On Thu, Jul 30, 2020 at 03:00:19PM +0300, Kirill Tkhai wrote:
> 
>> # ls /proc/namespaces/ -l
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'
> 
> I'd say make it '%s-%llu'. The brackets don't carry any information.
> And ':' forces quoting with recent coreutils.
> 
>> +static int parse_namespace_dentry_name(const struct dentry *dentry,
>> +		const char **type, unsigned int *type_len, unsigned int *inum)
>> +{
>> +	const char *p, *name;
>> +	int count;
>> +
>> +	*type = name = dentry->d_name.name;
>> +	p = strchr(name, ':');
>> +	*type_len = p - name;
>> +	if (!p || p == name)
>> +		return -ENOENT;
>> +
>> +	p += 1;
>> +	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
>> +	    *inum < PROC_NS_MIN_INO)
>> +		return -ENOENT;
> 
> sscanf is banned from lookup code due to lax whitespace rules.
> See
> 
> 	commit ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4
> 	proc: fix /proc/*/map_files lookup

OK, thanks for pointing this out.

> Of course someone sneaked in 1 instance, yikes.
> 
> 	$ grep -e scanf -n -r fs/proc/
> 	fs/proc/base.c:1596:            err = sscanf(pos, "%9s %lld %lu", clock,
> 
>> +static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)
> 
>> +		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);
> 
> [] -- no need.
>
Christian Brauner July 30, 2020, 1:26 p.m. UTC | #3
On Thu, Jul 30, 2020 at 03:00:19PM +0300, Kirill Tkhai wrote:
> This is a new directory to show all namespaces, which can be
> accessed from this /proc tasks credentials.
> 
> Every /proc is related to a pid_namespace, and the pid_namespace
> is related to a user_namespace. The items, we show in this
> /proc/namespaces/ directory, are the namespaces,
> whose user_namespaces are the same as /proc's user_namespace,
> or their descendants.
> 
> Say, /proc has pid_ns->user_ns, so in /proc/namespace we show
> only a ns, which is in_userns(pid_ns->user_ns, ns->user_ns).
> 
> The final result is like below:
> 
> # ls /proc/namespaces/ -l
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'

So usually, the /proc/<pid>/ns entries are guarded by
ptrace_may_access() but from skimming the patch it seems that
/proc/namespaces/ would be accessible by any user.

I think we should guard /proc/namespaces/. Either by restricting it to
userns CAP_SYS_ADMIN or - to make it work with unprivileged CRIU - by
ns_capable(proc's_pid_ns->user_ns, CAP_SYS_PTRACE).


This should probably also be a mount option on procfs given that we now
allow a restricted view of procfs.

Christian

> 
> Every namespace may be open like ordinary file in /proc/[pid]/ns.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  fs/nsfs.c               |    2 
>  fs/proc/Makefile        |    1 
>  fs/proc/internal.h      |   16 ++
>  fs/proc/namespaces.c    |  314 +++++++++++++++++++++++++++++++++++++++++++++++
>  fs/proc/root.c          |   17 ++-
>  include/linux/proc_fs.h |    1 
>  6 files changed, 345 insertions(+), 6 deletions(-)
>  create mode 100644 fs/proc/namespaces.c
> 
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index ee4be67d3a0b..61b789d2089c 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -58,7 +58,7 @@ static void nsfs_evict(struct inode *inode)
>  	ns->ops->put(ns);
>  }
>  
> -static int __ns_get_path(struct path *path, struct ns_common *ns)
> +int __ns_get_path(struct path *path, struct ns_common *ns)
>  {
>  	struct vfsmount *mnt = nsfs_mnt;
>  	struct dentry *dentry;
> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> index dc2d51f42905..34ff671c6d59 100644
> --- a/fs/proc/Makefile
> +++ b/fs/proc/Makefile
> @@ -25,6 +25,7 @@ proc-y	+= util.o
>  proc-y	+= version.o
>  proc-y	+= softirqs.o
>  proc-y	+= task_namespaces.o
> +proc-y	+= namespaces.o
>  proc-y	+= self.o
>  proc-y	+= thread_self.o
>  proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 572757ff97be..d19fe5574799 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -134,10 +134,11 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
>  		     kuid_t *ruid, kgid_t *rgid);
>  
>  unsigned name_to_int(const struct qstr *qstr);
> -/*
> - * Offset of the first process in the /proc root directory..
> - */
> -#define FIRST_PROCESS_ENTRY 256
> +
> +/* Offset of "namespaces" entry in /proc root directory */
> +#define NAMESPACES_ENTRY 256
> +/* Offset of the first process in the /proc root directory */
> +#define FIRST_PROCESS_ENTRY (NAMESPACES_ENTRY + 1)
>  
>  /* Worst case buffer size needed for holding an integer. */
>  #define PROC_NUMBUF 13
> @@ -168,6 +169,7 @@ extern void proc_pid_evict_inode(struct proc_inode *);
>  extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
>  extern void pid_update_inode(struct task_struct *, struct inode *);
>  extern int pid_delete_dentry(const struct dentry *);
> +extern int proc_emit_namespaces(struct file *, struct dir_context *);
>  extern int proc_pid_readdir(struct file *, struct dir_context *);
>  struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
>  extern loff_t mem_lseek(struct file *, loff_t, int);
> @@ -222,6 +224,12 @@ void set_proc_pid_nlink(void);
>  extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
>  extern void proc_entry_rundown(struct proc_dir_entry *);
>  
> +/*
> + * namespaces.c
> + */
> +extern int proc_setup_namespaces(struct super_block *);
> +extern void proc_namespaces_init(void);
> +
>  /*
>   * task_namespaces.c
>   */
> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
> new file mode 100644
> index 000000000000..ab47e1555619
> --- /dev/null
> +++ b/fs/proc/namespaces.c
> @@ -0,0 +1,314 @@
> +#include <linux/pid_namespace.h>
> +#include <linux/user_namespace.h>
> +#include <linux/namei.h>
> +#include "internal.h"
> +
> +static unsigned namespaces_inum __ro_after_init;
> +
> +int proc_emit_namespaces(struct file *file, struct dir_context *ctx)
> +{
> +	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
> +	struct inode *inode = d_inode(fs_info->proc_namespaces);
> +
> +	return dir_emit(ctx, "namespaces", 10, inode->i_ino, DT_DIR);
> +}
> +
> +static int parse_namespace_dentry_name(const struct dentry *dentry,
> +		const char **type, unsigned int *type_len, unsigned int *inum)
> +{
> +	const char *p, *name;
> +	int count;
> +
> +	*type = name = dentry->d_name.name;
> +	p = strchr(name, ':');
> +	*type_len = p - name;
> +	if (!p || p == name)
> +		return -ENOENT;

Hm, rather:

p = strchr(name, ':');
if (!p || p == name)
	return -ENOENT;
*type_len = p - name;

> +
> +	p += 1;
> +	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
> +	    *inum < PROC_NS_MIN_INO)
> +		return -ENOENT;
> +
> +	return 0;
> +}
> +
> +static struct ns_common *get_namespace_by_dentry(struct pid_namespace *pid_ns,
> +						 const struct dentry *dentry)
> +{
> +	unsigned int type_len, inum, p_inum;
> +	struct user_namespace *user_ns;
> +	struct ns_common *ns;
> +	const char *type;
> +
> +	if (parse_namespace_dentry_name(dentry, &type, &type_len, &inum) < 0)
> +		return NULL;
> +
> +	p_inum = inum - 1;
> +	ns = ns_get_next(&p_inum);
> +	if (!ns)
> +		return NULL;
> +
> +	if (ns->inum != inum || strncmp(type, ns->ops->name, type_len) != 0 ||
> +	    ns->ops->name[type_len] != '\0') {
> +		ns->ops->put(ns);
> +		return NULL;
> +	}
> +
> +	if (ns->ops != &userns_operations)
> +		user_ns = ns->ops->owner(ns);
> +	else
> +		user_ns = container_of(ns, struct user_namespace, ns);
> +
> +	if (!in_userns(pid_ns->user_ns, user_ns)) {
> +		ns->ops->put(ns);
> +		return NULL;
> +	}
> +
> +	return ns;
> +}
> +
> +static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
> +		struct task_struct *task, const void *ptr);
> +
> +static struct dentry *proc_namespaces_lookup(struct inode *dir, struct dentry *dentry,
> +					     unsigned int flags)
> +{
> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
> +	struct task_struct *task;
> +	struct ns_common *ns;
> +
> +	ns = get_namespace_by_dentry(pid_ns, dentry);
> +	if (!ns)
> +		return ERR_PTR(-ENOENT);
> +
> +	read_lock(&tasklist_lock);
> +	task = get_task_struct(pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	dentry = proc_namespace_instantiate(dentry, task, ns);
> +	put_task_struct(task);
> +	ns->ops->put(ns);
> +
> +	return dentry;
> +}
> +
> +static int proc_namespaces_permission(struct inode *inode, int mask)
> +{
> +	if ((mask & MAY_EXEC) && S_ISLNK(inode->i_mode))
> +		return -EACCES;
> +
> +	return 0;
> +}
> +
> +static int proc_namespaces_getattr(const struct path *path, struct kstat *stat,
> +				   u32 request_mask, unsigned int query_flags)
> +{
> +	struct inode *inode = d_inode(path->dentry);
> +
> +	generic_fillattr(inode, stat);
> +	return 0;
> +}
> +
> +static const struct inode_operations proc_namespaces_inode_operations = {
> +	.lookup		= proc_namespaces_lookup,
> +	.permission	= proc_namespaces_permission,
> +	.getattr	= proc_namespaces_getattr,
> +};
> +
> +static int proc_namespaces_readlink(struct dentry *dentry, char __user *buffer, int buflen)
> +{
> +	struct inode *dir = dentry->d_parent->d_inode;
> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
> +	struct ns_common *ns;
> +
> +	ns = get_namespace_by_dentry(pid_ns, dentry);
> +	if (!ns)
> +		return -ENOENT;
> +	ns->ops->put(ns);
> +
> +	/* proc_namespaces_readdir() creates dentry names in namespace format */
> +	return readlink_copy(buffer, buflen, dentry->d_iname);
> +}
> +
> +int __ns_get_path(struct path *path, struct ns_common *ns);
> +
> +static const char *proc_namespaces_getlink(struct dentry *dentry,
> +				struct inode *inode, struct delayed_call *done)
> +{
> +	struct pid_namespace *pid_ns = proc_pid_ns(inode->i_sb);
> +	struct ns_common *ns;
> +	struct path path;
> +	int ret;
> +
> +	if (!dentry)
> +		return ERR_PTR(-ECHILD);
> +
> +	while (1) {
> +		ret = -ENOENT;
> +		ns = get_namespace_by_dentry(pid_ns, dentry);
> +		if (!ns)
> +			goto out;
> +
> +		ret = __ns_get_path(&path, ns);
> +		if (ret == -EAGAIN)
> +			continue;
> +		if (ret)
> +			goto out;
> +		break;
> +	}
> +
> +	ret = nd_jump_link(&path);
> +out:
> +	return ERR_PTR(ret);
> +}
> +
> +static const struct inode_operations proc_namespaces_link_inode_operations = {
> +	.readlink	= proc_namespaces_readlink,
> +	.get_link	= proc_namespaces_getlink,
> +};
> +
> +static int namespace_delete_dentry(const struct dentry *dentry)
> +{
> +	struct inode *dir = dentry->d_parent->d_inode;
> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
> +	struct ns_common *ns;
> +
> +	ns = get_namespace_by_dentry(pid_ns, dentry);
> +	if (!ns)
> +		return 1;
> +
> +	ns->ops->put(ns);
> +	return 0;
> +}
> +
> +const struct dentry_operations namespaces_dentry_operations = {
> +	.d_delete	= namespace_delete_dentry,
> +};
> +
> +static void namespace_update_inode(struct inode *inode)
> +{
> +	struct user_namespace *user_ns = proc_pid_ns(inode->i_sb)->user_ns;
> +
> +	inode->i_uid = make_kuid(user_ns, 0);
> +	if (!uid_valid(inode->i_uid))
> +		inode->i_uid = GLOBAL_ROOT_UID;
> +
> +	inode->i_gid = make_kgid(user_ns, 0);
> +	if (!gid_valid(inode->i_gid))
> +		inode->i_gid = GLOBAL_ROOT_GID;
> +}
> +
> +static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
> +	struct task_struct *task, const void *ptr)
> +{
> +	const struct ns_common *ns = ptr;
> +	struct inode *inode;
> +	struct proc_inode *ei;
> +
> +	/*
> +	 * Create inode with credentials of @task, and add it to @task's
> +	 * quick removal list.
> +	 */
> +	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
> +	if (!inode)
> +		return ERR_PTR(-ENOENT);
> +
> +	ei = PROC_I(inode);
> +	inode->i_op = &proc_namespaces_link_inode_operations;
> +	ei->ns_ops = ns->ops;
> +	namespace_update_inode(inode);
> +
> +	d_set_d_op(dentry, &namespaces_dentry_operations);
> +	return d_splice_alias(inode, dentry);
> +}
> +
> +static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)
> +{
> +	struct pid_namespace *pid_ns = proc_pid_ns(file_inode(file)->i_sb);
> +	struct user_namespace *user_ns;
> +	struct task_struct *task;
> +	struct ns_common *ns;
> +	unsigned int inum;
> +
> +	read_lock(&tasklist_lock);
> +	task = get_task_struct(pid_ns->child_reaper);
> +	read_unlock(&tasklist_lock);
> +
> +	if (!dir_emit_dots(file, ctx))
> +		goto out;
> +
> +	inum = ctx->pos - 2;
> +	while ((ns = ns_get_next(&inum)) != NULL) {
> +		unsigned int len;
> +		char name[32];
> +
> +		if (ns->ops != &userns_operations)
> +			user_ns = ns->ops->owner(ns);
> +		else
> +			user_ns = container_of(ns, struct user_namespace, ns);
> +
> +		if (!in_userns(pid_ns->user_ns, user_ns))
> +			goto next;
> +
> +		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);
> +
> +		if (!proc_fill_cache(file, ctx, name, len,
> +			proc_namespace_instantiate, task, ns)) {
> +			ns->ops->put(ns);
> +			break;
> +		}
> +next:
> +		ns->ops->put(ns);
> +		ctx->pos = inum + 2;
> +	}
> +out:
> +	put_task_struct(task);
> +	return 0;
> +}
> +
> +static const struct file_operations proc_namespaces_file_operations = {
> +	.read		= generic_read_dir,
> +	.iterate_shared	= proc_namespaces_readdir,
> +	.llseek		= generic_file_llseek,
> +};
> +
> +int proc_setup_namespaces(struct super_block *s)
> +{
> +	struct proc_fs_info *fs_info = proc_sb_info(s);
> +	struct inode *root_inode = d_inode(s->s_root);
> +	struct dentry *namespaces;
> +	int ret = -ENOMEM;
> +
> +	inode_lock(root_inode);
> +	namespaces = d_alloc_name(s->s_root, "namespaces");
> +	if (namespaces) {
> +		struct inode *inode = new_inode_pseudo(s);
> +		if (inode) {
> +			inode->i_ino = namespaces_inum;
> +			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
> +			inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
> +			inode->i_uid = GLOBAL_ROOT_UID;
> +			inode->i_gid = GLOBAL_ROOT_GID;
> +			inode->i_op = &proc_namespaces_inode_operations;
> +			inode->i_fop = &proc_namespaces_file_operations;
> +			d_add(namespaces, inode);
> +			ret = 0;
> +		} else {
> +			dput(namespaces);
> +		}
> +	}
> +	inode_unlock(root_inode);
> +
> +	if (ret)
> +		pr_err("proc_setup_namespaces: can't allocate /proc/namespaces\n");
> +	else
> +		fs_info->proc_namespaces = namespaces;
> +
> +	return ret;
> +}
> +
> +void __init proc_namespaces_init(void)
> +{
> +	proc_alloc_inum(&namespaces_inum);
> +}
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 5e444d4f9717..e4e4f90fca3d 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -206,6 +206,10 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
>  		return -ENOMEM;
>  	}
>  
> +	ret = proc_setup_namespaces(s);
> +	if (ret)
> +		return ret;
> +
>  	ret = proc_setup_self(s);
>  	if (ret) {
>  		return ret;
> @@ -272,6 +276,9 @@ static void proc_kill_sb(struct super_block *sb)
>  	dput(fs_info->proc_self);
>  	dput(fs_info->proc_thread_self);
>  
> +	if (fs_info->proc_namespaces)
> +		dput(fs_info->proc_namespaces);
> +
>  	kill_anon_super(sb);
>  	put_pid_ns(fs_info->pid_ns);
>  	kfree(fs_info);
> @@ -289,6 +296,7 @@ void __init proc_root_init(void)
>  {
>  	proc_init_kmemcache();
>  	set_proc_pid_nlink();
> +	proc_namespaces_init();
>  	proc_self_init();
>  	proc_thread_self_init();
>  	proc_symlink("mounts", NULL, "self/mounts");
> @@ -326,8 +334,15 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
>  
>  static int proc_root_readdir(struct file *file, struct dir_context *ctx)
>  {
> -	if (ctx->pos < FIRST_PROCESS_ENTRY) {
> +	if (ctx->pos < NAMESPACES_ENTRY) {
>  		int error = proc_readdir(file, ctx);
> +		if (unlikely(error <= 0))
> +			return error;
> +		ctx->pos = NAMESPACES_ENTRY;
> +	}
> +
> +	if (ctx->pos == NAMESPACES_ENTRY) {
> +		int error = proc_emit_namespaces(file, ctx);
>  		if (unlikely(error <= 0))
>  			return error;
>  		ctx->pos = FIRST_PROCESS_ENTRY;
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 97b3f5f06db9..8b0002a6cacf 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -61,6 +61,7 @@ struct proc_fs_info {
>  	struct pid_namespace *pid_ns;
>  	struct dentry *proc_self;        /* For /proc/self */
>  	struct dentry *proc_thread_self; /* For /proc/thread-self */
> +	struct dentry *proc_namespaces;	 /* For /proc/namespaces */
>  	kgid_t pid_gid;
>  	enum proc_hidepid hide_pid;
>  	enum proc_pidonly pidonly;
> 
>
Kirill Tkhai July 30, 2020, 2:30 p.m. UTC | #4
On 30.07.2020 16:26, Christian Brauner wrote:
> On Thu, Jul 30, 2020 at 03:00:19PM +0300, Kirill Tkhai wrote:
>> This is a new directory to show all namespaces, which can be
>> accessed from this /proc tasks credentials.
>>
>> Every /proc is related to a pid_namespace, and the pid_namespace
>> is related to a user_namespace. The items, we show in this
>> /proc/namespaces/ directory, are the namespaces,
>> whose user_namespaces are the same as /proc's user_namespace,
>> or their descendants.
>>
>> Say, /proc has pid_ns->user_ns, so in /proc/namespace we show
>> only a ns, which is in_userns(pid_ns->user_ns, ns->user_ns).
>>
>> The final result is like below:
>>
>> # ls /proc/namespaces/ -l
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
>> lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'
> 
> So usually, the /proc/<pid>/ns entries are guarded by
> ptrace_may_access() but from skimming the patch it seems that
> /proc/namespaces/ would be accessible by any user.
> 
> I think we should guard /proc/namespaces/. Either by restricting it to
> userns CAP_SYS_ADMIN or - to make it work with unprivileged CRIU - by
> ns_capable(proc's_pid_ns->user_ns, CAP_SYS_PTRACE).

I do agree with you, the restrictions have to be strict.

In suggesting this, do you mean restricting only open() on /proc/namespaces/* files?
I'm not sure we should prohibit a simple readdir of this directory. What do you think?
 
> This should probably also be a mount option on procfs given that we now
> allow a restricted view of procfs.
> 
> Christian
> 
>>
>> Every namespace may be open like ordinary file in /proc/[pid]/ns.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  fs/nsfs.c               |    2 
>>  fs/proc/Makefile        |    1 
>>  fs/proc/internal.h      |   16 ++
>>  fs/proc/namespaces.c    |  314 +++++++++++++++++++++++++++++++++++++++++++++++
>>  fs/proc/root.c          |   17 ++-
>>  include/linux/proc_fs.h |    1 
>>  6 files changed, 345 insertions(+), 6 deletions(-)
>>  create mode 100644 fs/proc/namespaces.c
>>
>> diff --git a/fs/nsfs.c b/fs/nsfs.c
>> index ee4be67d3a0b..61b789d2089c 100644
>> --- a/fs/nsfs.c
>> +++ b/fs/nsfs.c
>> @@ -58,7 +58,7 @@ static void nsfs_evict(struct inode *inode)
>>  	ns->ops->put(ns);
>>  }
>>  
>> -static int __ns_get_path(struct path *path, struct ns_common *ns)
>> +int __ns_get_path(struct path *path, struct ns_common *ns)
>>  {
>>  	struct vfsmount *mnt = nsfs_mnt;
>>  	struct dentry *dentry;
>> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
>> index dc2d51f42905..34ff671c6d59 100644
>> --- a/fs/proc/Makefile
>> +++ b/fs/proc/Makefile
>> @@ -25,6 +25,7 @@ proc-y	+= util.o
>>  proc-y	+= version.o
>>  proc-y	+= softirqs.o
>>  proc-y	+= task_namespaces.o
>> +proc-y	+= namespaces.o
>>  proc-y	+= self.o
>>  proc-y	+= thread_self.o
>>  proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
>> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
>> index 572757ff97be..d19fe5574799 100644
>> --- a/fs/proc/internal.h
>> +++ b/fs/proc/internal.h
>> @@ -134,10 +134,11 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
>>  		     kuid_t *ruid, kgid_t *rgid);
>>  
>>  unsigned name_to_int(const struct qstr *qstr);
>> -/*
>> - * Offset of the first process in the /proc root directory..
>> - */
>> -#define FIRST_PROCESS_ENTRY 256
>> +
>> +/* Offset of "namespaces" entry in /proc root directory */
>> +#define NAMESPACES_ENTRY 256
>> +/* Offset of the first process in the /proc root directory */
>> +#define FIRST_PROCESS_ENTRY (NAMESPACES_ENTRY + 1)
>>  
>>  /* Worst case buffer size needed for holding an integer. */
>>  #define PROC_NUMBUF 13
>> @@ -168,6 +169,7 @@ extern void proc_pid_evict_inode(struct proc_inode *);
>>  extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
>>  extern void pid_update_inode(struct task_struct *, struct inode *);
>>  extern int pid_delete_dentry(const struct dentry *);
>> +extern int proc_emit_namespaces(struct file *, struct dir_context *);
>>  extern int proc_pid_readdir(struct file *, struct dir_context *);
>>  struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
>>  extern loff_t mem_lseek(struct file *, loff_t, int);
>> @@ -222,6 +224,12 @@ void set_proc_pid_nlink(void);
>>  extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
>>  extern void proc_entry_rundown(struct proc_dir_entry *);
>>  
>> +/*
>> + * namespaces.c
>> + */
>> +extern int proc_setup_namespaces(struct super_block *);
>> +extern void proc_namespaces_init(void);
>> +
>>  /*
>>   * task_namespaces.c
>>   */
>> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
>> new file mode 100644
>> index 000000000000..ab47e1555619
>> --- /dev/null
>> +++ b/fs/proc/namespaces.c
>> @@ -0,0 +1,314 @@
>> +#include <linux/pid_namespace.h>
>> +#include <linux/user_namespace.h>
>> +#include <linux/namei.h>
>> +#include "internal.h"
>> +
>> +static unsigned namespaces_inum __ro_after_init;
>> +
>> +int proc_emit_namespaces(struct file *file, struct dir_context *ctx)
>> +{
>> +	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
>> +	struct inode *inode = d_inode(fs_info->proc_namespaces);
>> +
>> +	return dir_emit(ctx, "namespaces", 10, inode->i_ino, DT_DIR);
>> +}
>> +
>> +static int parse_namespace_dentry_name(const struct dentry *dentry,
>> +		const char **type, unsigned int *type_len, unsigned int *inum)
>> +{
>> +	const char *p, *name;
>> +	int count;
>> +
>> +	*type = name = dentry->d_name.name;
>> +	p = strchr(name, ':');
>> +	*type_len = p - name;
>> +	if (!p || p == name)
>> +		return -ENOENT;
> 
> Hm, rather:
> 
> p = strchr(name, ':');
> if (!p || p == name)
> 	return -ENOENT;
> *type_len = p - name;
> 
>> +
>> +	p += 1;
>> +	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
>> +	    *inum < PROC_NS_MIN_INO)
>> +		return -ENOENT;
>> +
>> +	return 0;
>> +}
>> +
>> +static struct ns_common *get_namespace_by_dentry(struct pid_namespace *pid_ns,
>> +						 const struct dentry *dentry)
>> +{
>> +	unsigned int type_len, inum, p_inum;
>> +	struct user_namespace *user_ns;
>> +	struct ns_common *ns;
>> +	const char *type;
>> +
>> +	if (parse_namespace_dentry_name(dentry, &type, &type_len, &inum) < 0)
>> +		return NULL;
>> +
>> +	p_inum = inum - 1;
>> +	ns = ns_get_next(&p_inum);
>> +	if (!ns)
>> +		return NULL;
>> +
>> +	if (ns->inum != inum || strncmp(type, ns->ops->name, type_len) != 0 ||
>> +	    ns->ops->name[type_len] != '\0') {
>> +		ns->ops->put(ns);
>> +		return NULL;
>> +	}
>> +
>> +	if (ns->ops != &userns_operations)
>> +		user_ns = ns->ops->owner(ns);
>> +	else
>> +		user_ns = container_of(ns, struct user_namespace, ns);
>> +
>> +	if (!in_userns(pid_ns->user_ns, user_ns)) {
>> +		ns->ops->put(ns);
>> +		return NULL;
>> +	}
>> +
>> +	return ns;
>> +}
>> +
>> +static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
>> +		struct task_struct *task, const void *ptr);
>> +
>> +static struct dentry *proc_namespaces_lookup(struct inode *dir, struct dentry *dentry,
>> +					     unsigned int flags)
>> +{
>> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
>> +	struct task_struct *task;
>> +	struct ns_common *ns;
>> +
>> +	ns = get_namespace_by_dentry(pid_ns, dentry);
>> +	if (!ns)
>> +		return ERR_PTR(-ENOENT);
>> +
>> +	read_lock(&tasklist_lock);
>> +	task = get_task_struct(pid_ns->child_reaper);
>> +	read_unlock(&tasklist_lock);
>> +
>> +	dentry = proc_namespace_instantiate(dentry, task, ns);
>> +	put_task_struct(task);
>> +	ns->ops->put(ns);
>> +
>> +	return dentry;
>> +}
>> +
>> +static int proc_namespaces_permission(struct inode *inode, int mask)
>> +{
>> +	if ((mask & MAY_EXEC) && S_ISLNK(inode->i_mode))
>> +		return -EACCES;
>> +
>> +	return 0;
>> +}
>> +
>> +static int proc_namespaces_getattr(const struct path *path, struct kstat *stat,
>> +				   u32 request_mask, unsigned int query_flags)
>> +{
>> +	struct inode *inode = d_inode(path->dentry);
>> +
>> +	generic_fillattr(inode, stat);
>> +	return 0;
>> +}
>> +
>> +static const struct inode_operations proc_namespaces_inode_operations = {
>> +	.lookup		= proc_namespaces_lookup,
>> +	.permission	= proc_namespaces_permission,
>> +	.getattr	= proc_namespaces_getattr,
>> +};
>> +
>> +static int proc_namespaces_readlink(struct dentry *dentry, char __user *buffer, int buflen)
>> +{
>> +	struct inode *dir = dentry->d_parent->d_inode;
>> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
>> +	struct ns_common *ns;
>> +
>> +	ns = get_namespace_by_dentry(pid_ns, dentry);
>> +	if (!ns)
>> +		return -ENOENT;
>> +	ns->ops->put(ns);
>> +
>> +	/* proc_namespaces_readdir() creates dentry names in namespace format */
>> +	return readlink_copy(buffer, buflen, dentry->d_iname);
>> +}
>> +
>> +int __ns_get_path(struct path *path, struct ns_common *ns);
>> +
>> +static const char *proc_namespaces_getlink(struct dentry *dentry,
>> +				struct inode *inode, struct delayed_call *done)
>> +{
>> +	struct pid_namespace *pid_ns = proc_pid_ns(inode->i_sb);
>> +	struct ns_common *ns;
>> +	struct path path;
>> +	int ret;
>> +
>> +	if (!dentry)
>> +		return ERR_PTR(-ECHILD);
>> +
>> +	while (1) {
>> +		ret = -ENOENT;
>> +		ns = get_namespace_by_dentry(pid_ns, dentry);
>> +		if (!ns)
>> +			goto out;
>> +
>> +		ret = __ns_get_path(&path, ns);
>> +		if (ret == -EAGAIN)
>> +			continue;
>> +		if (ret)
>> +			goto out;
>> +		break;
>> +	}
>> +
>> +	ret = nd_jump_link(&path);
>> +out:
>> +	return ERR_PTR(ret);
>> +}
>> +
>> +static const struct inode_operations proc_namespaces_link_inode_operations = {
>> +	.readlink	= proc_namespaces_readlink,
>> +	.get_link	= proc_namespaces_getlink,
>> +};
>> +
>> +static int namespace_delete_dentry(const struct dentry *dentry)
>> +{
>> +	struct inode *dir = dentry->d_parent->d_inode;
>> +	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
>> +	struct ns_common *ns;
>> +
>> +	ns = get_namespace_by_dentry(pid_ns, dentry);
>> +	if (!ns)
>> +		return 1;
>> +
>> +	ns->ops->put(ns);
>> +	return 0;
>> +}
>> +
>> +const struct dentry_operations namespaces_dentry_operations = {
>> +	.d_delete	= namespace_delete_dentry,
>> +};
>> +
>> +static void namespace_update_inode(struct inode *inode)
>> +{
>> +	struct user_namespace *user_ns = proc_pid_ns(inode->i_sb)->user_ns;
>> +
>> +	inode->i_uid = make_kuid(user_ns, 0);
>> +	if (!uid_valid(inode->i_uid))
>> +		inode->i_uid = GLOBAL_ROOT_UID;
>> +
>> +	inode->i_gid = make_kgid(user_ns, 0);
>> +	if (!gid_valid(inode->i_gid))
>> +		inode->i_gid = GLOBAL_ROOT_GID;
>> +}
>> +
>> +static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
>> +	struct task_struct *task, const void *ptr)
>> +{
>> +	const struct ns_common *ns = ptr;
>> +	struct inode *inode;
>> +	struct proc_inode *ei;
>> +
>> +	/*
>> +	 * Create inode with credentials of @task, and add it to @task's
>> +	 * quick removal list.
>> +	 */
>> +	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
>> +	if (!inode)
>> +		return ERR_PTR(-ENOENT);
>> +
>> +	ei = PROC_I(inode);
>> +	inode->i_op = &proc_namespaces_link_inode_operations;
>> +	ei->ns_ops = ns->ops;
>> +	namespace_update_inode(inode);
>> +
>> +	d_set_d_op(dentry, &namespaces_dentry_operations);
>> +	return d_splice_alias(inode, dentry);
>> +}
>> +
>> +static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)
>> +{
>> +	struct pid_namespace *pid_ns = proc_pid_ns(file_inode(file)->i_sb);
>> +	struct user_namespace *user_ns;
>> +	struct task_struct *task;
>> +	struct ns_common *ns;
>> +	unsigned int inum;
>> +
>> +	read_lock(&tasklist_lock);
>> +	task = get_task_struct(pid_ns->child_reaper);
>> +	read_unlock(&tasklist_lock);
>> +
>> +	if (!dir_emit_dots(file, ctx))
>> +		goto out;
>> +
>> +	inum = ctx->pos - 2;
>> +	while ((ns = ns_get_next(&inum)) != NULL) {
>> +		unsigned int len;
>> +		char name[32];
>> +
>> +		if (ns->ops != &userns_operations)
>> +			user_ns = ns->ops->owner(ns);
>> +		else
>> +			user_ns = container_of(ns, struct user_namespace, ns);
>> +
>> +		if (!in_userns(pid_ns->user_ns, user_ns))
>> +			goto next;
>> +
>> +		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);
>> +
>> +		if (!proc_fill_cache(file, ctx, name, len,
>> +			proc_namespace_instantiate, task, ns)) {
>> +			ns->ops->put(ns);
>> +			break;
>> +		}
>> +next:
>> +		ns->ops->put(ns);
>> +		ctx->pos = inum + 2;
>> +	}
>> +out:
>> +	put_task_struct(task);
>> +	return 0;
>> +}
>> +
>> +static const struct file_operations proc_namespaces_file_operations = {
>> +	.read		= generic_read_dir,
>> +	.iterate_shared	= proc_namespaces_readdir,
>> +	.llseek		= generic_file_llseek,
>> +};
>> +
>> +int proc_setup_namespaces(struct super_block *s)
>> +{
>> +	struct proc_fs_info *fs_info = proc_sb_info(s);
>> +	struct inode *root_inode = d_inode(s->s_root);
>> +	struct dentry *namespaces;
>> +	int ret = -ENOMEM;
>> +
>> +	inode_lock(root_inode);
>> +	namespaces = d_alloc_name(s->s_root, "namespaces");
>> +	if (namespaces) {
>> +		struct inode *inode = new_inode_pseudo(s);
>> +		if (inode) {
>> +			inode->i_ino = namespaces_inum;
>> +			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
>> +			inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
>> +			inode->i_uid = GLOBAL_ROOT_UID;
>> +			inode->i_gid = GLOBAL_ROOT_GID;
>> +			inode->i_op = &proc_namespaces_inode_operations;
>> +			inode->i_fop = &proc_namespaces_file_operations;
>> +			d_add(namespaces, inode);
>> +			ret = 0;
>> +		} else {
>> +			dput(namespaces);
>> +		}
>> +	}
>> +	inode_unlock(root_inode);
>> +
>> +	if (ret)
>> +		pr_err("proc_setup_namespaces: can't allocate /proc/namespaces\n");
>> +	else
>> +		fs_info->proc_namespaces = namespaces;
>> +
>> +	return ret;
>> +}
>> +
>> +void __init proc_namespaces_init(void)
>> +{
>> +	proc_alloc_inum(&namespaces_inum);
>> +}
>> diff --git a/fs/proc/root.c b/fs/proc/root.c
>> index 5e444d4f9717..e4e4f90fca3d 100644
>> --- a/fs/proc/root.c
>> +++ b/fs/proc/root.c
>> @@ -206,6 +206,10 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
>>  		return -ENOMEM;
>>  	}
>>  
>> +	ret = proc_setup_namespaces(s);
>> +	if (ret)
>> +		return ret;
>> +
>>  	ret = proc_setup_self(s);
>>  	if (ret) {
>>  		return ret;
>> @@ -272,6 +276,9 @@ static void proc_kill_sb(struct super_block *sb)
>>  	dput(fs_info->proc_self);
>>  	dput(fs_info->proc_thread_self);
>>  
>> +	if (fs_info->proc_namespaces)
>> +		dput(fs_info->proc_namespaces);
>> +
>>  	kill_anon_super(sb);
>>  	put_pid_ns(fs_info->pid_ns);
>>  	kfree(fs_info);
>> @@ -289,6 +296,7 @@ void __init proc_root_init(void)
>>  {
>>  	proc_init_kmemcache();
>>  	set_proc_pid_nlink();
>> +	proc_namespaces_init();
>>  	proc_self_init();
>>  	proc_thread_self_init();
>>  	proc_symlink("mounts", NULL, "self/mounts");
>> @@ -326,8 +334,15 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
>>  
>>  static int proc_root_readdir(struct file *file, struct dir_context *ctx)
>>  {
>> -	if (ctx->pos < FIRST_PROCESS_ENTRY) {
>> +	if (ctx->pos < NAMESPACES_ENTRY) {
>>  		int error = proc_readdir(file, ctx);
>> +		if (unlikely(error <= 0))
>> +			return error;
>> +		ctx->pos = NAMESPACES_ENTRY;
>> +	}
>> +
>> +	if (ctx->pos == NAMESPACES_ENTRY) {
>> +		int error = proc_emit_namespaces(file, ctx);
>>  		if (unlikely(error <= 0))
>>  			return error;
>>  		ctx->pos = FIRST_PROCESS_ENTRY;
>> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
>> index 97b3f5f06db9..8b0002a6cacf 100644
>> --- a/include/linux/proc_fs.h
>> +++ b/include/linux/proc_fs.h
>> @@ -61,6 +61,7 @@ struct proc_fs_info {
>>  	struct pid_namespace *pid_ns;
>>  	struct dentry *proc_self;        /* For /proc/self */
>>  	struct dentry *proc_thread_self; /* For /proc/thread-self */
>> +	struct dentry *proc_namespaces;	 /* For /proc/namespaces */
>>  	kgid_t pid_gid;
>>  	enum proc_hidepid hide_pid;
>>  	enum proc_pidonly pidonly;
>>
>>
kernel test robot July 30, 2020, 8:47 p.m. UTC | #5
Hi Kirill,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200729]
[also build test ERROR on v5.8-rc7]
[cannot apply to cgroup/for-next tip/timers/core net-next/master sparc-next/master net/master linus/master v5.8-rc7 v5.8-rc6 v5.8-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Kirill-Tkhai/proc-Introduce-proc-namespaces-directory-to-expose-namespaces-lineary/20200730-200346
base:    04b4571786305a76ad81757bbec78eb16a5de582
config: m68k-defconfig (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   m68k-linux-ld: fs/proc/namespaces.o: in function `get_namespace_by_dentry.isra.0':
>> namespaces.c:(.text+0x170): undefined reference to `userns_operations'
   m68k-linux-ld: fs/proc/namespaces.o: in function `proc_namespaces_readdir':
   namespaces.c:(.text+0x3d2): undefined reference to `userns_operations'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot July 30, 2020, 10:20 p.m. UTC | #6
Hi Kirill,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on next-20200729]
[also build test ERROR on v5.8-rc7]
[cannot apply to cgroup/for-next tip/timers/core net-next/master sparc-next/master net/master linus/master v5.8-rc7 v5.8-rc6 v5.8-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Kirill-Tkhai/proc-Introduce-proc-namespaces-directory-to-expose-namespaces-lineary/20200730-200346
base:    04b4571786305a76ad81757bbec78eb16a5de582
config: csky-defconfig (attached as .config)
compiler: csky-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=csky 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   csky-linux-ld: fs/proc/namespaces.o: in function `$d':
   namespaces.c:(.text+0x1d0): undefined reference to `userns_operations'
>> csky-linux-ld: namespaces.c:(.text+0x370): undefined reference to `userns_operations'

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
kernel test robot Aug. 5, 2020, 8:17 a.m. UTC | #7
Hi Kirill,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on next-20200729]
[also build test WARNING on v5.8]
[cannot apply to cgroup/for-next tip/timers/core net-next/master sparc-next/master net/master linus/master v5.8-rc7 v5.8-rc6 v5.8-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Kirill-Tkhai/proc-Introduce-proc-namespaces-directory-to-expose-namespaces-lineary/20200730-200346
base:    04b4571786305a76ad81757bbec78eb16a5de582
config: i386-randconfig-s001-20200805 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.2-117-g8c7aee71-dirty
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> fs/proc/namespaces.c:185:32: sparse: sparse: symbol 'namespaces_dentry_operations' was not declared. Should it be static?

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/nsfs.c b/fs/nsfs.c
index ee4be67d3a0b..61b789d2089c 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -58,7 +58,7 @@  static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-static int __ns_get_path(struct path *path, struct ns_common *ns)
+int __ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
 	struct dentry *dentry;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index dc2d51f42905..34ff671c6d59 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -25,6 +25,7 @@  proc-y	+= util.o
 proc-y	+= version.o
 proc-y	+= softirqs.o
 proc-y	+= task_namespaces.o
+proc-y	+= namespaces.o
 proc-y	+= self.o
 proc-y	+= thread_self.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 572757ff97be..d19fe5574799 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -134,10 +134,11 @@  void task_dump_owner(struct task_struct *task, umode_t mode,
 		     kuid_t *ruid, kgid_t *rgid);
 
 unsigned name_to_int(const struct qstr *qstr);
-/*
- * Offset of the first process in the /proc root directory..
- */
-#define FIRST_PROCESS_ENTRY 256
+
+/* Offset of "namespaces" entry in /proc root directory */
+#define NAMESPACES_ENTRY 256
+/* Offset of the first process in the /proc root directory */
+#define FIRST_PROCESS_ENTRY (NAMESPACES_ENTRY + 1)
 
 /* Worst case buffer size needed for holding an integer. */
 #define PROC_NUMBUF 13
@@ -168,6 +169,7 @@  extern void proc_pid_evict_inode(struct proc_inode *);
 extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
 extern void pid_update_inode(struct task_struct *, struct inode *);
 extern int pid_delete_dentry(const struct dentry *);
+extern int proc_emit_namespaces(struct file *, struct dir_context *);
 extern int proc_pid_readdir(struct file *, struct dir_context *);
 struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
 extern loff_t mem_lseek(struct file *, loff_t, int);
@@ -222,6 +224,12 @@  void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern void proc_entry_rundown(struct proc_dir_entry *);
 
+/*
+ * namespaces.c
+ */
+extern int proc_setup_namespaces(struct super_block *);
+extern void proc_namespaces_init(void);
+
 /*
  * task_namespaces.c
  */
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..ab47e1555619
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,314 @@ 
+#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+static unsigned namespaces_inum __ro_after_init;
+
+int proc_emit_namespaces(struct file *file, struct dir_context *ctx)
+{
+	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+	struct inode *inode = d_inode(fs_info->proc_namespaces);
+
+	return dir_emit(ctx, "namespaces", 10, inode->i_ino, DT_DIR);
+}
+
+static int parse_namespace_dentry_name(const struct dentry *dentry,
+		const char **type, unsigned int *type_len, unsigned int *inum)
+{
+	const char *p, *name;
+	int count = 0;	/* stays 0 when sscanf() stops before reaching %n */
+
+	*type = name = dentry->d_name.name;
+	p = strchr(name, ':');
+	if (!p || p == name)
+		return -ENOENT;
+	*type_len = p - name;	/* only computed once ':' is known to exist */
+
+	p += 1;
+	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
+	    *inum < PROC_NS_MIN_INO)
+		return -ENOENT;
+
+	return 0;
+}
+
+static struct ns_common *get_namespace_by_dentry(struct pid_namespace *pid_ns,
+						 const struct dentry *dentry)
+{
+	unsigned int type_len, inum, p_inum;
+	struct user_namespace *user_ns;
+	struct ns_common *ns;
+	const char *type;
+
+	if (parse_namespace_dentry_name(dentry, &type, &type_len, &inum) < 0)
+		return NULL;
+
+	p_inum = inum - 1;
+	ns = ns_get_next(&p_inum);
+	if (!ns)
+		return NULL;
+
+	if (ns->inum != inum || strncmp(type, ns->ops->name, type_len) != 0 ||
+	    ns->ops->name[type_len] != '\0') {
+		ns->ops->put(ns);
+		return NULL;
+	}
+	/* Test ->type: &userns_operations fails to link on some configs. */
+	if (ns->ops->type != CLONE_NEWUSER)
+		user_ns = ns->ops->owner(ns);
+	else
+		user_ns = container_of(ns, struct user_namespace, ns);
+
+	if (!in_userns(pid_ns->user_ns, user_ns)) {
+		ns->ops->put(ns);
+		return NULL;
+	}
+
+	return ns;
+}
+
+static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
+		struct task_struct *task, const void *ptr);
+
+static struct dentry *proc_namespaces_lookup(struct inode *dir, struct dentry *dentry,
+					     unsigned int flags)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct task_struct *task;
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return ERR_PTR(-ENOENT);
+
+	read_lock(&tasklist_lock);
+	task = get_task_struct(pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	dentry = proc_namespace_instantiate(dentry, task, ns);
+	put_task_struct(task);
+	ns->ops->put(ns);
+
+	return dentry;
+}
+
+static int proc_namespaces_permission(struct inode *inode, int mask)
+{
+	if ((mask & MAY_EXEC) && S_ISLNK(inode->i_mode))
+		return -EACCES;
+
+	return 0;
+}
+
+static int proc_namespaces_getattr(const struct path *path, struct kstat *stat,
+				   u32 request_mask, unsigned int query_flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
+static const struct inode_operations proc_namespaces_inode_operations = {
+	.lookup		= proc_namespaces_lookup,
+	.permission	= proc_namespaces_permission,
+	.getattr	= proc_namespaces_getattr,
+};
+
+static int proc_namespaces_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return -ENOENT;
+	ns->ops->put(ns);
+
+	/* Use d_name.name: d_iname holds the name only when it fits inline */
+	return readlink_copy(buffer, buflen, dentry->d_name.name);
+}
+
+int __ns_get_path(struct path *path, struct ns_common *ns);
+
+static const char *proc_namespaces_getlink(struct dentry *dentry,
+				struct inode *inode, struct delayed_call *done)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(inode->i_sb);
+	struct ns_common *ns;
+	struct path path;
+	int ret;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	while (1) {
+		ret = -ENOENT;
+		ns = get_namespace_by_dentry(pid_ns, dentry);
+		if (!ns)
+			goto out;
+
+		ret = __ns_get_path(&path, ns);
+		if (ret == -EAGAIN)
+			continue;
+		if (ret)
+			goto out;
+		break;
+	}
+
+	ret = nd_jump_link(&path);
+out:
+	return ERR_PTR(ret);
+}
+
+static const struct inode_operations proc_namespaces_link_inode_operations = {
+	.readlink	= proc_namespaces_readlink,
+	.get_link	= proc_namespaces_getlink,
+};
+
+static int namespace_delete_dentry(const struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return 1;
+
+	ns->ops->put(ns);
+	return 0;
+}
+
+static const struct dentry_operations namespaces_dentry_operations = {
+	.d_delete	= namespace_delete_dentry,	/* drop dentry once its ns is gone */
+};
+
+static void namespace_update_inode(struct inode *inode)
+{
+	struct user_namespace *user_ns = proc_pid_ns(inode->i_sb)->user_ns;
+
+	inode->i_uid = make_kuid(user_ns, 0);
+	if (!uid_valid(inode->i_uid))
+		inode->i_uid = GLOBAL_ROOT_UID;
+
+	inode->i_gid = make_kgid(user_ns, 0);
+	if (!gid_valid(inode->i_gid))
+		inode->i_gid = GLOBAL_ROOT_GID;
+}
+
+static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
+{
+	const struct ns_common *ns = ptr;
+	struct inode *inode;
+	struct proc_inode *ei;
+
+	/*
+	 * Create inode with credentials of @task, and add it to @task's
+	 * quick removal list.
+	 */
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	ei = PROC_I(inode);
+	inode->i_op = &proc_namespaces_link_inode_operations;
+	ei->ns_ops = ns->ops;
+	namespace_update_inode(inode);
+
+	d_set_d_op(dentry, &namespaces_dentry_operations);
+	return d_splice_alias(inode, dentry);
+}
+
+static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(file_inode(file)->i_sb);
+	struct user_namespace *user_ns;
+	struct task_struct *task;
+	struct ns_common *ns;
+	unsigned int inum;
+
+	read_lock(&tasklist_lock);
+	task = get_task_struct(pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	if (!dir_emit_dots(file, ctx))
+		goto out;
+
+	inum = ctx->pos - 2;
+	while ((ns = ns_get_next(&inum)) != NULL) {
+		unsigned int len;
+		char name[32];
+		/* Test ->type: &userns_operations fails to link on some configs. */
+		if (ns->ops->type != CLONE_NEWUSER)
+			user_ns = ns->ops->owner(ns);
+		else
+			user_ns = container_of(ns, struct user_namespace, ns);
+
+		if (!in_userns(pid_ns->user_ns, user_ns))
+			goto next;
+
+		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);
+
+		if (!proc_fill_cache(file, ctx, name, len,
+			proc_namespace_instantiate, task, ns)) {
+			ns->ops->put(ns);
+			break;
+		}
+next:
+		ns->ops->put(ns);
+		ctx->pos = inum + 2;
+	}
+out:
+	put_task_struct(task);
+	return 0;
+}
+
+static const struct file_operations proc_namespaces_file_operations = {
+	.read		= generic_read_dir,
+	.iterate_shared	= proc_namespaces_readdir,
+	.llseek		= generic_file_llseek,
+};
+
+int proc_setup_namespaces(struct super_block *s)
+{
+	struct proc_fs_info *fs_info = proc_sb_info(s);
+	struct inode *root_inode = d_inode(s->s_root);
+	struct dentry *namespaces;
+	int ret = -ENOMEM;
+
+	inode_lock(root_inode);
+	namespaces = d_alloc_name(s->s_root, "namespaces");
+	if (namespaces) {
+		struct inode *inode = new_inode_pseudo(s);
+		if (inode) {
+			inode->i_ino = namespaces_inum;
+			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+			inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+			inode->i_uid = GLOBAL_ROOT_UID;
+			inode->i_gid = GLOBAL_ROOT_GID;
+			inode->i_op = &proc_namespaces_inode_operations;
+			inode->i_fop = &proc_namespaces_file_operations;
+			d_add(namespaces, inode);
+			ret = 0;
+		} else {
+			dput(namespaces);
+		}
+	}
+	inode_unlock(root_inode);
+
+	if (ret)
+		pr_err("proc_setup_namespaces: can't allocate /proc/namespaces\n");
+	else
+		fs_info->proc_namespaces = namespaces;
+
+	return ret;
+}
+
+void __init proc_namespaces_init(void)
+{
+	proc_alloc_inum(&namespaces_inum);
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5e444d4f9717..e4e4f90fca3d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -206,6 +206,10 @@  static int proc_fill_super(struct super_block *s, struct fs_context *fc)
 		return -ENOMEM;
 	}
 
+	ret = proc_setup_namespaces(s);
+	if (ret)
+		return ret;
+
 	ret = proc_setup_self(s);
 	if (ret) {
 		return ret;
@@ -272,6 +276,9 @@  static void proc_kill_sb(struct super_block *sb)
 	dput(fs_info->proc_self);
 	dput(fs_info->proc_thread_self);
 
+	if (fs_info->proc_namespaces)
+		dput(fs_info->proc_namespaces);
+
 	kill_anon_super(sb);
 	put_pid_ns(fs_info->pid_ns);
 	kfree(fs_info);
@@ -289,6 +296,7 @@  void __init proc_root_init(void)
 {
 	proc_init_kmemcache();
 	set_proc_pid_nlink();
+	proc_namespaces_init();
 	proc_self_init();
 	proc_thread_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
@@ -326,8 +334,15 @@  static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
 
 static int proc_root_readdir(struct file *file, struct dir_context *ctx)
 {
-	if (ctx->pos < FIRST_PROCESS_ENTRY) {
+	if (ctx->pos < NAMESPACES_ENTRY) {
 		int error = proc_readdir(file, ctx);
+		if (unlikely(error <= 0))
+			return error;
+		ctx->pos = NAMESPACES_ENTRY;
+	}
+
+	if (ctx->pos == NAMESPACES_ENTRY) {
+		int error = proc_emit_namespaces(file, ctx);
 		if (unlikely(error <= 0))
 			return error;
 		ctx->pos = FIRST_PROCESS_ENTRY;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 97b3f5f06db9..8b0002a6cacf 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -61,6 +61,7 @@  struct proc_fs_info {
 	struct pid_namespace *pid_ns;
 	struct dentry *proc_self;        /* For /proc/self */
 	struct dentry *proc_thread_self; /* For /proc/thread-self */
+	struct dentry *proc_namespaces;	 /* For /proc/namespaces */
 	kgid_t pid_gid;
 	enum proc_hidepid hide_pid;
 	enum proc_pidonly pidonly;