diff mbox

[RESEND,2/2] pidns: Expose task pid_ns_for_children to userspace

Message ID 149086967937.4388.471494976517194744.stgit@localhost.localdomain (mailing list archive)
State New, archived
Headers show

Commit Message

Kirill Tkhai March 30, 2017, 10:27 a.m. UTC
pid_ns_for_children set by a task is known only to the task itself,
and it's impossible to identify it from outside.

It's a big problem for checkpoint/restore software like CRIU,
because it can't correctly handle tasks that do setns(CLONE_NEWPID)
in the process of their work.

This patch solves the problem: it exposes pid_ns_for_children in the
ns directory in the standard way, under the name "pid_for_children":

~# ls /proc/5531/ns -l | grep pid
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
---
 fs/proc/namespaces.c    |    1 +
 include/linux/proc_ns.h |    1 +
 kernel/pid_namespace.c  |   25 +++++++++++++++++++++++++
 3 files changed, 27 insertions(+)

Comments

Andrew Morton March 30, 2017, 10:05 p.m. UTC | #1
On Thu, 30 Mar 2017 13:27:59 +0300 Kirill Tkhai <ktkhai@virtuozzo.com> wrote:

> pid_ns_for_children set by a task is known only to the task itself,
> and it's impossible to identify it from outside.
> 
> It's a big problem for checkpoint/restore software like CRIU,
> because it can't correctly handle tasks, that do setns(CLONE_NEWPID)
> in proccess of their work.
> 
> This patch solves the problem, and it exposes pid_ns_for_children
> to ns directory in standard way with the name "pid_for_children":
> 
> ~# ls /proc/5531/ns -l | grep pid
> lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
> lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]
> 
> --- a/fs/proc/namespaces.c
> +++ b/fs/proc/namespaces.c
> @@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = {
>  #endif
>  #ifdef CONFIG_PID_NS
>  	&pidns_operations,
> +	&pidns_for_children_operations,
>  #endif

This interface should be documented somewhere under Documentation/. 
But I can't immediately find where the /proc/pid/ns/ pseudo-files are
documented...
Andrey Vagin March 31, 2017, 1:04 a.m. UTC | #2
On Thu, Mar 30, 2017 at 03:05:20PM -0700, Andrew Morton wrote:
> On Thu, 30 Mar 2017 13:27:59 +0300 Kirill Tkhai <ktkhai@virtuozzo.com> wrote:
> 
> > pid_ns_for_children set by a task is known only to the task itself,
> > and it's impossible to identify it from outside.
> > 
> > It's a big problem for checkpoint/restore software like CRIU,
> > because it can't correctly handle tasks, that do setns(CLONE_NEWPID)
> > in proccess of their work.
> > 
> > This patch solves the problem, and it exposes pid_ns_for_children
> > to ns directory in standard way with the name "pid_for_children":
> > 
> > ~# ls /proc/5531/ns -l | grep pid
> > lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836]
> > lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286]
> > 
> > --- a/fs/proc/namespaces.c
> > +++ b/fs/proc/namespaces.c
> > @@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = {
> >  #endif
> >  #ifdef CONFIG_PID_NS
> >  	&pidns_operations,
> > +	&pidns_for_children_operations,
> >  #endif
> 
> This interface should be documented somewhere under Documentation/. 
> But I can't immediately find where the /proc/pid/ns/ pseudo-files are
> documented...

I know that they are documented in man7/namespaces.7

https://git.kernel.org/pub/scm/docs/man-pages/man-pages.git/tree/man7/namespaces.7#n187

> 
>
diff mbox

Patch

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 766f0c637ad1..3803b24ca220 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -23,6 +23,7 @@  static const struct proc_ns_operations *ns_entries[] = {
 #endif
 #ifdef CONFIG_PID_NS
 	&pidns_operations,
+	&pidns_for_children_operations,
 #endif
 #ifdef CONFIG_USER_NS
 	&userns_operations,
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 88dba3b53375..58ab28d81fc2 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -27,6 +27,7 @@  extern const struct proc_ns_operations netns_operations;
 extern const struct proc_ns_operations utsns_operations;
 extern const struct proc_ns_operations ipcns_operations;
 extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
 extern const struct proc_ns_operations userns_operations;
 extern const struct proc_ns_operations mntns_operations;
 extern const struct proc_ns_operations cgroupns_operations;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index de461aa0bf9a..4dd02ff0b0bd 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -374,6 +374,20 @@  static struct ns_common *pidns_get(struct task_struct *task)
 	return ns ? &ns->ns : NULL;
 }
 
+static struct ns_common *pidns_for_children_get(struct task_struct *task)
+{
+	struct pid_namespace *ns = NULL;
+
+	task_lock(task);
+	if (task->nsproxy) {
+		ns = task->nsproxy->pid_ns_for_children;
+		get_pid_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns ? &ns->ns : NULL;
+}
+
 static void pidns_put(struct ns_common *ns)
 {
 	put_pid_ns(to_pid_ns(ns));
@@ -443,6 +457,17 @@  const struct proc_ns_operations pidns_operations = {
 	.get_parent	= pidns_get_parent,
 };
 
+const struct proc_ns_operations pidns_for_children_operations = {
+	.name		= "pid_for_children",
+	.real_ns_name	= "pid",
+	.type		= CLONE_NEWPID,
+	.get		= pidns_for_children_get,
+	.put		= pidns_put,
+	.install	= pidns_install,
+	.owner		= pidns_owner,
+	.get_parent	= pidns_get_parent,
+};
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);