@@ -23,6 +23,7 @@ static const struct proc_ns_operations *ns_entries[] = {
#endif
#ifdef CONFIG_PID_NS
&pidns_operations,
+ &pidns_for_children_operations,
#endif
#ifdef CONFIG_USER_NS
&userns_operations,
@@ -27,6 +27,7 @@ extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
extern const struct proc_ns_operations userns_operations;
extern const struct proc_ns_operations mntns_operations;
extern const struct proc_ns_operations cgroupns_operations;
@@ -374,6 +374,23 @@ static struct ns_common *pidns_get(struct task_struct *task)
return ns ? &ns->ns : NULL;
}
+static struct ns_common *pidns_for_children_get(struct task_struct *task)
+{
+ struct pid_namespace *ns = NULL;
+
+ task_lock(task);
+ if (task->nsproxy) {
+ ns = task->nsproxy->pid_ns_for_children;
+ if (ns->child_reaper)
+ get_pid_ns(ns);
+ else
+ ns = NULL;
+ }
+ task_unlock(task);
+
+ return ns ? &ns->ns : NULL;
+}
+
static void pidns_put(struct ns_common *ns)
{
put_pid_ns(to_pid_ns(ns));
@@ -443,6 +460,17 @@ const struct proc_ns_operations pidns_operations = {
.get_parent = pidns_get_parent,
};
+const struct proc_ns_operations pidns_for_children_operations = {
+ .name = "pid_for_children",
+ .real_ns_name = "pid",
+ .type = CLONE_NEWPID,
+ .get = pidns_for_children_get,
+ .put = pidns_put,
+ .install = pidns_install,
+ .owner = pidns_owner,
+ .get_parent = pidns_get_parent,
+};
+
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside. It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks, that do setns(CLONE_NEWPID) in proccess of their work. This patch solves the problem, and it exposes pid_ns_for_children to ns directory in standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] v3: Check child_reaper without tasklist_lock as we are only interested of it's not zero, not in specific value. v2: Do not allow to get namespace if there is no child reaper created, as other tasks need initializations it did (e.g., pid_namespace::proc_mnt), and we don't want they race. Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com> --- fs/proc/namespaces.c | 1 + include/linux/proc_ns.h | 1 + kernel/pid_namespace.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+)