Message ID | 180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Support foreign mount namespace with statmount/listmount | expand |
On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote: > In order to utilize the listmount() and statmount() extensions that > allow us to call them on different namespaces we need a way to get > the > mnt namespace id from user space. Add an ioctl to nsfs that will > allow > us to extract the mnt namespace id in order to make these new > extensions > usable. > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > --- > fs/nsfs.c | 14 ++++++++++++++ > include/uapi/linux/nsfs.h | 2 ++ > 2 files changed, 16 insertions(+) > > diff --git a/fs/nsfs.c b/fs/nsfs.c > index 07e22a15ef02..af352dadffe1 100644 > --- a/fs/nsfs.c > +++ b/fs/nsfs.c > @@ -12,6 +12,7 @@ > #include <linux/nsfs.h> > #include <linux/uaccess.h> > > +#include "mount.h" > #include "internal.h" > > static struct vfsmount *nsfs_mnt; > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned > int ioctl, > argp = (uid_t __user *) arg; > uid = from_kuid_munged(current_user_ns(), user_ns- > >owner); > return put_user(uid, argp); > + case NS_GET_MNTNS_ID: { > + struct mnt_namespace *mnt_ns; > + __u64 __user *idp; > + __u64 id; > + > + if (ns->ops->type != CLONE_NEWNS) > + return -EINVAL; > + > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > + idp = (__u64 __user *)arg; > + id = mnt_ns->seq; > + return put_user(id, idp); > + } > default: > return -ENOTTY; > } > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > index a0c8552b64ee..56e8b1639b98 100644 > --- a/include/uapi/linux/nsfs.h > +++ b/include/uapi/linux/nsfs.h > @@ -15,5 +15,7 @@ > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > /* Get owner UID (in the caller's user namespace) for a user > namespace */ > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > +/* Get the id for a mount namespace */ > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) > > #endif /* __LINUX_NSFS_H */ Thinking about this more... Would it also make sense to wire up a similar ioctl in pidfs? It seems like it might be nice to just open a pidfd for pid and then issue the above to get its mntns id, rather than having to grovel around in nsfs.
On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote: > On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote: > > In order to utilize the listmount() and statmount() extensions that > > allow us to call them on different namespaces we need a way to get > > the > > mnt namespace id from user space. Add an ioctl to nsfs that will > > allow > > us to extract the mnt namespace id in order to make these new > > extensions > > usable. > > > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > --- > > fs/nsfs.c | 14 ++++++++++++++ > > include/uapi/linux/nsfs.h | 2 ++ > > 2 files changed, 16 insertions(+) > > > > diff --git a/fs/nsfs.c b/fs/nsfs.c > > index 07e22a15ef02..af352dadffe1 100644 > > --- a/fs/nsfs.c > > +++ b/fs/nsfs.c > > @@ -12,6 +12,7 @@ > > #include <linux/nsfs.h> > > #include <linux/uaccess.h> > > > > +#include "mount.h" > > #include "internal.h" > > > > static struct vfsmount *nsfs_mnt; > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned > > int ioctl, > > argp = (uid_t __user *) arg; > > uid = from_kuid_munged(current_user_ns(), user_ns- > > >owner); > > return put_user(uid, argp); > > + case NS_GET_MNTNS_ID: { > > + struct mnt_namespace *mnt_ns; > > + __u64 __user *idp; > > + __u64 id; > > + > > + if (ns->ops->type != CLONE_NEWNS) > > + return -EINVAL; > > + > > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > > + idp = (__u64 __user *)arg; > > + id = mnt_ns->seq; > > + return put_user(id, idp); > > + } > > default: > > return -ENOTTY; > > } > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > > index a0c8552b64ee..56e8b1639b98 100644 > > --- a/include/uapi/linux/nsfs.h > > +++ b/include/uapi/linux/nsfs.h > > @@ -15,5 +15,7 @@ > > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > > /* Get owner UID (in the caller's user namespace) for a user > > namespace */ > > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > > +/* Get the id for a mount namespace */ > > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) > > > > #endif /* __LINUX_NSFS_H */ > > Thinking about this more... > > Would it also make sense to wire up a similar ioctl in pidfs? It seems > like it might be nice to just open a pidfd for pid and then issue the > above to get its mntns id, rather than having to grovel around in nsfs. I had a different idea yesterday: get a mount namespace fd from a pidfd in fact, get any namespace fd based on a pidfd. It's the equivalent to: /proc/$pid/ns* and then you can avoid having to go via procfs at all. Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS). (We also need an ioctl() on the pidfd to get to the PID without procfs btw.)
On Tue, 2024-06-25 at 16:21 +0200, Christian Brauner wrote: > On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote: > > On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote: > > > In order to utilize the listmount() and statmount() extensions that > > > allow us to call them on different namespaces we need a way to get > > > the > > > mnt namespace id from user space. Add an ioctl to nsfs that will > > > allow > > > us to extract the mnt namespace id in order to make these new > > > extensions > > > usable. > > > > > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > > --- > > > fs/nsfs.c | 14 ++++++++++++++ > > > include/uapi/linux/nsfs.h | 2 ++ > > > 2 files changed, 16 insertions(+) > > > > > > diff --git a/fs/nsfs.c b/fs/nsfs.c > > > index 07e22a15ef02..af352dadffe1 100644 > > > --- a/fs/nsfs.c > > > +++ b/fs/nsfs.c > > > @@ -12,6 +12,7 @@ > > > #include <linux/nsfs.h> > > > #include <linux/uaccess.h> > > > > > > +#include "mount.h" > > > #include "internal.h" > > > > > > static struct vfsmount *nsfs_mnt; > > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned > > > int ioctl, > > > argp = (uid_t __user *) arg; > > > uid = from_kuid_munged(current_user_ns(), user_ns- > > > > owner); > > > return put_user(uid, argp); > > > + case NS_GET_MNTNS_ID: { > > > + struct mnt_namespace *mnt_ns; > > > + __u64 __user *idp; > > > + __u64 id; > > > + > > > + if (ns->ops->type != CLONE_NEWNS) > > > + return -EINVAL; > > > + > > > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > > > + idp = (__u64 __user *)arg; > > > + id = mnt_ns->seq; > > > + return put_user(id, idp); > > > + } > > > default: > > > return -ENOTTY; > > > } > > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > > > index a0c8552b64ee..56e8b1639b98 100644 > > > --- a/include/uapi/linux/nsfs.h > > > +++ b/include/uapi/linux/nsfs.h > > > @@ -15,5 +15,7 @@ > > > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > > > /* Get owner UID (in the caller's user namespace) for a user > > > namespace */ > > > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > > > +/* Get the id for a mount namespace */ > > > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) > > > > > > #endif /* __LINUX_NSFS_H */ > > > > Thinking about this more... > > > > Would it also make sense to wire up a similar ioctl in pidfs? It seems > > like it might be nice to just open a pidfd for pid and then issue the > > above to get its mntns id, rather than having to grovel around in nsfs. > > I had a different idea yesterday: get a mount namespace fd from a pidfd > in fact, get any namespace fd based on a pidfd. It's the equivalent to: > /proc/$pid/ns* and then you can avoid having to go via procfs at all. That would work too. I'd specifically like to be able to avoid crawling around in /proc/<pid> as much as possible. At this point, I think we're still stuck with having to walk /proc to know what pids currently exist though, right? I don't think there is any way to walk all the pids just using pidfds is there? > > Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS). > > (We also need an ioctl() on the pidfd to get to the PID without procfs btw. A PIDFS_GET_PID ioctl seems like a reasonable thing to add.
On Tue, Jun 25, 2024 at 10:50:57AM GMT, Jeff Layton wrote: > On Tue, 2024-06-25 at 16:21 +0200, Christian Brauner wrote: > > On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote: > > > On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote: > > > > In order to utilize the listmount() and statmount() extensions that > > > > allow us to call them on different namespaces we need a way to get > > > > the > > > > mnt namespace id from user space. Add an ioctl to nsfs that will > > > > allow > > > > us to extract the mnt namespace id in order to make these new > > > > extensions > > > > usable. > > > > > > > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > > > --- > > > > fs/nsfs.c | 14 ++++++++++++++ > > > > include/uapi/linux/nsfs.h | 2 ++ > > > > 2 files changed, 16 insertions(+) > > > > > > > > diff --git a/fs/nsfs.c b/fs/nsfs.c > > > > index 07e22a15ef02..af352dadffe1 100644 > > > > --- a/fs/nsfs.c > > > > +++ b/fs/nsfs.c > > > > @@ -12,6 +12,7 @@ > > > > #include <linux/nsfs.h> > > > > #include <linux/uaccess.h> > > > > > > > > +#include "mount.h" > > > > #include "internal.h" > > > > > > > > static struct vfsmount *nsfs_mnt; > > > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned > > > > int ioctl, > > > > argp = (uid_t __user *) arg; > > > > uid = from_kuid_munged(current_user_ns(), user_ns- > > > > > owner); > > > > return put_user(uid, argp); > > > > + case NS_GET_MNTNS_ID: { > > > > + struct mnt_namespace *mnt_ns; > > > > + __u64 __user *idp; > > > > + __u64 id; > > > > + > > > > + if (ns->ops->type != CLONE_NEWNS) > > > > + return -EINVAL; > > > > + > > > > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > > > > + idp = (__u64 __user *)arg; > > > > + id = mnt_ns->seq; > > > > + return put_user(id, idp); > > > > + } > > > > default: > > > > return -ENOTTY; > > > > } > > > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > > > > index a0c8552b64ee..56e8b1639b98 100644 > > > > --- a/include/uapi/linux/nsfs.h > > > > +++ b/include/uapi/linux/nsfs.h > > > > @@ -15,5 +15,7 @@ > > > > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > > > > /* Get owner UID (in the caller's user namespace) for a user > > > > namespace */ > > > > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > > > > +/* Get the id for a mount namespace */ > > > > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) > > > > > > > > #endif /* __LINUX_NSFS_H */ > > > > > > Thinking about this more... > > > > > > Would it also make sense to wire up a similar ioctl in pidfs? It seems > > > like it might be nice to just open a pidfd for pid and then issue the > > > above to get its mntns id, rather than having to grovel around in nsfs. > > > > I had a different idea yesterday: get a mount namespace fd from a pidfd > > in fact, get any namespace fd based on a pidfd. It's the equivalent to: > > /proc/$pid/ns* and then you can avoid having to go via procfs at all. > > That would work too. I'd specifically like to be able to avoid crawling > around in /proc/<pid> as much as possible. Yes, this would do just that for namespaces and it is a natural extension of what I added some time ago, namely that you can already use a pidfd in lieu of a namespace fd in setns. For example, you can do: setns(pidfd, CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET | ...) to switch to the namespaces of another task (atomically ) where the kernel also takes care of the ordering between owning user namespace and other namespaces for the caller. (This still lacks a SETNS_PIDFD_ALL extension that would amount to switching all namespaces that are different from the caller's current namespaces (bc right now you would get EPERM if you're in the same user namespace as the target).) > At this point, I think we're still stuck with having to walk /proc to > know what pids currently exist though, right? I don't think there is > any way to walk all the pids just using pidfds is there? No, that doesn't exist right now. But it wouldn't be out of the question to add something like this (but it would have to be a new data structure most likely) if the use-case is sufficiently compelling. However, a container manager will know what what processes it tracks and it doesn't have to crawl /proc and so there it's immediately useful. Another thing is that I find it useful if there's a connection between two interfaces, say pidfs and nsfs so that you could go from a pidfd to a namespace fd. But I find it odd if we just duplicate nsfs ioctls on top of pidfs. > > > > > Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS). > > > > (We also need an ioctl() on the pidfd to get to the PID without procfs btw. > > A PIDFS_GET_PID ioctl seems like a reasonable thing to add. > -- > Jeff Layton <jlayton@kernel.org>
Hi, On Mon, Jun 24, 2024 at 11:49:50AM -0400, Josef Bacik wrote: > In order to utilize the listmount() and statmount() extensions that > allow us to call them on different namespaces we need a way to get the > mnt namespace id from user space. Add an ioctl to nsfs that will allow > us to extract the mnt namespace id in order to make these new extensions > usable. > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > --- > fs/nsfs.c | 14 ++++++++++++++ > include/uapi/linux/nsfs.h | 2 ++ > 2 files changed, 16 insertions(+) > > diff --git a/fs/nsfs.c b/fs/nsfs.c > index 07e22a15ef02..af352dadffe1 100644 > --- a/fs/nsfs.c > +++ b/fs/nsfs.c > @@ -12,6 +12,7 @@ > #include <linux/nsfs.h> > #include <linux/uaccess.h> > > +#include "mount.h" > #include "internal.h" > > static struct vfsmount *nsfs_mnt; > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, > argp = (uid_t __user *) arg; > uid = from_kuid_munged(current_user_ns(), user_ns->owner); > return put_user(uid, argp); > + case NS_GET_MNTNS_ID: { > + struct mnt_namespace *mnt_ns; > + __u64 __user *idp; > + __u64 id; > + > + if (ns->ops->type != CLONE_NEWNS) > + return -EINVAL; > + > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > + idp = (__u64 __user *)arg; > + id = mnt_ns->seq; > + return put_user(id, idp); > + } > default: > return -ENOTTY; > } > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > index a0c8552b64ee..56e8b1639b98 100644 > --- a/include/uapi/linux/nsfs.h > +++ b/include/uapi/linux/nsfs.h > @@ -15,5 +15,7 @@ > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > /* Get owner UID (in the caller's user namespace) for a user namespace */ > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > +/* Get the id for a mount namespace */ > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) As the kernel is writing an object of type __u64, this has to be defined to _IOR(NSIO, 0x5, __u64) instead, see the corresponding comments in uapi/asm-generic/ioctl.h file.
On Tue, Jul 30, 2024 at 07:45:54PM GMT, Dmitry V. Levin wrote: > Hi, > > On Mon, Jun 24, 2024 at 11:49:50AM -0400, Josef Bacik wrote: > > In order to utilize the listmount() and statmount() extensions that > > allow us to call them on different namespaces we need a way to get the > > mnt namespace id from user space. Add an ioctl to nsfs that will allow > > us to extract the mnt namespace id in order to make these new extensions > > usable. > > > > Signed-off-by: Josef Bacik <josef@toxicpanda.com> > > --- > > fs/nsfs.c | 14 ++++++++++++++ > > include/uapi/linux/nsfs.h | 2 ++ > > 2 files changed, 16 insertions(+) > > > > diff --git a/fs/nsfs.c b/fs/nsfs.c > > index 07e22a15ef02..af352dadffe1 100644 > > --- a/fs/nsfs.c > > +++ b/fs/nsfs.c > > @@ -12,6 +12,7 @@ > > #include <linux/nsfs.h> > > #include <linux/uaccess.h> > > > > +#include "mount.h" > > #include "internal.h" > > > > static struct vfsmount *nsfs_mnt; > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, > > argp = (uid_t __user *) arg; > > uid = from_kuid_munged(current_user_ns(), user_ns->owner); > > return put_user(uid, argp); > > + case NS_GET_MNTNS_ID: { > > + struct mnt_namespace *mnt_ns; > > + __u64 __user *idp; > > + __u64 id; > > + > > + if (ns->ops->type != CLONE_NEWNS) > > + return -EINVAL; > > + > > + mnt_ns = container_of(ns, struct mnt_namespace, ns); > > + idp = (__u64 __user *)arg; > > + id = mnt_ns->seq; > > + return put_user(id, idp); > > + } > > default: > > return -ENOTTY; > > } > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h > > index a0c8552b64ee..56e8b1639b98 100644 > > --- a/include/uapi/linux/nsfs.h > > +++ b/include/uapi/linux/nsfs.h > > @@ -15,5 +15,7 @@ > > #define NS_GET_NSTYPE _IO(NSIO, 0x3) > > /* Get owner UID (in the caller's user namespace) for a user namespace */ > > #define NS_GET_OWNER_UID _IO(NSIO, 0x4) > > +/* Get the id for a mount namespace */ > > +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) > > As the kernel is writing an object of type __u64, > this has to be defined to _IOR(NSIO, 0x5, __u64) instead, > see the corresponding comments in uapi/asm-generic/ioctl.h file. Thanks for spotting that. I've pushed a fix to vfs.fixes. See the appended patch.
diff --git a/fs/nsfs.c b/fs/nsfs.c index 07e22a15ef02..af352dadffe1 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -12,6 +12,7 @@ #include <linux/nsfs.h> #include <linux/uaccess.h> +#include "mount.h" #include "internal.h" static struct vfsmount *nsfs_mnt; @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, argp = (uid_t __user *) arg; uid = from_kuid_munged(current_user_ns(), user_ns->owner); return put_user(uid, argp); + case NS_GET_MNTNS_ID: { + struct mnt_namespace *mnt_ns; + __u64 __user *idp; + __u64 id; + + if (ns->ops->type != CLONE_NEWNS) + return -EINVAL; + + mnt_ns = container_of(ns, struct mnt_namespace, ns); + idp = (__u64 __user *)arg; + id = mnt_ns->seq; + return put_user(id, idp); + } default: return -ENOTTY; } diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..56e8b1639b98 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,7 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) #endif /* __LINUX_NSFS_H */
In order to utilize the listmount() and statmount() extensions that allow us to call them on different namespaces we need a way to get the mnt namespace id from user space. Add an ioctl to nsfs that will allow us to extract the mnt namespace id in order to make these new extensions usable. Signed-off-by: Josef Bacik <josef@toxicpanda.com> --- fs/nsfs.c | 14 ++++++++++++++ include/uapi/linux/nsfs.h | 2 ++ 2 files changed, 16 insertions(+)