diff mbox series

[7/8] fs: add an ioctl to get the mnt ns id from nsfs

Message ID 180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com (mailing list archive)
State New
Headers show
Series Support foreign mount namespace with statmount/listmount | expand

Commit Message

Josef Bacik June 24, 2024, 3:49 p.m. UTC
In order to utilize the listmount() and statmount() extensions that
allow us to call them on different namespaces we need a way to get the
mnt namespace id from user space.  Add an ioctl to nsfs that will allow
us to extract the mnt namespace id in order to make these new extensions
usable.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
---
 fs/nsfs.c                 | 14 ++++++++++++++
 include/uapi/linux/nsfs.h |  2 ++
 2 files changed, 16 insertions(+)

Comments

Jeff Layton June 25, 2024, 2:10 p.m. UTC | #1
On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote:
> In order to utilize the listmount() and statmount() extensions that
> allow us to call them on different namespaces we need a way to get
> the
> mnt namespace id from user space.  Add an ioctl to nsfs that will
> allow
> us to extract the mnt namespace id in order to make these new
> extensions
> usable.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/nsfs.c                 | 14 ++++++++++++++
>  include/uapi/linux/nsfs.h |  2 ++
>  2 files changed, 16 insertions(+)
> 
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 07e22a15ef02..af352dadffe1 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -12,6 +12,7 @@
>  #include <linux/nsfs.h>
>  #include <linux/uaccess.h>
>  
> +#include "mount.h"
>  #include "internal.h"
>  
>  static struct vfsmount *nsfs_mnt;
> @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned
> int ioctl,
>  		argp = (uid_t __user *) arg;
>  		uid = from_kuid_munged(current_user_ns(), user_ns-
> >owner);
>  		return put_user(uid, argp);
> +	case NS_GET_MNTNS_ID: {
> +		struct mnt_namespace *mnt_ns;
> +		__u64 __user *idp;
> +		__u64 id;
> +
> +		if (ns->ops->type != CLONE_NEWNS)
> +			return -EINVAL;
> +
> +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> +		idp = (__u64 __user *)arg;
> +		id = mnt_ns->seq;
> +		return put_user(id, idp);
> +	}
>  	default:
>  		return -ENOTTY;
>  	}
> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> index a0c8552b64ee..56e8b1639b98 100644
> --- a/include/uapi/linux/nsfs.h
> +++ b/include/uapi/linux/nsfs.h
> @@ -15,5 +15,7 @@
>  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
>  /* Get owner UID (in the caller's user namespace) for a user
> namespace */
>  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> +/* Get the id for a mount namespace */
> +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
>  
>  #endif /* __LINUX_NSFS_H */

Thinking about this more...

Would it also make sense to wire up a similar ioctl in pidfs? It seems
like it might be nice to just open a pidfd for pid and then issue the
above to get its mntns id, rather than having to grovel around in nsfs.
Christian Brauner June 25, 2024, 2:21 p.m. UTC | #2
On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote:
> On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote:
> > In order to utilize the listmount() and statmount() extensions that
> > allow us to call them on different namespaces we need a way to get
> > the
> > mnt namespace id from user space.  Add an ioctl to nsfs that will
> > allow
> > us to extract the mnt namespace id in order to make these new
> > extensions
> > usable.
> > 
> > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > ---
> >  fs/nsfs.c                 | 14 ++++++++++++++
> >  include/uapi/linux/nsfs.h |  2 ++
> >  2 files changed, 16 insertions(+)
> > 
> > diff --git a/fs/nsfs.c b/fs/nsfs.c
> > index 07e22a15ef02..af352dadffe1 100644
> > --- a/fs/nsfs.c
> > +++ b/fs/nsfs.c
> > @@ -12,6 +12,7 @@
> >  #include <linux/nsfs.h>
> >  #include <linux/uaccess.h>
> >  
> > +#include "mount.h"
> >  #include "internal.h"
> >  
> >  static struct vfsmount *nsfs_mnt;
> > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned
> > int ioctl,
> >  		argp = (uid_t __user *) arg;
> >  		uid = from_kuid_munged(current_user_ns(), user_ns-
> > >owner);
> >  		return put_user(uid, argp);
> > +	case NS_GET_MNTNS_ID: {
> > +		struct mnt_namespace *mnt_ns;
> > +		__u64 __user *idp;
> > +		__u64 id;
> > +
> > +		if (ns->ops->type != CLONE_NEWNS)
> > +			return -EINVAL;
> > +
> > +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> > +		idp = (__u64 __user *)arg;
> > +		id = mnt_ns->seq;
> > +		return put_user(id, idp);
> > +	}
> >  	default:
> >  		return -ENOTTY;
> >  	}
> > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> > index a0c8552b64ee..56e8b1639b98 100644
> > --- a/include/uapi/linux/nsfs.h
> > +++ b/include/uapi/linux/nsfs.h
> > @@ -15,5 +15,7 @@
> >  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
> >  /* Get owner UID (in the caller's user namespace) for a user
> > namespace */
> >  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> > +/* Get the id for a mount namespace */
> > +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
> >  
> >  #endif /* __LINUX_NSFS_H */
> 
> Thinking about this more...
> 
> Would it also make sense to wire up a similar ioctl in pidfs? It seems
> like it might be nice to just open a pidfd for pid and then issue the
> above to get its mntns id, rather than having to grovel around in nsfs.

I had a different idea yesterday: get a mount namespace fd from a pidfd
in fact, get any namespace fd based on a pidfd. It's the equivalent to:
/proc/$pid/ns* and then you can avoid having to go via procfs at all.

Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS).

(We also need an ioctl() on the pidfd to get to the PID without procfs btw.)
Jeff Layton June 25, 2024, 2:50 p.m. UTC | #3
On Tue, 2024-06-25 at 16:21 +0200, Christian Brauner wrote:
> On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote:
> > On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote:
> > > In order to utilize the listmount() and statmount() extensions that
> > > allow us to call them on different namespaces we need a way to get
> > > the
> > > mnt namespace id from user space.  Add an ioctl to nsfs that will
> > > allow
> > > us to extract the mnt namespace id in order to make these new
> > > extensions
> > > usable.
> > > 
> > > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > > ---
> > >  fs/nsfs.c                 | 14 ++++++++++++++
> > >  include/uapi/linux/nsfs.h |  2 ++
> > >  2 files changed, 16 insertions(+)
> > > 
> > > diff --git a/fs/nsfs.c b/fs/nsfs.c
> > > index 07e22a15ef02..af352dadffe1 100644
> > > --- a/fs/nsfs.c
> > > +++ b/fs/nsfs.c
> > > @@ -12,6 +12,7 @@
> > >  #include <linux/nsfs.h>
> > >  #include <linux/uaccess.h>
> > >  
> > > +#include "mount.h"
> > >  #include "internal.h"
> > >  
> > >  static struct vfsmount *nsfs_mnt;
> > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned
> > > int ioctl,
> > >  		argp = (uid_t __user *) arg;
> > >  		uid = from_kuid_munged(current_user_ns(), user_ns-
> > > > owner);
> > >  		return put_user(uid, argp);
> > > +	case NS_GET_MNTNS_ID: {
> > > +		struct mnt_namespace *mnt_ns;
> > > +		__u64 __user *idp;
> > > +		__u64 id;
> > > +
> > > +		if (ns->ops->type != CLONE_NEWNS)
> > > +			return -EINVAL;
> > > +
> > > +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> > > +		idp = (__u64 __user *)arg;
> > > +		id = mnt_ns->seq;
> > > +		return put_user(id, idp);
> > > +	}
> > >  	default:
> > >  		return -ENOTTY;
> > >  	}
> > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> > > index a0c8552b64ee..56e8b1639b98 100644
> > > --- a/include/uapi/linux/nsfs.h
> > > +++ b/include/uapi/linux/nsfs.h
> > > @@ -15,5 +15,7 @@
> > >  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
> > >  /* Get owner UID (in the caller's user namespace) for a user
> > > namespace */
> > >  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> > > +/* Get the id for a mount namespace */
> > > +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
> > >  
> > >  #endif /* __LINUX_NSFS_H */
> > 
> > Thinking about this more...
> > 
> > Would it also make sense to wire up a similar ioctl in pidfs? It seems
> > like it might be nice to just open a pidfd for pid and then issue the
> > above to get its mntns id, rather than having to grovel around in nsfs.
> 
> I had a different idea yesterday: get a mount namespace fd from a pidfd
> in fact, get any namespace fd based on a pidfd. It's the equivalent to:
> /proc/$pid/ns* and then you can avoid having to go via procfs at all.

That would work too. I'd specifically like to be able to avoid crawling
around in /proc/<pid> as much as possible.

At this point, I think we're still stuck with having to walk /proc to
know what pids currently exist though, right? I don't think there is
any way to walk all the pids just using pidfds is there?

> 
> Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS).
> 
> (We also need an ioctl() on the pidfd to get to the PID without procfs btw.

A PIDFS_GET_PID ioctl seems like a reasonable thing to add.
Christian Brauner June 25, 2024, 3:02 p.m. UTC | #4
On Tue, Jun 25, 2024 at 10:50:57AM GMT, Jeff Layton wrote:
> On Tue, 2024-06-25 at 16:21 +0200, Christian Brauner wrote:
> > On Tue, Jun 25, 2024 at 10:10:29AM GMT, Jeff Layton wrote:
> > > On Mon, 2024-06-24 at 11:49 -0400, Josef Bacik wrote:
> > > > In order to utilize the listmount() and statmount() extensions that
> > > > allow us to call them on different namespaces we need a way to get
> > > > the
> > > > mnt namespace id from user space.  Add an ioctl to nsfs that will
> > > > allow
> > > > us to extract the mnt namespace id in order to make these new
> > > > extensions
> > > > usable.
> > > > 
> > > > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > > > ---
> > > >  fs/nsfs.c                 | 14 ++++++++++++++
> > > >  include/uapi/linux/nsfs.h |  2 ++
> > > >  2 files changed, 16 insertions(+)
> > > > 
> > > > diff --git a/fs/nsfs.c b/fs/nsfs.c
> > > > index 07e22a15ef02..af352dadffe1 100644
> > > > --- a/fs/nsfs.c
> > > > +++ b/fs/nsfs.c
> > > > @@ -12,6 +12,7 @@
> > > >  #include <linux/nsfs.h>
> > > >  #include <linux/uaccess.h>
> > > >  
> > > > +#include "mount.h"
> > > >  #include "internal.h"
> > > >  
> > > >  static struct vfsmount *nsfs_mnt;
> > > > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned
> > > > int ioctl,
> > > >  		argp = (uid_t __user *) arg;
> > > >  		uid = from_kuid_munged(current_user_ns(), user_ns-
> > > > > owner);
> > > >  		return put_user(uid, argp);
> > > > +	case NS_GET_MNTNS_ID: {
> > > > +		struct mnt_namespace *mnt_ns;
> > > > +		__u64 __user *idp;
> > > > +		__u64 id;
> > > > +
> > > > +		if (ns->ops->type != CLONE_NEWNS)
> > > > +			return -EINVAL;
> > > > +
> > > > +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> > > > +		idp = (__u64 __user *)arg;
> > > > +		id = mnt_ns->seq;
> > > > +		return put_user(id, idp);
> > > > +	}
> > > >  	default:
> > > >  		return -ENOTTY;
> > > >  	}
> > > > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> > > > index a0c8552b64ee..56e8b1639b98 100644
> > > > --- a/include/uapi/linux/nsfs.h
> > > > +++ b/include/uapi/linux/nsfs.h
> > > > @@ -15,5 +15,7 @@
> > > >  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
> > > >  /* Get owner UID (in the caller's user namespace) for a user
> > > > namespace */
> > > >  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> > > > +/* Get the id for a mount namespace */
> > > > +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
> > > >  
> > > >  #endif /* __LINUX_NSFS_H */
> > > 
> > > Thinking about this more...
> > > 
> > > Would it also make sense to wire up a similar ioctl in pidfs? It seems
> > > like it might be nice to just open a pidfd for pid and then issue the
> > > above to get its mntns id, rather than having to grovel around in nsfs.
> > 
> > I had a different idea yesterday: get a mount namespace fd from a pidfd
> > in fact, get any namespace fd based on a pidfd. It's the equivalent to:
> > /proc/$pid/ns* and then you can avoid having to go via procfs at all.
> 
> That would work too. I'd specifically like to be able to avoid crawling
> around in /proc/<pid> as much as possible.

Yes, this would do just that for namespaces and it is a natural
extension of what I added some time ago, namely that you can already use
a pidfd in lieu of a namespace fd in setns. For example, you can do:

setns(pidfd, CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET | ...)

to switch to the namespaces of another task (atomically ) where the
kernel also takes care of the ordering between owning user namespace and
other namespaces for the caller.

(This still lacks a SETNS_PIDFD_ALL extension that would amount to
switching all namespaces that are different from the caller's current
namespaces (bc right now you would get EPERM if you're in the same user
namespace as the target).)

> At this point, I think we're still stuck with having to walk /proc to
> know what pids currently exist though, right? I don't think there is
> any way to walk all the pids just using pidfds is there?

No, that doesn't exist right now. But it wouldn't be out of the question
to add something like this (but it would have to be a new data structure
most likely) if the use-case is sufficiently compelling.

However, a container manager will know what what processes it tracks and
it doesn't have to crawl /proc and so there it's immediately useful.

Another thing is that I find it useful if there's a connection between
two interfaces, say pidfs and nsfs so that you could go from a pidfd to
a namespace fd. But I find it odd if we just duplicate nsfs ioctls on
top of pidfs.

> 
> > 
> > Needs to be governed by ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS).
> > 
> > (We also need an ioctl() on the pidfd to get to the PID without procfs btw.
> 
> A PIDFS_GET_PID ioctl seems like a reasonable thing to add.
> -- 
> Jeff Layton <jlayton@kernel.org>
Dmitry V . Levin July 30, 2024, 4:45 p.m. UTC | #5
Hi,

On Mon, Jun 24, 2024 at 11:49:50AM -0400, Josef Bacik wrote:
> In order to utilize the listmount() and statmount() extensions that
> allow us to call them on different namespaces we need a way to get the
> mnt namespace id from user space.  Add an ioctl to nsfs that will allow
> us to extract the mnt namespace id in order to make these new extensions
> usable.
> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/nsfs.c                 | 14 ++++++++++++++
>  include/uapi/linux/nsfs.h |  2 ++
>  2 files changed, 16 insertions(+)
> 
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 07e22a15ef02..af352dadffe1 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -12,6 +12,7 @@
>  #include <linux/nsfs.h>
>  #include <linux/uaccess.h>
>  
> +#include "mount.h"
>  #include "internal.h"
>  
>  static struct vfsmount *nsfs_mnt;
> @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
>  		argp = (uid_t __user *) arg;
>  		uid = from_kuid_munged(current_user_ns(), user_ns->owner);
>  		return put_user(uid, argp);
> +	case NS_GET_MNTNS_ID: {
> +		struct mnt_namespace *mnt_ns;
> +		__u64 __user *idp;
> +		__u64 id;
> +
> +		if (ns->ops->type != CLONE_NEWNS)
> +			return -EINVAL;
> +
> +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> +		idp = (__u64 __user *)arg;
> +		id = mnt_ns->seq;
> +		return put_user(id, idp);
> +	}
>  	default:
>  		return -ENOTTY;
>  	}
> diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> index a0c8552b64ee..56e8b1639b98 100644
> --- a/include/uapi/linux/nsfs.h
> +++ b/include/uapi/linux/nsfs.h
> @@ -15,5 +15,7 @@
>  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
>  /* Get owner UID (in the caller's user namespace) for a user namespace */
>  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> +/* Get the id for a mount namespace */
> +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)

As the kernel is writing an object of type __u64,
this has to be defined to _IOR(NSIO, 0x5, __u64) instead,
see the corresponding comments in uapi/asm-generic/ioctl.h file.
Christian Brauner July 31, 2024, 5:51 a.m. UTC | #6
On Tue, Jul 30, 2024 at 07:45:54PM GMT, Dmitry V. Levin wrote:
> Hi,
> 
> On Mon, Jun 24, 2024 at 11:49:50AM -0400, Josef Bacik wrote:
> > In order to utilize the listmount() and statmount() extensions that
> > allow us to call them on different namespaces we need a way to get the
> > mnt namespace id from user space.  Add an ioctl to nsfs that will allow
> > us to extract the mnt namespace id in order to make these new extensions
> > usable.
> > 
> > Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> > ---
> >  fs/nsfs.c                 | 14 ++++++++++++++
> >  include/uapi/linux/nsfs.h |  2 ++
> >  2 files changed, 16 insertions(+)
> > 
> > diff --git a/fs/nsfs.c b/fs/nsfs.c
> > index 07e22a15ef02..af352dadffe1 100644
> > --- a/fs/nsfs.c
> > +++ b/fs/nsfs.c
> > @@ -12,6 +12,7 @@
> >  #include <linux/nsfs.h>
> >  #include <linux/uaccess.h>
> >  
> > +#include "mount.h"
> >  #include "internal.h"
> >  
> >  static struct vfsmount *nsfs_mnt;
> > @@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
> >  		argp = (uid_t __user *) arg;
> >  		uid = from_kuid_munged(current_user_ns(), user_ns->owner);
> >  		return put_user(uid, argp);
> > +	case NS_GET_MNTNS_ID: {
> > +		struct mnt_namespace *mnt_ns;
> > +		__u64 __user *idp;
> > +		__u64 id;
> > +
> > +		if (ns->ops->type != CLONE_NEWNS)
> > +			return -EINVAL;
> > +
> > +		mnt_ns = container_of(ns, struct mnt_namespace, ns);
> > +		idp = (__u64 __user *)arg;
> > +		id = mnt_ns->seq;
> > +		return put_user(id, idp);
> > +	}
> >  	default:
> >  		return -ENOTTY;
> >  	}
> > diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
> > index a0c8552b64ee..56e8b1639b98 100644
> > --- a/include/uapi/linux/nsfs.h
> > +++ b/include/uapi/linux/nsfs.h
> > @@ -15,5 +15,7 @@
> >  #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
> >  /* Get owner UID (in the caller's user namespace) for a user namespace */
> >  #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
> > +/* Get the id for a mount namespace */
> > +#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
> 
> As the kernel is writing an object of type __u64,
> this has to be defined to _IOR(NSIO, 0x5, __u64) instead,
> see the corresponding comments in uapi/asm-generic/ioctl.h file.

Thanks for spotting that. I've pushed a fix to vfs.fixes. See the
appended patch.
diff mbox series

Patch

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 07e22a15ef02..af352dadffe1 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -12,6 +12,7 @@ 
 #include <linux/nsfs.h>
 #include <linux/uaccess.h>
 
+#include "mount.h"
 #include "internal.h"
 
 static struct vfsmount *nsfs_mnt;
@@ -143,6 +144,19 @@  static long ns_ioctl(struct file *filp, unsigned int ioctl,
 		argp = (uid_t __user *) arg;
 		uid = from_kuid_munged(current_user_ns(), user_ns->owner);
 		return put_user(uid, argp);
+	case NS_GET_MNTNS_ID: {
+		struct mnt_namespace *mnt_ns;
+		__u64 __user *idp;
+		__u64 id;
+
+		if (ns->ops->type != CLONE_NEWNS)
+			return -EINVAL;
+
+		mnt_ns = container_of(ns, struct mnt_namespace, ns);
+		idp = (__u64 __user *)arg;
+		id = mnt_ns->seq;
+		return put_user(id, idp);
+	}
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index a0c8552b64ee..56e8b1639b98 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -15,5 +15,7 @@ 
 #define NS_GET_NSTYPE		_IO(NSIO, 0x3)
 /* Get owner UID (in the caller's user namespace) for a user namespace */
 #define NS_GET_OWNER_UID	_IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID		_IO(NSIO, 0x5)
 
 #endif /* __LINUX_NSFS_H */