Message ID | 20200831134551.1599689-2-christian.brauner@ubuntu.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Support non-blocking pidfds | expand |
On 08/31, Christian Brauner wrote: > > --- /dev/null > +++ b/include/uapi/linux/pidfd.h > @@ -0,0 +1,12 @@ > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > + > +#ifndef _UAPI_LINUX_PIDFD_H > +#define _UAPI_LINUX_PIDFD_H > + > +#include <linux/types.h> > +#include <linux/fcntl.h> > + > +/* Flags for pidfd_open(). */ > +#define PIDFD_NONBLOCK O_NONBLOCK > + > +#endif /* _UAPI_LINUX_PIDFD_H */ Why? Can't we simply use O_NONBLOCK ? Oleg.
On Tue, Sep 01, 2020 at 06:23:10PM +0200, Oleg Nesterov wrote: > On 08/31, Christian Brauner wrote: > > > > --- /dev/null > > +++ b/include/uapi/linux/pidfd.h > > @@ -0,0 +1,12 @@ > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > > + > > +#ifndef _UAPI_LINUX_PIDFD_H > > +#define _UAPI_LINUX_PIDFD_H > > + > > +#include <linux/types.h> > > +#include <linux/fcntl.h> > > + > > +/* Flags for pidfd_open(). */ > > +#define PIDFD_NONBLOCK O_NONBLOCK > > + > > +#endif /* _UAPI_LINUX_PIDFD_H */ > > Why? Can't we simply use O_NONBLOCK ? It's the same thing we seem to do for any other (anon inode) fds: include/linux/eventfd.h:#define EFD_NONBLOCK O_NONBLOCK include/uapi/linux/inotify.h:#define IN_NONBLOCK O_NONBLOCK include/uapi/linux/signalfd.h:#define SFD_NONBLOCK O_NONBLOCK include/uapi/linux/timerfd.h:#define TFD_NONBLOCK O_NONBLOCK also for O_CLOEXEC: include/linux/eventfd.h:#define EFD_CLOEXEC O_CLOEXEC include/linux/userfaultfd_k.h:#define UFFD_CLOEXEC O_CLOEXEC include/uapi/linux/eventpoll.h:#define EPOLL_CLOEXEC O_CLOEXEC include/uapi/linux/mount.h:#define OPEN_TREE_CLOEXEC O_CLOEXEC include/uapi/linux/perf_event.h:#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ include/uapi/linux/signalfd.h:#define SFD_CLOEXEC O_CLOEXEC include/uapi/linux/timerfd.h:#define TFD_CLOEXEC O_CLOEXEC So I think we should just do the same. A clean flag namespace seems nicer to me too tbh. Christian
On 09/01, Christian Brauner wrote: > > On Tue, Sep 01, 2020 at 06:23:10PM +0200, Oleg Nesterov wrote: > > On 08/31, Christian Brauner wrote: > > > > > > --- /dev/null > > > +++ b/include/uapi/linux/pidfd.h > > > @@ -0,0 +1,12 @@ > > > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ > > > + > > > +#ifndef _UAPI_LINUX_PIDFD_H > > > +#define _UAPI_LINUX_PIDFD_H > > > + > > > +#include <linux/types.h> > > > +#include <linux/fcntl.h> > > > + > > > +/* Flags for pidfd_open(). */ > > > +#define PIDFD_NONBLOCK O_NONBLOCK > > > + > > > +#endif /* _UAPI_LINUX_PIDFD_H */ > > > > Why? Can't we simply use O_NONBLOCK ? > > It's the same thing we seem to do for any other (anon inode) fds: > > include/linux/eventfd.h:#define EFD_NONBLOCK O_NONBLOCK > include/uapi/linux/inotify.h:#define IN_NONBLOCK O_NONBLOCK > include/uapi/linux/signalfd.h:#define SFD_NONBLOCK O_NONBLOCK > include/uapi/linux/timerfd.h:#define TFD_NONBLOCK O_NONBLOCK > > also for O_CLOEXEC: > > include/linux/eventfd.h:#define EFD_CLOEXEC O_CLOEXEC > include/linux/userfaultfd_k.h:#define UFFD_CLOEXEC O_CLOEXEC > include/uapi/linux/eventpoll.h:#define EPOLL_CLOEXEC O_CLOEXEC > include/uapi/linux/mount.h:#define OPEN_TREE_CLOEXEC O_CLOEXEC > include/uapi/linux/perf_event.h:#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ > include/uapi/linux/signalfd.h:#define SFD_CLOEXEC O_CLOEXEC > include/uapi/linux/timerfd.h:#define TFD_CLOEXEC O_CLOEXEC > > So I think we should just do the same. Hmm, OK, then I have to agree. > A clean flag namespace seems > nicer to me too tbh. Disagree but this doesn't matter ;) Oleg.
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h new file mode 100644 index 000000000000..5406fbc13074 --- /dev/null +++ b/include/uapi/linux/pidfd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_PIDFD_H +#define _UAPI_LINUX_PIDFD_H + +#include <linux/types.h> +#include <linux/fcntl.h> + +/* Flags for pidfd_open(). */ +#define PIDFD_NONBLOCK O_NONBLOCK + +#endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/kernel/pid.c b/kernel/pid.c index b2562a7ce525..74ddbff1a6ba 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include <linux/sched/task.h> #include <linux/idr.h> #include <net/sock.h> +#include <uapi/linux/pidfd.h> struct pid init_struct_pid = { .count = REFCOUNT_INIT(1), @@ -522,7 +523,8 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) /** * pidfd_create() - Create a new pid file descriptor. * - * @pid: struct pid that the pidfd will reference + * @pid: struct pid that the pidfd will reference + * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set. * @@ -532,12 +534,12 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ -static int pidfd_create(struct pid *pid) +static int pidfd_create(struct pid *pid, unsigned int flags) { int fd; fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), - O_RDWR | O_CLOEXEC); + flags | O_RDWR | O_CLOEXEC); if (fd < 0) put_pid(pid); @@ -565,7 +567,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) int fd; struct pid *p; - if (flags) + if (flags & ~PIDFD_NONBLOCK) return -EINVAL; if (pid <= 0) @@ -576,7 +578,7 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return -ESRCH; if (pid_has_task(p, PIDTYPE_TGID)) - fd = pidfd_create(p); + fd = pidfd_create(p, flags); else fd = -EINVAL;
Introduce PIDFD_NONBLOCK to support non-blocking pidfd file descriptors. Ever since the introduction of pidfds and more advanced async io, various programming languages such as Rust have grown support for async event libraries. These libraries are created to help build epoll-based event loops around file descriptors. A common pattern is to automatically set O_NONBLOCK on all file descriptors they manage. For such libraries the EAGAIN error code is treated specially. When a function is called that returns EAGAIN, the function isn't called again until the event loop indicates that the file descriptor is ready. Supporting EAGAIN when waiting on pidfds makes such libraries just work with little effort. In the following patch we will extend waitid() internally to support non-blocking pidfds. Link: https://lore.kernel.org/lkml/20200811181236.GA18763@localhost/ Link: https://github.com/joshtriplett/async-pidfd Cc: Kees Cook <keescook@chromium.org> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Oleg Nesterov <oleg@redhat.com> Suggested-by: Josh Triplett <josh@joshtriplett.org> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> --- include/uapi/linux/pidfd.h | 12 ++++++++++++ kernel/pid.c | 12 +++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 include/uapi/linux/pidfd.h