diff mbox series

[v5,13/14] epoll: implement epoll_create2() syscall

Message ID 20190624144151.22688-14-rpenyaev@suse.de (mailing list archive)
State New, archived
Headers show
Series epoll: support pollable epoll from userspace | expand

Commit Message

Roman Penyaev June 24, 2019, 2:41 p.m. UTC
epoll_create2() is needed to accept EPOLL_USERPOLL flags
and size, i.e. this patch wires up polling from userspace.

Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 2 ++
 arch/arm/tools/syscall.tbl                  | 1 +
 arch/arm64/include/asm/unistd.h             | 2 +-
 arch/arm64/include/asm/unistd32.h           | 2 ++
 arch/ia64/kernel/syscalls/syscall.tbl       | 2 ++
 arch/m68k/kernel/syscalls/syscall.tbl       | 2 ++
 arch/microblaze/kernel/syscalls/syscall.tbl | 1 +
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 2 ++
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 2 ++
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 2 ++
 arch/parisc/kernel/syscalls/syscall.tbl     | 2 ++
 arch/powerpc/kernel/syscalls/syscall.tbl    | 2 ++
 arch/s390/kernel/syscalls/syscall.tbl       | 2 ++
 arch/sh/kernel/syscalls/syscall.tbl         | 2 ++
 arch/sparc/kernel/syscalls/syscall.tbl      | 2 ++
 arch/x86/entry/syscalls/syscall_32.tbl      | 1 +
 arch/x86/entry/syscalls/syscall_64.tbl      | 1 +
 arch/xtensa/kernel/syscalls/syscall.tbl     | 1 +
 fs/eventpoll.c                              | 5 +++++
 include/linux/syscalls.h                    | 1 +
 include/uapi/asm-generic/unistd.h           | 4 +++-
 kernel/sys_ni.c                             | 1 +
 22 files changed, 40 insertions(+), 2 deletions(-)

Comments

Arnd Bergmann June 24, 2019, 4:14 p.m. UTC | #1
On Mon, Jun 24, 2019 at 4:42 PM Roman Penyaev <rpenyaev@suse.de> wrote:
>
> epoll_create2() is needed to accept EPOLL_USERPOLL flags
> and size, i.e. this patch wires up polling from userspace.

Can you explain in the patch description more what it's needed for?

The man page only states that "Since Linux 2.6.8, the size argument
is ignored", so your description above does not explain why you need
to add the size argument back.

> diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
> index 1db9bbcfb84e..a1d7b695063d 100644
> --- a/arch/alpha/kernel/syscalls/syscall.tbl
> +++ b/arch/alpha/kernel/syscalls/syscall.tbl
> @@ -474,3 +474,5 @@
>  542    common  fsmount                         sys_fsmount
>  543    common  fspick                          sys_fspick
>  544    common  pidfd_open                      sys_pidfd_open
> +# 546  common  clone3                  sys_clone3
> +547    common  epoll_create2                   sys_epoll_create2
> diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
> index ff45d8807cb8..1497f3c87d54 100644
> --- a/arch/arm/tools/syscall.tbl
> +++ b/arch/arm/tools/syscall.tbl
> @@ -449,3 +449,4 @@
>  433    common  fspick                          sys_fspick
>  434    common  pidfd_open                      sys_pidfd_open
>  436    common  clone3                          sys_clone3
> +437    common  epoll_create2                   sys_epoll_create2

The table changes all look correct and complete, provided we
don't get another patch picking the same number.

          Arnd
Roman Penyaev June 24, 2019, 4:25 p.m. UTC | #2
On 2019-06-24 18:14, Arnd Bergmann wrote:
> On Mon, Jun 24, 2019 at 4:42 PM Roman Penyaev <rpenyaev@suse.de> wrote:
>> 
>> epoll_create2() is needed to accept EPOLL_USERPOLL flags
>> and size, i.e. this patch wires up polling from userspace.
> 
> Can you explain in the patch description more what it's needed for?

Sure. Will update on next iteration.

> 
> The man page only states that "Since Linux 2.6.8, the size argument
> is ignored", so your description above does not explain why you need
> to add the size argument back.
> 
>> diff --git a/arch/alpha/kernel/syscalls/syscall.tbl 
>> b/arch/alpha/kernel/syscalls/syscall.tbl
>> index 1db9bbcfb84e..a1d7b695063d 100644
>> --- a/arch/alpha/kernel/syscalls/syscall.tbl
>> +++ b/arch/alpha/kernel/syscalls/syscall.tbl
>> @@ -474,3 +474,5 @@
>>  542    common  fsmount                         sys_fsmount
>>  543    common  fspick                          sys_fspick
>>  544    common  pidfd_open                      sys_pidfd_open
>> +# 546  common  clone3                  sys_clone3
>> +547    common  epoll_create2                   sys_epoll_create2
>> diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
>> index ff45d8807cb8..1497f3c87d54 100644
>> --- a/arch/arm/tools/syscall.tbl
>> +++ b/arch/arm/tools/syscall.tbl
>> @@ -449,3 +449,4 @@
>>  433    common  fspick                          sys_fspick
>>  434    common  pidfd_open                      sys_pidfd_open
>>  436    common  clone3                          sys_clone3
>> +437    common  epoll_create2                   sys_epoll_create2
> 
> The table changes all look correct and complete, provided we
> don't get another patch picking the same number.

Good.  I had doubts, because on some archs there is a gap,
(sys_clone3 was missing).  So I left a placeholder, so
can be uncommented when sys_clone3 is implemented.

--
Roman
Eric Wong June 25, 2019, 6:29 p.m. UTC | #3
Roman Penyaev <rpenyaev@suse.de> wrote:
> epoll_create2() is needed to accept EPOLL_USERPOLL flags
> and size, i.e. this patch wires up polling from userspace.

Instead of adding a new syscall, is setting size (and/or even
the EPOLL_USEREPOLL flag) something that could be done via
ioctl?

There's no race like CLOEXEC to worry about and it's not
going to be in a hot path where the extra syscall matters.

glibc won't need to increase in .so size, either.
diff mbox series

Patch

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 1db9bbcfb84e..a1d7b695063d 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -474,3 +474,5 @@ 
 542	common	fsmount				sys_fsmount
 543	common	fspick				sys_fspick
 544	common	pidfd_open			sys_pidfd_open
+# 546	common	clone3			sys_clone3
+547	common	epoll_create2			sys_epoll_create2
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index ff45d8807cb8..1497f3c87d54 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -449,3 +449,4 @@ 
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 436	common	clone3				sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index e4e0523102e2..7fb8d77e2340 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@ 
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		437
+#define __NR_compat_syscalls		438
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 10f16b0175ca..eb467e639352 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -890,6 +890,8 @@  __SYSCALL(__NR_fspick, sys_fspick)
 __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 436
 __SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_epoll_create2 437
+__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index ecc44926737b..5cecff901853 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -355,3 +355,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436	common	clone3			sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 9a3eb2558568..29d944e2e9d6 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -434,3 +434,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436	common	clone3			sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 9181f181f76d..fad83841b16b 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -441,3 +441,4 @@ 
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 436	common	clone3				sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 97035e19ad03..661d63d7ea84 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -373,3 +373,5 @@ 
 432	n32	fsmount				sys_fsmount
 433	n32	fspick				sys_fspick
 434	n32	pidfd_open			sys_pidfd_open
+# 436	n32	clone3			sys_clone3
+437	n32	epoll_create2			sys_epoll_create2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index d7292722d3b0..4a7f270ef126 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -349,3 +349,5 @@ 
 432	n64	fsmount				sys_fsmount
 433	n64	fspick				sys_fspick
 434	n64	pidfd_open			sys_pidfd_open
+# 436	n64	clone3			sys_clone3
+437	n64	epoll_create2			sys_epoll_create2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index dba084c92f14..db87225a9dc5 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -422,3 +422,5 @@ 
 432	o32	fsmount				sys_fsmount
 433	o32	fspick				sys_fspick
 434	o32	pidfd_open			sys_pidfd_open
+# 436	o32	clone3			sys_clone3
+437	o32	epoll_create2			sys_epoll_create2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5022b9e179c2..25374a2c4e16 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -431,3 +431,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436 common	clone3			sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index f2c3bda2d39f..9f48d8a737ab 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -516,3 +516,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436	common	clone3			sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 6ebacfeaf853..0cef54850a1c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -437,3 +437,5 @@ 
 432  common	fsmount			sys_fsmount			sys_fsmount
 433  common	fspick			sys_fspick			sys_fspick
 434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
+# 436  common	clone3		sys_clone3			sys_clone3
+437  common	epoll_create2		sys_epoll_create2               sys_epoll_create2
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 834c9c7d79fa..42769d6ec8fe 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -437,3 +437,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436  common	clone3		sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index c58e71f21129..bf86fa32913d 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -480,3 +480,5 @@ 
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
+# 436  common	clone3		sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 6a99dbbf7e04..26d64f09b097 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -440,3 +440,4 @@ 
 433	i386	fspick			sys_fspick			__ia32_sys_fspick
 434	i386	pidfd_open		sys_pidfd_open			__ia32_sys_pidfd_open
 436	i386	clone3			sys_clone3			__ia32_sys_clone3
+437	i386	epoll_create2		sys_epoll_create2		__ia32_sys_epoll_create2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 68227c7f71e5..4b58d8694693 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -357,6 +357,7 @@ 
 433	common	fspick			__x64_sys_fspick
 434	common	pidfd_open		__x64_sys_pidfd_open
 436	common	clone3			__x64_sys_clone3/ptregs
+437	common	epoll_create2		__x64_sys_epoll_create2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index d5a1fd2c96c7..eef4367d433e 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -406,3 +406,4 @@ 
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
 436	common	clone3				sys_clone3
+437	common	epoll_create2			sys_epoll_create2
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 34239564bdfb..c3379bb4209b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2825,6 +2825,11 @@  static int do_epoll_create(int flags, size_t size)
 	return error;
 }
 
+SYSCALL_DEFINE2(epoll_create2, int, flags, size_t, size)
+{
+	return do_epoll_create(flags, size);
+}
+
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
 	return do_epoll_create(flags, 0);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b01d54a5732e..655ac0ebfdf9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -357,6 +357,7 @@  asmlinkage long sys_eventfd2(unsigned int count, int flags);
 
 /* fs/eventpoll.c */
 asmlinkage long sys_epoll_create1(int flags);
+asmlinkage long sys_epoll_create2(int flags, size_t size);
 asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
 				struct epoll_event __user *event);
 asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 7c7c14a2e097..59c9dea64565 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -848,9 +848,11 @@  __SYSCALL(__NR_fspick, sys_fspick)
 __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
 #define __NR_clone3 436
 __SYSCALL(__NR_clone3, sys_clone3)
+#define __NR_epoll_create2 437
+__SYSCALL(__NR_epoll_create2, sys_epoll_create2)
 
 #undef __NR_syscalls
-#define __NR_syscalls 437
+#define __NR_syscalls 438
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 34b76895b81e..090ff3aa7568 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -65,6 +65,7 @@  COND_SYSCALL(eventfd2);
 
 /* fs/eventfd.c */
 COND_SYSCALL(epoll_create1);
+COND_SYSCALL(epoll_create2);
 COND_SYSCALL(epoll_ctl);
 COND_SYSCALL(epoll_pwait);
 COND_SYSCALL_COMPAT(epoll_pwait);