[v2,3/7] Introduce a new clone4 syscall with more flag bits and extensible arguments

Message ID 367b888ef58831b6812c3cf80ca973c65edc67f5.1426376419.git.josh@joshtriplett.org (mailing list archive)
State New, archived

Commit Message

Josh Triplett March 15, 2015, 7:59 a.m. UTC
clone() has no usable flag bits left.  It has three now-unused flags
(CLONE_PID, CLONE_DETACHED, and CLONE_STOPPED), but current kernels
silently ignore those flags rather than returning an error such as
EINVAL, so reusing them would not let userspace detect whether the new
functionality is available.

Introduce a new system call, clone4, which accepts a second 32-bit flags
field.  clone4 also returns EINVAL for the currently unused flags in
clone, allowing their reuse.

To process these new flags, change the flags argument of _do_fork to a
u64.  sys_clone and do_fork both still take "unsigned long" flags as
they did before, truncating the value to 32 bits and masking out the
obsolete flags so that clone keeps its current behavior.

clone4 accepts its remaining arguments as a structure, and userspace
passes in the size of that structure.  clone4 has well-defined semantics
that allow extending the structure in the future.  Newer userspace
passing in a larger structure than the kernel expects receives EINVAL,
and can retry with a smaller structure to work with older kernels.
Newer kernels accept smaller argument structures from userspace, and any
arguments not supplied default to 0.
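
For illustration only, a call from userspace might look roughly like the
sketch below.  This is not part of the patch: it assumes the x86-64
syscall number 323 added by the table change below, re-declares
clone4_args as this patch defines it, and assumes no libc wrapper.

	#define _GNU_SOURCE		/* for CLONE_* in <sched.h> */
	#include <sched.h>
	#include <signal.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <sys/wait.h>
	#include <linux/types.h>

	#define __NR_clone4 323		/* x86-64 number from this patch */

	struct clone4_args {		/* as defined in this patch */
		__kernel_pid_t *ptid;
		__kernel_pid_t *ctid;
		__kernel_ulong_t stack_start;
		__kernel_ulong_t stack_size;
		__kernel_ulong_t tls;
	};

	int main(void)
	{
		struct clone4_args args;
		__kernel_pid_t ptid = 0;
		long ret;

		/* Zero the structure; arguments we don't use stay 0. */
		memset(&args, 0, sizeof(args));
		args.ptid = &ptid;

		/*
		 * flags_high = 0, flags_low = CLONE_PARENT_SETTID | SIGCHLD,
		 * stack_start = 0: a plain fork-like child running on a copy
		 * of the parent's stack.
		 */
		ret = syscall(__NR_clone4, 0, CLONE_PARENT_SETTID | SIGCHLD,
			      sizeof(args), &args);
		if (ret == 0) {
			_exit(0);		/* child */
		} else if (ret > 0) {
			printf("child %ld, ptid %d\n", ret, (int)ptid);
			waitpid((pid_t)ret, NULL, 0);
		} else {
			perror("clone4");	/* e.g. ENOSYS on older kernels */
		}
		return 0;
	}

An unknown flag bit, or an args_size larger than the kernel's structure,
returns EINVAL, which is how userspace can probe what a given kernel
supports; ENOSYS means clone4 itself is absent.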

clone4 handles its arguments in the same order on all architectures,
with none of the CLONE_BACKWARDS-style argument reordering that clone
has; to do so, it depends on the new HAVE_COPY_THREAD_TLS.

The new system call currently accepts exactly the same flags as clone;
future commits will introduce new flags for additional functionality.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Thiago Macieira <thiago.macieira@intel.com>
---
 arch/x86/ia32/ia32entry.S         |  1 +
 arch/x86/kernel/entry_64.S        |  1 +
 arch/x86/syscalls/syscall_32.tbl  |  1 +
 arch/x86/syscalls/syscall_64.tbl  |  2 ++
 include/linux/compat.h            | 12 +++++++++
 include/uapi/asm-generic/unistd.h |  4 ++-
 include/uapi/linux/sched.h        | 36 ++++++++++++++++++++++---
 init/Kconfig                      | 10 +++++++
 kernel/fork.c                     | 56 ++++++++++++++++++++++++++++++++++++---
 kernel/sys_ni.c                   |  1 +
 10 files changed, 116 insertions(+), 8 deletions(-)

Comments

David Drysdale March 23, 2015, 2:11 p.m. UTC | #1
On Sun, Mar 15, 2015 at 7:59 AM, Josh Triplett <josh@joshtriplett.org> wrote:
> diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> index 0286735..ba28306 100644
> --- a/arch/x86/ia32/ia32entry.S
> +++ b/arch/x86/ia32/ia32entry.S
> @@ -483,6 +483,7 @@ GLOBAL(\label)
>         PTREGSCALL stub32_execveat, compat_sys_execveat
>         PTREGSCALL stub32_fork, sys_fork
>         PTREGSCALL stub32_vfork, sys_vfork
> +       PTREGSCALL stub32_clone4, compat_sys_clone4
>
>         ALIGN
>  GLOBAL(stub32_clone)
> diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> index 1d74d16..ead143f 100644
> --- a/arch/x86/kernel/entry_64.S
> +++ b/arch/x86/kernel/entry_64.S
> @@ -520,6 +520,7 @@ END(\label)
>         FORK_LIKE  clone
>         FORK_LIKE  fork
>         FORK_LIKE  vfork
> +       FORK_LIKE  clone4
>         FIXED_FRAME stub_iopl, sys_iopl
>
>  ENTRY(stub_execve)
> diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
> index b3560ec..56fcc90 100644
> --- a/arch/x86/syscalls/syscall_32.tbl
> +++ b/arch/x86/syscalls/syscall_32.tbl
> @@ -365,3 +365,4 @@
>  356    i386    memfd_create            sys_memfd_create
>  357    i386    bpf                     sys_bpf
>  358    i386    execveat                sys_execveat                    stub32_execveat
> +359    i386    clone4                  sys_clone4                      stub32_clone4
> diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> index 8d656fb..af15b0f 100644
> --- a/arch/x86/syscalls/syscall_64.tbl
> +++ b/arch/x86/syscalls/syscall_64.tbl
> @@ -329,6 +329,7 @@
>  320    common  kexec_file_load         sys_kexec_file_load
>  321    common  bpf                     sys_bpf
>  322    64      execveat                stub_execveat
> +323    64      clone4                  stub_clone4
>
>  #
>  # x32-specific system call numbers start at 512 to avoid cache impact
> @@ -368,3 +369,4 @@
>  543    x32     io_setup                compat_sys_io_setup
>  544    x32     io_submit               compat_sys_io_submit
>  545    x32     execveat                stub_x32_execveat
> +546    x32     clone4                  stub32_clone4

Doesn't this need an x32 specific wrapper (to ensure the full
set of registers are saved)?
Josh Triplett March 23, 2015, 3:05 p.m. UTC | #2
On Mon, Mar 23, 2015 at 02:11:45PM +0000, David Drysdale wrote:
> On Sun, Mar 15, 2015 at 7:59 AM, Josh Triplett <josh@joshtriplett.org> wrote:
> > diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
> > index 0286735..ba28306 100644
> > --- a/arch/x86/ia32/ia32entry.S
> > +++ b/arch/x86/ia32/ia32entry.S
> > @@ -483,6 +483,7 @@ GLOBAL(\label)
> >         PTREGSCALL stub32_execveat, compat_sys_execveat
> >         PTREGSCALL stub32_fork, sys_fork
> >         PTREGSCALL stub32_vfork, sys_vfork
> > +       PTREGSCALL stub32_clone4, compat_sys_clone4
> >
> >         ALIGN
> >  GLOBAL(stub32_clone)
> > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
> > index 1d74d16..ead143f 100644
> > --- a/arch/x86/kernel/entry_64.S
> > +++ b/arch/x86/kernel/entry_64.S
> > @@ -520,6 +520,7 @@ END(\label)
> >         FORK_LIKE  clone
> >         FORK_LIKE  fork
> >         FORK_LIKE  vfork
> > +       FORK_LIKE  clone4
> >         FIXED_FRAME stub_iopl, sys_iopl
> >
> >  ENTRY(stub_execve)
> > diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
> > index b3560ec..56fcc90 100644
> > --- a/arch/x86/syscalls/syscall_32.tbl
> > +++ b/arch/x86/syscalls/syscall_32.tbl
> > @@ -365,3 +365,4 @@
> >  356    i386    memfd_create            sys_memfd_create
> >  357    i386    bpf                     sys_bpf
> >  358    i386    execveat                sys_execveat                    stub32_execveat
> > +359    i386    clone4                  sys_clone4                      stub32_clone4
> > diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
> > index 8d656fb..af15b0f 100644
> > --- a/arch/x86/syscalls/syscall_64.tbl
> > +++ b/arch/x86/syscalls/syscall_64.tbl
> > @@ -329,6 +329,7 @@
> >  320    common  kexec_file_load         sys_kexec_file_load
> >  321    common  bpf                     sys_bpf
> >  322    64      execveat                stub_execveat
> > +323    64      clone4                  stub_clone4
> >
> >  #
> >  # x32-specific system call numbers start at 512 to avoid cache impact
> > @@ -368,3 +369,4 @@
> >  543    x32     io_setup                compat_sys_io_setup
> >  544    x32     io_submit               compat_sys_io_submit
> >  545    x32     execveat                stub_x32_execveat
> > +546    x32     clone4                  stub32_clone4
> 
> Doesn't this need an x32 specific wrapper (to ensure the full
> set of registers are saved)?

I'm not an x32 expert; I don't know how x32 interacts with pt_regs and
compat syscalls.  Could an x32 expert weigh in, please?

- Josh Triplett
David Drysdale March 31, 2015, 2:41 p.m. UTC | #3
On Mon, Mar 23, 2015 at 3:05 PM,  <josh@joshtriplett.org> wrote:
> On Mon, Mar 23, 2015 at 02:11:45PM +0000, David Drysdale wrote:
>> On Sun, Mar 15, 2015 at 7:59 AM, Josh Triplett <josh@joshtriplett.org> wrote:
>> > diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
>> > index 0286735..ba28306 100644
>> > --- a/arch/x86/ia32/ia32entry.S
>> > +++ b/arch/x86/ia32/ia32entry.S
>> > @@ -483,6 +483,7 @@ GLOBAL(\label)
>> >         PTREGSCALL stub32_execveat, compat_sys_execveat
>> >         PTREGSCALL stub32_fork, sys_fork
>> >         PTREGSCALL stub32_vfork, sys_vfork
>> > +       PTREGSCALL stub32_clone4, compat_sys_clone4
>> >
>> >         ALIGN
>> >  GLOBAL(stub32_clone)
>> > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
>> > index 1d74d16..ead143f 100644
>> > --- a/arch/x86/kernel/entry_64.S
>> > +++ b/arch/x86/kernel/entry_64.S
>> > @@ -520,6 +520,7 @@ END(\label)
>> >         FORK_LIKE  clone
>> >         FORK_LIKE  fork
>> >         FORK_LIKE  vfork
>> > +       FORK_LIKE  clone4
>> >         FIXED_FRAME stub_iopl, sys_iopl
>> >
>> >  ENTRY(stub_execve)
>> > diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
>> > index b3560ec..56fcc90 100644
>> > --- a/arch/x86/syscalls/syscall_32.tbl
>> > +++ b/arch/x86/syscalls/syscall_32.tbl
>> > @@ -365,3 +365,4 @@
>> >  356    i386    memfd_create            sys_memfd_create
>> >  357    i386    bpf                     sys_bpf
>> >  358    i386    execveat                sys_execveat                    stub32_execveat
>> > +359    i386    clone4                  sys_clone4                      stub32_clone4
>> > diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
>> > index 8d656fb..af15b0f 100644
>> > --- a/arch/x86/syscalls/syscall_64.tbl
>> > +++ b/arch/x86/syscalls/syscall_64.tbl
>> > @@ -329,6 +329,7 @@
>> >  320    common  kexec_file_load         sys_kexec_file_load
>> >  321    common  bpf                     sys_bpf
>> >  322    64      execveat                stub_execveat
>> > +323    64      clone4                  stub_clone4
>> >
>> >  #
>> >  # x32-specific system call numbers start at 512 to avoid cache impact
>> > @@ -368,3 +369,4 @@
>> >  543    x32     io_setup                compat_sys_io_setup
>> >  544    x32     io_submit               compat_sys_io_submit
>> >  545    x32     execveat                stub_x32_execveat
>> > +546    x32     clone4                  stub32_clone4
>>
>> Doesn't this need an x32 specific wrapper (to ensure the full
>> set of registers are saved)?
>
> I'm not an x32 expert; I don't know how x32 interacts with pt_regs and
> compat syscalls.  Could an x32 expert weigh in, please?
>
> - Josh Triplett

(In the absence of an x32 expert chiming in...)

As I understand it:
 - stub32_clone4 expects 32-bit calling conventions and calls compat_sys_clone4
 - stub_clone4 expects 64-bit calling conventions and calls sys_clone4
 - stub_x32_clone4 would expect 64-bit calling conventions but call
   compat_sys_clone4.

Also, I have a suspicion that different field types in the [compat_]clone4_args
structure may cause problems -- I *think* its (final) layout will be 4+4+4+4+4+4
on 32-bit, 8+8+8+8+8+4 on 64-bit, but 4+4+8+8+4+4 on x32.

Have you tried running a test with a userspace program compiled with -mx32?
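
One quick way to compare those layouts is a small probe built with -m64,
-m32 and -mx32.  This is only a sketch: it re-declares clone4_args as
defined in this patch and relies on the exported __kernel_*_t types.

	#include <stdio.h>
	#include <stddef.h>
	#include <linux/types.h>

	/* Re-declaration of the structure from this patch, for a userspace
	 * layout check only; __user annotations dropped. */
	struct clone4_args {
		__kernel_pid_t *ptid;
		__kernel_pid_t *ctid;
		__kernel_ulong_t stack_start;
		__kernel_ulong_t stack_size;
		__kernel_ulong_t tls;
	};

	int main(void)
	{
		printf("sizeof=%zu  ptid=%zu ctid=%zu stack_start=%zu "
		       "stack_size=%zu tls=%zu\n",
		       sizeof(struct clone4_args),
		       offsetof(struct clone4_args, ptid),
		       offsetof(struct clone4_args, ctid),
		       offsetof(struct clone4_args, stack_start),
		       offsetof(struct clone4_args, stack_size),
		       offsetof(struct clone4_args, tls));
		return 0;
	}

On x32 the pointers stay 4 bytes while __kernel_ulong_t is 8 bytes
(unsigned long long), so the layout an -mx32 binary produces would not
match the all-32-bit compat_clone4_args that compat_sys_clone4 copies in.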

Patch

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 0286735..ba28306 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -483,6 +483,7 @@  GLOBAL(\label)
 	PTREGSCALL stub32_execveat, compat_sys_execveat
 	PTREGSCALL stub32_fork, sys_fork
 	PTREGSCALL stub32_vfork, sys_vfork
+	PTREGSCALL stub32_clone4, compat_sys_clone4
 
 	ALIGN
 GLOBAL(stub32_clone)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d74d16..ead143f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -520,6 +520,7 @@  END(\label)
 	FORK_LIKE  clone
 	FORK_LIKE  fork
 	FORK_LIKE  vfork
+	FORK_LIKE  clone4
 	FIXED_FRAME stub_iopl, sys_iopl
 
 ENTRY(stub_execve)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index b3560ec..56fcc90 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -365,3 +365,4 @@ 
 356	i386	memfd_create		sys_memfd_create
 357	i386	bpf			sys_bpf
 358	i386	execveat		sys_execveat			stub32_execveat
+359	i386	clone4			sys_clone4			stub32_clone4
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fb..af15b0f 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -329,6 +329,7 @@ 
 320	common	kexec_file_load		sys_kexec_file_load
 321	common	bpf			sys_bpf
 322	64	execveat		stub_execveat
+323	64	clone4			stub_clone4
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
@@ -368,3 +369,4 @@ 
 543	x32	io_setup		compat_sys_io_setup
 544	x32	io_submit		compat_sys_io_submit
 545	x32	execveat		stub_x32_execveat
+546	x32	clone4			stub32_clone4
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ab25814..6c4a68d 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -293,6 +293,14 @@  struct compat_old_sigaction {
 };
 #endif
 
+struct compat_clone4_args {
+	compat_uptr_t ptid;
+	compat_uptr_t ctid;
+	compat_ulong_t stack_start;
+	compat_ulong_t stack_size;
+	compat_ulong_t tls;
+};
+
 struct compat_statfs;
 struct compat_statfs64;
 struct compat_old_linux_dirent;
@@ -713,6 +721,10 @@  asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 
 asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
 					    int, const char __user *);
+
+asmlinkage long compat_sys_clone4(unsigned, unsigned, compat_ulong_t,
+				  struct compat_clone4_args __user *);
+
 #else
 
 #define is_compat_task() (0)
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index e016bd9..3740166 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -709,9 +709,11 @@  __SYSCALL(__NR_memfd_create, sys_memfd_create)
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 281
 __SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_clone4 282
+__SC_COMP(__NR_clone4, sys_clone4, compat_sys_clone4)
 
 #undef __NR_syscalls
-#define __NR_syscalls 282
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index cc89dde..7656152 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -1,6 +1,8 @@ 
 #ifndef _UAPI_LINUX_SCHED_H
 #define _UAPI_LINUX_SCHED_H
 
+#include <linux/types.h>
+
 /*
  * cloning flags:
  */
@@ -18,11 +20,8 @@ 
 #define CLONE_SETTLS	0x00080000	/* create a new TLS for the child */
 #define CLONE_PARENT_SETTID	0x00100000	/* set the TID in the parent */
 #define CLONE_CHILD_CLEARTID	0x00200000	/* clear the TID in the child */
-#define CLONE_DETACHED		0x00400000	/* Unused, ignored */
 #define CLONE_UNTRACED		0x00800000	/* set if the tracing process can't force CLONE_PTRACE on this clone */
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
-/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
-   and is now available for re-use. */
 #define CLONE_NEWUTS		0x04000000	/* New utsname namespace */
 #define CLONE_NEWIPC		0x08000000	/* New ipc namespace */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
@@ -31,6 +30,37 @@ 
 #define CLONE_IO		0x80000000	/* Clone io context */
 
 /*
+ * Old flags, unused by current clone.  clone does not return EINVAL for these
+ * flags, so they can't easily be reused.  clone4 can use them.
+ */
+#define CLONE_PID	0x00001000
+#define CLONE_DETACHED	0x00400000
+#define CLONE_STOPPED	0x02000000
+
+#ifdef __KERNEL__
+/*
+ * Valid flags for clone and for clone4. Kept in this file next to the flag
+ * list above, but not exposed to userspace.
+ */
+#define CLONE_VALID_FLAGS	(0xffffffffULL & ~(CLONE_PID | CLONE_DETACHED | CLONE_STOPPED))
+#define CLONE4_VALID_FLAGS	CLONE_VALID_FLAGS
+#endif /* __KERNEL__ */
+
+/*
+ * Structure passed to clone4 for additional arguments.  Initialized to 0,
+ * then overwritten with arguments from userspace, so arguments not supplied by
+ * userspace will remain 0.  New versions of the kernel may safely append new
+ * arguments to the end.
+ */
+struct clone4_args {
+	__kernel_pid_t __user *ptid;
+	__kernel_pid_t __user *ctid;
+	__kernel_ulong_t stack_start;
+	__kernel_ulong_t stack_size;
+	__kernel_ulong_t tls;
+};
+
+/*
  * Scheduling policies
  */
 #define SCHED_NORMAL		0
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d..3ab6649 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1511,6 +1511,16 @@  config EVENTFD
 
 	  If unsure, say Y.
 
+config CLONE4
+	bool "Enable clone4() system call" if EXPERT
+	depends on HAVE_COPY_THREAD_TLS
+	default y
+	help
+	  Enable the clone4() system call, which supports passing additional
+	  flags.
+
+	  If unsure, say Y.
+
 # syscall, maps, verifier
 config BPF_SYSCALL
 	bool "Enable bpf() system call" if EXPERT
diff --git a/kernel/fork.c b/kernel/fork.c
index b3dadf4..8a21f9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1187,7 +1187,7 @@  init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
  * parts of the process environment (as per the clone
  * flags). The actual kick-off is left to the caller.
  */
-static struct task_struct *copy_process(unsigned long clone_flags,
+static struct task_struct *copy_process(u64 clone_flags,
 					unsigned long stack_start,
 					unsigned long stack_size,
 					int __user *child_tidptr,
@@ -1198,6 +1198,9 @@  static struct task_struct *copy_process(unsigned long clone_flags,
 	int retval;
 	struct task_struct *p;
 
+	if (clone_flags & ~CLONE4_VALID_FLAGS)
+		return ERR_PTR(-EINVAL);
+
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
@@ -1630,7 +1633,7 @@  struct task_struct *fork_idle(int cpu)
  * it and waits for it to finish using the VM if required.
  */
 static long _do_fork(
-		unsigned long clone_flags,
+		u64 clone_flags,
 		unsigned long stack_start,
 		unsigned long stack_size,
 		int __user *parent_tidptr,
@@ -1701,6 +1704,15 @@  static long _do_fork(
 	return nr;
 }
 
+/*
+ * Convenience function for callers passing unsigned long flags, to prevent old
+ * syscall entry points from unexpectedly returning EINVAL.
+ */
+static inline u64 squelch_clone_flags(unsigned long clone_flags)
+{
+	return clone_flags & CLONE_VALID_FLAGS;
+}
+
 #ifndef CONFIG_HAVE_COPY_THREAD_TLS
 /* For compatibility with architectures that call do_fork directly rather than
  * using the syscall entry points below. */
@@ -1710,7 +1722,8 @@  long do_fork(unsigned long clone_flags,
 	      int __user *parent_tidptr,
 	      int __user *child_tidptr)
 {
-	return _do_fork(clone_flags, stack_start, stack_size,
+	return _do_fork(squelch_clone_flags(clone_flags),
+			stack_start, stack_size,
 			parent_tidptr, child_tidptr, 0);
 }
 #endif
@@ -1768,10 +1781,45 @@  SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 unsigned long, tls)
 #endif
 {
-	return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
+	return _do_fork(squelch_clone_flags(clone_flags), newsp, 0,
+			parent_tidptr, child_tidptr, tls);
 }
 #endif
 
+#ifdef CONFIG_CLONE4
+SYSCALL_DEFINE4(clone4, unsigned, flags_high, unsigned, flags_low,
+		unsigned long, args_size, struct clone4_args __user *, args)
+{
+	u64 flags = (u64)flags_high << 32 | flags_low;
+	struct clone4_args kargs = {};
+	if (args_size > sizeof(kargs))
+		return -EINVAL;
+	if (args_size && copy_from_user(&kargs, args, args_size))
+		return -EFAULT;
+	return _do_fork(flags, kargs.stack_start, kargs.stack_size,
+			kargs.ptid, kargs.ctid, kargs.tls);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE4(clone4, unsigned, flags_high, unsigned, flags_low,
+			compat_ulong_t, args_size,
+			struct compat_clone4_args __user *, args)
+{
+	u64 flags = (u64)flags_high << 32 | flags_low;
+	struct compat_clone4_args compat_kargs = {};
+	if (args_size > sizeof(compat_kargs))
+		return -EINVAL;
+	if (args_size && copy_from_user(&compat_kargs, args, args_size))
+		return -EFAULT;
+	return _do_fork(flags, compat_kargs.stack_start,
+			compat_kargs.stack_size,
+			compat_ptr(compat_kargs.ptid),
+			compat_ptr(compat_kargs.ctid),
+			compat_kargs.tls);
+}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_CLONE4 */
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5adcb0a..5b5d2b9 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,7 @@  cond_syscall(sys_uselib);
 cond_syscall(sys_fadvise64);
 cond_syscall(sys_fadvise64_64);
 cond_syscall(sys_madvise);
+cond_syscall(sys_clone4);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);