diff mbox series

[RFC,v3,07/10] clone3: add CHECK_FIELDS flag to usize argument

Message ID 20241010-extensible-structs-check_fields-v3-7-d2833dfe6edd@cyphar.com (mailing list archive)
State New
Headers show
Series extensible syscalls: CHECK_FIELDS to allow for easier feature detection | expand

Commit Message

Aleksa Sarai Oct. 9, 2024, 8:40 p.m. UTC
As with openat2(2), this allows userspace to easily figure out what
flags and fields are supported by clone3(2). For fields which are not
flag-based, we simply set every bit in the field so that a naive
bitwise-and would show that any value of the field is valid.

For args->exit_signal, since we have an explicit bitmask for the field
defined already (CSIGNAL) we can indicate that only those bits are
supported by current kernels. If we add some extra bits to exit_signal
in the future, being able to detect them as new features would be quite
useful.

The intended way of using this interface to get feature information
looks something like the following:

  static bool clone3_clear_sighand_supported;
  static bool clone3_cgroup_supported;

  int check_clone3_support(void)
  {
      int err;
      struct clone_args args = {};

      err = clone3(&args, CHECK_FIELDS | sizeof(args));
      assert(err < 0);
      switch (errno) {
      case EFAULT: case E2BIG:
          /* Old kernel... */
          check_support_the_old_way();
          break;
      case EEXTSYS_NOOP:
          clone3_clear_sighand_supported = (how.flags & CLONE_CLEAR_SIGHAND);
          clone3_cgroup_supported = (how.flags & CLONE_INTO_CGROUP) &&
                                    (how.cgroup != 0);
          break;
      }
  }

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
---
 kernel/fork.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/kernel/fork.c b/kernel/fork.c
index cc760491f201..bded93f4f5f4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2925,11 +2925,15 @@  SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 }
 #endif
 
+
+#define CLONE3_VALID_FLAGS (CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP)
+
 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 					      struct clone_args __user *uargs,
 					      size_t usize)
 {
 	int err;
+	bool check_fields;
 	struct clone_args args;
 	pid_t *kset_tid = kargs->set_tid;
 
@@ -2941,11 +2945,34 @@  noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
 		     CLONE_ARGS_SIZE_VER2);
 	BUILD_BUG_ON(sizeof(struct clone_args) != CLONE_ARGS_SIZE_VER2);
 
+	check_fields = usize & CHECK_FIELDS;
+	usize &= ~CHECK_FIELDS;
+
 	if (unlikely(usize > PAGE_SIZE))
 		return -E2BIG;
 	if (unlikely(usize < CLONE_ARGS_SIZE_VER0))
 		return -EINVAL;
 
+	if (unlikely(check_fields)) {
+		memset(&args, 0, sizeof(args));
+		args = (struct clone_args) {
+			.flags = CLONE3_VALID_FLAGS,
+			.pidfd = 0xFFFFFFFFFFFFFFFF,
+			.child_tid = 0xFFFFFFFFFFFFFFFF,
+			.parent_tid = 0xFFFFFFFFFFFFFFFF,
+			.exit_signal = (u64) CSIGNAL,
+			.stack = 0xFFFFFFFFFFFFFFFF,
+			.stack_size = 0xFFFFFFFFFFFFFFFF,
+			.tls = 0xFFFFFFFFFFFFFFFF,
+			.set_tid = 0xFFFFFFFFFFFFFFFF,
+			.set_tid_size = 0xFFFFFFFFFFFFFFFF,
+			.cgroup = 0xFFFFFFFFFFFFFFFF,
+		};
+
+		err = copy_struct_to_user(uargs, usize, &args, sizeof(args), NULL);
+		return err ?: -EEXTSYS_NOOP;
+	}
+
 	err = copy_struct_from_user(&args, sizeof(args), uargs, usize);
 	if (err)
 		return err;
@@ -3025,8 +3052,7 @@  static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
 static bool clone3_args_valid(struct kernel_clone_args *kargs)
 {
 	/* Verify that no unknown flags are passed along. */
-	if (kargs->flags &
-	    ~(CLONE_LEGACY_FLAGS | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP))
+	if (kargs->flags & ~CLONE3_VALID_FLAGS)
 		return false;
 
 	/*