diff mbox series

[v2,bpf-next,01/16] bpf: Introduce bpf_sys_bpf() helper and program type.

Message ID 20210423002646.35043-2-alexei.starovoitov@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: syscall program, FD array, loader program, light skeleton. | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 9 maintainers not CCed: joe@cilium.io yhs@fb.com kpsingh@kernel.org kafai@fb.com ast@kernel.org john.fastabend@gmail.com songliubraving@fb.com quentin@isovalent.com kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11956 this patch: 11955
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Blank lines aren't necessary after an open brace '{' WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: please, no space before tabs
netdev/build_allmodconfig_warn success Errors and warnings before: 12443 this patch: 12442
netdev/header_inline success Link

Commit Message

Alexei Starovoitov April 23, 2021, 12:26 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Add placeholders for bpf_sys_bpf() helper and new program type.

v1->v2:
- check that expected_attach_type is zero
- allow more helper functions to be used in this program type, since they will
  only execute from user context via bpf_prog_test_run.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h            | 10 +++++++
 include/linux/bpf_types.h      |  2 ++
 include/uapi/linux/bpf.h       |  8 +++++
 kernel/bpf/syscall.c           | 54 ++++++++++++++++++++++++++++++++++
 net/bpf/test_run.c             | 43 +++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  8 +++++
 6 files changed, 125 insertions(+)

Comments

Yonghong Song April 23, 2021, 6:15 p.m. UTC | #1
On 4/22/21 5:26 PM, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Add placeholders for bpf_sys_bpf() helper and new program type.
> 
> v1->v2:
> - check that expected_attach_type is zero
> - allow more helper functions to be used in this program type, since they will
>    only execute from user context via bpf_prog_test_run.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>   include/linux/bpf.h            | 10 +++++++
>   include/linux/bpf_types.h      |  2 ++
>   include/uapi/linux/bpf.h       |  8 +++++
>   kernel/bpf/syscall.c           | 54 ++++++++++++++++++++++++++++++++++
>   net/bpf/test_run.c             | 43 +++++++++++++++++++++++++++
>   tools/include/uapi/linux/bpf.h |  8 +++++
>   6 files changed, 125 insertions(+)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f8a45f109e96..aed30bbffb54 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1824,6 +1824,9 @@ static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
>   
>   struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
>   void bpf_map_offload_map_free(struct bpf_map *map);
> +int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> +			      const union bpf_attr *kattr,
> +			      union bpf_attr __user *uattr);
>   #else
>   static inline int bpf_prog_offload_init(struct bpf_prog *prog,
>   					union bpf_attr *attr)
> @@ -1849,6 +1852,13 @@ static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
>   static inline void bpf_map_offload_map_free(struct bpf_map *map)
>   {
>   }
> +
> +static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> +					    const union bpf_attr *kattr,
> +					    union bpf_attr __user *uattr)
> +{
> +	return -ENOTSUPP;
> +}
>   #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
>   
>   #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index f883f01a5061..a9db1eae6796 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -77,6 +77,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
>   	       void *, void *)
>   #endif /* CONFIG_BPF_LSM */
>   #endif
> +BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
> +	      void *, void *)
>   
>   BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
>   BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index ec6d85a81744..c92648f38144 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -937,6 +937,7 @@ enum bpf_prog_type {
>   	BPF_PROG_TYPE_EXT,
>   	BPF_PROG_TYPE_LSM,
>   	BPF_PROG_TYPE_SK_LOOKUP,
> +	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
>   };
>   
>   enum bpf_attach_type {
> @@ -4735,6 +4736,12 @@ union bpf_attr {
>    *		be zero-terminated except when **str_size** is 0.
>    *
>    *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
> + *
> + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
> + * 	Description
> + * 		Execute bpf syscall with given arguments.
> + * 	Return
> + * 		A syscall result.
>    */
>   #define __BPF_FUNC_MAPPER(FN)		\
>   	FN(unspec),			\
> @@ -4903,6 +4910,7 @@ union bpf_attr {
>   	FN(check_mtu),			\
>   	FN(for_each_map_elem),		\
>   	FN(snprintf),			\
> +	FN(sys_bpf),			\
>   	/* */
>   
>   /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index fd495190115e..8636876f3e6b 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2014,6 +2014,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
>   		if (expected_attach_type == BPF_SK_LOOKUP)
>   			return 0;
>   		return -EINVAL;
> +	case BPF_PROG_TYPE_SYSCALL:
>   	case BPF_PROG_TYPE_EXT:
>   		if (expected_attach_type)
>   			return -EINVAL;
> @@ -4497,3 +4498,56 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
>   
>   	return err;
>   }
> +
> +static bool syscall_prog_is_valid_access(int off, int size,
> +					 enum bpf_access_type type,
> +					 const struct bpf_prog *prog,
> +					 struct bpf_insn_access_aux *info)
> +{
> +	if (off < 0 || off >= U16_MAX)
> +		return false;

Is this enough? If I understand correctly, the new program type
allows any arbitrary context data from user as long as its size
meets the following constraints:
    if (ctx_size_in < prog->aux->max_ctx_offset ||
  	    ctx_size_in > U16_MAX)
		return -EINVAL;

So if user provides a ctx with size say 40 and inside the program looks
it is still able to read/write to say offset 400.
Should we be a little more restrictive on this?

> +	if (off % size != 0)
> +		return false;
> +	return true;
> +}
> +
> +BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
> +{
> +	return -EINVAL;
> +}
> +
> +const struct bpf_func_proto bpf_sys_bpf_proto = {
> +	.func		= bpf_sys_bpf,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_ANYTHING,
> +	.arg2_type	= ARG_PTR_TO_MEM,
> +	.arg3_type	= ARG_CONST_SIZE,
> +};
> +
> +const struct bpf_func_proto * __weak
> +tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +
> +	return bpf_base_func_proto(func_id);
> +}
> +
> +static const struct bpf_func_proto *
> +syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +	switch (func_id) {
> +	case BPF_FUNC_sys_bpf:
> +		return &bpf_sys_bpf_proto;
> +	default:
> +		return tracing_prog_func_proto(func_id, prog);
> +	}
> +}
> +
> +const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
> +	.get_func_proto  = syscall_prog_func_proto,
> +	.is_valid_access = syscall_prog_is_valid_access,
> +};
> +
> +const struct bpf_prog_ops bpf_syscall_prog_ops = {
> +	.test_run = bpf_prog_test_run_syscall,
> +};
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index a5d72c48fb66..1783ea77b95c 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -918,3 +918,46 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
>   	kfree(user_ctx);
>   	return ret;
>   }
> +
> +int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> +			      const union bpf_attr *kattr,
> +			      union bpf_attr __user *uattr)
> +{
> +	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
> +	__u32 ctx_size_in = kattr->test.ctx_size_in;
> +	void *ctx = NULL;
> +	u32 retval;
> +	int err = 0;
> +
> +	/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
> +	if (kattr->test.data_in || kattr->test.data_out ||
> +	    kattr->test.ctx_out || kattr->test.duration ||
> +	    kattr->test.repeat || kattr->test.flags)
> +		return -EINVAL;
> +
> +	if (ctx_size_in < prog->aux->max_ctx_offset ||
> +	    ctx_size_in > U16_MAX)
> +		return -EINVAL;
> +
> +	if (ctx_size_in) {
> +		ctx = kzalloc(ctx_size_in, GFP_USER);
> +		if (!ctx)
> +			return -ENOMEM;
> +		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
> +			err = -EFAULT;
> +			goto out;
> +		}
> +	}
> +	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
> +
> +	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
> +		err = -EFAULT;
> +	if (ctx_size_in)
> +		if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
> +			err = -EFAULT;
> +			goto out;
> +		}
> +out:
> +	kfree(ctx);
> +	return err;
> +}
[...]
Alexei Starovoitov April 23, 2021, 6:28 p.m. UTC | #2
On Fri, Apr 23, 2021 at 11:16 AM Yonghong Song <yhs@fb.com> wrote:
> > +
> > +static bool syscall_prog_is_valid_access(int off, int size,
> > +                                      enum bpf_access_type type,
> > +                                      const struct bpf_prog *prog,
> > +                                      struct bpf_insn_access_aux *info)
> > +{
> > +     if (off < 0 || off >= U16_MAX)
> > +             return false;
>
> Is this enough? If I understand correctly, the new program type
> allows any arbitrary context data from user as long as its size
> meets the following constraints:
>     if (ctx_size_in < prog->aux->max_ctx_offset ||
>             ctx_size_in > U16_MAX)
>                 return -EINVAL;
>
> So if user provides a ctx with size say 40 and inside the program looks
> it is still able to read/write to say offset 400.
> Should we be a little more restrictive on this?

At the load time the program can have a read/write at offset 400,
but it will be rejected at prog_test_run time.
That's similar to tp and raw_tp test_run-s and attach-es.
That's why test_run has that check you've quoted.
It's a two step verification.
The verifier rejects <0 || > u16_max right away and
keeps the track of max_ctx_offset.
Then at attach/test_run the final check is done with an actual ctx_size_in.
Yonghong Song April 23, 2021, 7:32 p.m. UTC | #3
On 4/23/21 11:28 AM, Alexei Starovoitov wrote:
> On Fri, Apr 23, 2021 at 11:16 AM Yonghong Song <yhs@fb.com> wrote:
>>> +
>>> +static bool syscall_prog_is_valid_access(int off, int size,
>>> +                                      enum bpf_access_type type,
>>> +                                      const struct bpf_prog *prog,
>>> +                                      struct bpf_insn_access_aux *info)
>>> +{
>>> +     if (off < 0 || off >= U16_MAX)
>>> +             return false;
>>
>> Is this enough? If I understand correctly, the new program type
>> allows any arbitrary context data from user as long as its size
>> meets the following constraints:
>>      if (ctx_size_in < prog->aux->max_ctx_offset ||
>>              ctx_size_in > U16_MAX)
>>                  return -EINVAL;
>>
>> So if user provides a ctx with size say 40 and inside the program looks
>> it is still able to read/write to say offset 400.
>> Should we be a little more restrictive on this?
> 
> At the load time the program can have a read/write at offset 400,
> but it will be rejected at prog_test_run time.
> That's similar to tp and raw_tp test_run-s and attach-es.
> That's why test_run has that check you've quoted.
> It's a two step verification.
> The verifier rejects <0 || > u16_max right away and
> keeps the track of max_ctx_offset.
> Then at attach/test_run the final check is done with an actual ctx_size_in.

Thanks! That is indeed the case. Somehow although I copy-pasted it,
I missed the code "ctx_size_in < prog->aux->max_ctx_offset"...
Andrii Nakryiko April 26, 2021, 4:51 p.m. UTC | #4
On Thu, Apr 22, 2021 at 5:26 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> From: Alexei Starovoitov <ast@kernel.org>
>
> Add placeholders for bpf_sys_bpf() helper and new program type.
>
> v1->v2:
> - check that expected_attach_type is zero
> - allow more helper functions to be used in this program type, since they will
>   only execute from user context via bpf_prog_test_run.
>
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

LGTM, see minor comments below.

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  include/linux/bpf.h            | 10 +++++++
>  include/linux/bpf_types.h      |  2 ++
>  include/uapi/linux/bpf.h       |  8 +++++
>  kernel/bpf/syscall.c           | 54 ++++++++++++++++++++++++++++++++++
>  net/bpf/test_run.c             | 43 +++++++++++++++++++++++++++
>  tools/include/uapi/linux/bpf.h |  8 +++++
>  6 files changed, 125 insertions(+)
>

[...]

> +
> +const struct bpf_func_proto * __weak
> +tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +

extra empty line

> +       return bpf_base_func_proto(func_id);
> +}
> +
> +static const struct bpf_func_proto *
> +syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> +{
> +       switch (func_id) {
> +       case BPF_FUNC_sys_bpf:
> +               return &bpf_sys_bpf_proto;
> +       default:
> +               return tracing_prog_func_proto(func_id, prog);
> +       }
> +}
> +

[...]

> +       if (ctx_size_in) {
> +               ctx = kzalloc(ctx_size_in, GFP_USER);
> +               if (!ctx)
> +                       return -ENOMEM;
> +               if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
> +                       err = -EFAULT;
> +                       goto out;
> +               }
> +       }
> +       retval = bpf_prog_run_pin_on_cpu(prog, ctx);
> +
> +       if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
> +               err = -EFAULT;

is there a point in trying to do another copy_to_user if this fails?
I.e., why not goto out here?

> +       if (ctx_size_in)
> +               if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
> +                       err = -EFAULT;
> +                       goto out;
> +               }
> +out:
> +       kfree(ctx);
> +       return err;
> +}

[...]
John Fastabend April 27, 2021, 6:45 p.m. UTC | #5
Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Add placeholders for bpf_sys_bpf() helper and new program type.
> 
> v1->v2:
> - check that expected_attach_type is zero
> - allow more helper functions to be used in this program type, since they will
>   only execute from user context via bpf_prog_test_run.
> 
> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---

Acked-by: John Fastabend <john.fastabend@gmail.com>

> +int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> +			      const union bpf_attr *kattr,
> +			      union bpf_attr __user *uattr)
> +{
> +	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
> +	__u32 ctx_size_in = kattr->test.ctx_size_in;
> +	void *ctx = NULL;
> +	u32 retval;
> +	int err = 0;
> +
> +	/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
> +	if (kattr->test.data_in || kattr->test.data_out ||
> +	    kattr->test.ctx_out || kattr->test.duration ||
> +	    kattr->test.repeat || kattr->test.flags)
> +		return -EINVAL;
> +
> +	if (ctx_size_in < prog->aux->max_ctx_offset ||
> +	    ctx_size_in > U16_MAX)
> +		return -EINVAL;
> +
> +	if (ctx_size_in) {
> +		ctx = kzalloc(ctx_size_in, GFP_USER);
> +		if (!ctx)
> +			return -ENOMEM;
> +		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
> +			err = -EFAULT;
> +			goto out;
> +		}
> +	}
> +	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
> +
> +	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
> +		err = -EFAULT;
> +	if (ctx_size_in)
> +		if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
> +			err = -EFAULT;
> +			goto out;
> +		}

stupid nit, the last goto there is not needed.

> +out:
> +	kfree(ctx);
> +	return err;
> +}
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f8a45f109e96..aed30bbffb54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1824,6 +1824,9 @@  static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
 
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
 void bpf_map_offload_map_free(struct bpf_map *map);
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr);
 #else
 static inline int bpf_prog_offload_init(struct bpf_prog *prog,
 					union bpf_attr *attr)
@@ -1849,6 +1852,13 @@  static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 static inline void bpf_map_offload_map_free(struct bpf_map *map)
 {
 }
+
+static inline int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+					    const union bpf_attr *kattr,
+					    union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index f883f01a5061..a9db1eae6796 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -77,6 +77,8 @@  BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm,
 	       void *, void *)
 #endif /* CONFIG_BPF_LSM */
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
+	      void *, void *)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..c92648f38144 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 };
 
 enum bpf_attach_type {
@@ -4735,6 +4736,12 @@  union bpf_attr {
  *		be zero-terminated except when **str_size** is 0.
  *
  *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * 	Description
+ * 		Execute bpf syscall with given arguments.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4903,6 +4910,7 @@  union bpf_attr {
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
+	FN(sys_bpf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fd495190115e..8636876f3e6b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2014,6 +2014,7 @@  bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		if (expected_attach_type == BPF_SK_LOOKUP)
 			return 0;
 		return -EINVAL;
+	case BPF_PROG_TYPE_SYSCALL:
 	case BPF_PROG_TYPE_EXT:
 		if (expected_attach_type)
 			return -EINVAL;
@@ -4497,3 +4498,56 @@  SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 
 	return err;
 }
+
+static bool syscall_prog_is_valid_access(int off, int size,
+					 enum bpf_access_type type,
+					 const struct bpf_prog *prog,
+					 struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= U16_MAX)
+		return false;
+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size)
+{
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_sys_bpf_proto = {
+	.func		= bpf_sys_bpf,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+const struct bpf_func_proto * __weak
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sys_bpf:
+		return &bpf_sys_bpf_proto;
+	default:
+		return tracing_prog_func_proto(func_id, prog);
+	}
+}
+
+const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
+	.get_func_proto  = syscall_prog_func_proto,
+	.is_valid_access = syscall_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops bpf_syscall_prog_ops = {
+	.test_run = bpf_prog_test_run_syscall,
+};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a5d72c48fb66..1783ea77b95c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -918,3 +918,46 @@  int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
 	kfree(user_ctx);
 	return ret;
 }
+
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr)
+{
+	void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	__u32 ctx_size_in = kattr->test.ctx_size_in;
+	void *ctx = NULL;
+	u32 retval;
+	int err = 0;
+
+	/* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
+	if (kattr->test.data_in || kattr->test.data_out ||
+	    kattr->test.ctx_out || kattr->test.duration ||
+	    kattr->test.repeat || kattr->test.flags)
+		return -EINVAL;
+
+	if (ctx_size_in < prog->aux->max_ctx_offset ||
+	    ctx_size_in > U16_MAX)
+		return -EINVAL;
+
+	if (ctx_size_in) {
+		ctx = kzalloc(ctx_size_in, GFP_USER);
+		if (!ctx)
+			return -ENOMEM;
+		if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+	}
+	retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+
+	if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32)))
+		err = -EFAULT;
+	if (ctx_size_in)
+		if (copy_to_user(ctx_in, ctx, ctx_size_in)) {
+			err = -EFAULT;
+			goto out;
+		}
+out:
+	kfree(ctx);
+	return err;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ec6d85a81744..0c13016d3d2c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -937,6 +937,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_EXT,
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
+	BPF_PROG_TYPE_SYSCALL,
 };
 
 enum bpf_attach_type {
@@ -4735,6 +4736,12 @@  union bpf_attr {
  *		be zero-terminated except when **str_size** is 0.
  *
  *		Or **-EBUSY** if the per-CPU memory copy buffer is busy.
+ *
+ * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size)
+ * 	Description
+ * 		Execute bpf syscall with given arguments.
+ * 	Return
+ * 		A syscall result.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -4903,6 +4910,7 @@  union bpf_attr {
 	FN(check_mtu),			\
 	FN(for_each_map_elem),		\
 	FN(snprintf),			\
+	FN(sys_bpf),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper