diff mbox series

[bpf-next,v5,1/7] bpf: add bpf_link support for BPF_NETFILTER programs

Message ID 20230421170300.24115-2-fw@strlen.de (mailing list archive)
State Accepted
Commit 84601d6ee68ae820dec97450934797046d62db4b
Delegated to: BPF
Headers show
Series bpf: add netfilter program type | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on s390x with gcc
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply success Patch already applied to bpf-next

Commit Message

Florian Westphal April 21, 2023, 5:02 p.m. UTC
Add bpf_link support skeleton.  To keep this reviewable, no bpf program
can be invoked yet: if a program is attached, only a C stub is called,
not the actual bpf program.

Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig.

Uapi example usage:
	union bpf_attr attr = { };

	attr.link_create.prog_fd = progfd;
	attr.link_create.attach_type = 0; /* unused */
	attr.link_create.netfilter.pf = PF_INET;
	attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN;
	attr.link_create.netfilter.priority = -128;

	err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr));

... this would attach progfd to ipv4:input hook.

Such hook gets removed automatically if the calling program exits.

BPF_NETFILTER program invocation is added in followup change.

The NF_HOOK_OP_BPF enum value will eventually be read from nfnetlink_hook.
It allows telling userspace which program is attached at the given hook
when the user runs the 'nft hook list' command, rather than just the
priority and a not-very-helpful 'this hook runs a bpf prog but I can't
tell which one'.

Will also be used to disallow registration of two bpf programs with
same priority in a followup patch.

v4: arm32 cmpxchg only supports 32bit operand
    s/prio/priority/
v3: restrict prog attachment to ip/ip6 for now; let's lift restrictions if
    more use cases pop up (arptables, ebtables, netdev ingress/egress etc).

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 no changes since last version

 include/linux/netfilter.h           |   1 +
 include/net/netfilter/nf_bpf_link.h |  10 ++
 include/uapi/linux/bpf.h            |  14 +++
 kernel/bpf/syscall.c                |   6 ++
 net/netfilter/Kconfig               |   3 +
 net/netfilter/Makefile              |   1 +
 net/netfilter/nf_bpf_link.c         | 159 ++++++++++++++++++++++++++++
 7 files changed, 194 insertions(+)
 create mode 100644 include/net/netfilter/nf_bpf_link.h
 create mode 100644 net/netfilter/nf_bpf_link.c

Comments

Andrii Nakryiko April 27, 2023, 4:51 a.m. UTC | #1
On Fri, Apr 21, 2023 at 10:07 AM Florian Westphal <fw@strlen.de> wrote:
>
> Add bpf_link support skeleton.  To keep this reviewable, no bpf program
> can be invoked yet, if a program is attached only a c-stub is called and
> not the actual bpf program.
>
> Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig.
>
> Uapi example usage:
>         union bpf_attr attr = { };
>
>         attr.link_create.prog_fd = progfd;
>         attr.link_create.attach_type = 0; /* unused */
>         attr.link_create.netfilter.pf = PF_INET;
>         attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN;
>         attr.link_create.netfilter.priority = -128;
>
>         err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
>
> ... this would attach progfd to ipv4:input hook.
>
> Such hook gets removed automatically if the calling program exits.
>
> BPF_NETFILTER program invocation is added in followup change.
>
> NF_HOOK_OP_BPF enum will eventually be read from nfnetlink_hook, it
> allows to tell userspace which program is attached at the given hook
> when user runs 'nft hook list' command rather than just the priority
> and not-very-helpful 'this hook runs a bpf prog but I can't tell which
> one'.
>
> Will also be used to disallow registration of two bpf programs with
> same priority in a followup patch.
>
> v4: arm32 cmpxchg only supports 32bit operand
>     s/prio/priority/
> v3: restrict prog attachment to ip/ip6 for now, lets lift restrictions if
>     more use cases pop up (arptables, ebtables, netdev ingress/egress etc).
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---
>  no changes since last version
>
>  include/linux/netfilter.h           |   1 +
>  include/net/netfilter/nf_bpf_link.h |  10 ++
>  include/uapi/linux/bpf.h            |  14 +++
>  kernel/bpf/syscall.c                |   6 ++
>  net/netfilter/Kconfig               |   3 +
>  net/netfilter/Makefile              |   1 +
>  net/netfilter/nf_bpf_link.c         | 159 ++++++++++++++++++++++++++++
>  7 files changed, 194 insertions(+)
>  create mode 100644 include/net/netfilter/nf_bpf_link.h
>  create mode 100644 net/netfilter/nf_bpf_link.c
>
> diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
> index c8e03bcaecaa..0762444e3767 100644
> --- a/include/linux/netfilter.h
> +++ b/include/linux/netfilter.h
> @@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv,
>  enum nf_hook_ops_type {
>         NF_HOOK_OP_UNDEFINED,
>         NF_HOOK_OP_NF_TABLES,
> +       NF_HOOK_OP_BPF,
>  };
>
>  struct nf_hook_ops {
> diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h
> new file mode 100644
> index 000000000000..eeaeaf3d15de
> --- /dev/null
> +++ b/include/net/netfilter/nf_bpf_link.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
> +int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
> +#else
> +static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
> +{
> +       return -EOPNOTSUPP;
> +}
> +#endif
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4b20a7269bee..1bb11a6ee667 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -986,6 +986,7 @@ enum bpf_prog_type {
>         BPF_PROG_TYPE_LSM,
>         BPF_PROG_TYPE_SK_LOOKUP,
>         BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
> +       BPF_PROG_TYPE_NETFILTER,
>  };
>
>  enum bpf_attach_type {
> @@ -1050,6 +1051,7 @@ enum bpf_link_type {
>         BPF_LINK_TYPE_PERF_EVENT = 7,
>         BPF_LINK_TYPE_KPROBE_MULTI = 8,
>         BPF_LINK_TYPE_STRUCT_OPS = 9,
> +       BPF_LINK_TYPE_NETFILTER = 10,
>
>         MAX_BPF_LINK_TYPE,
>  };
> @@ -1560,6 +1562,12 @@ union bpf_attr {
>                                  */
>                                 __u64           cookie;
>                         } tracing;
> +                       struct {
> +                               __u32           pf;
> +                               __u32           hooknum;

catching up on stuff a bit...

enum nf_inet_hooks {
        NF_INET_PRE_ROUTING,
        NF_INET_LOCAL_IN,
        NF_INET_FORWARD,
        NF_INET_LOCAL_OUT,
        NF_INET_POST_ROUTING,
        NF_INET_NUMHOOKS,
        NF_INET_INGRESS = NF_INET_NUMHOOKS,
};

So it seems like this "hook number" is more like a "hook type"; is my
understanding correct? If so, wouldn't it be cleaner and more uniform
with, say, cgroup network hooks to provide the hook type as
expected_attach_type? It would also allow a nicer interface in
libbpf, by specifying it as part of SEC():

SEC("netfilter/pre_routing"), SEC("netfilter/local_in"), etc...

Also, it seems like you didn't wire up NETFILTER link support in
libbpf completely. See bpf_link_create under tools/lib/bpf/bpf.c; it
has to handle this new type of link as well. Existing tests seem a bit
bare-bones for SEC("netfilter"). Would it be possible to add something
that demonstrates it a bit better and is actually executed at
runtime and validated?


> +                               __s32           priority;
> +                               __u32           flags;
> +                       } netfilter;
>                 };
>         } link_create;
>

[...]
Florian Westphal April 27, 2023, 9:10 a.m. UTC | #2
Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > @@ -1560,6 +1562,12 @@ union bpf_attr {
> >                                  */
> >                                 __u64           cookie;
> >                         } tracing;
> > +                       struct {
> > +                               __u32           pf;
> > +                               __u32           hooknum;
> 
> catching up on stuff a bit...
> 
> enum nf_inet_hooks {
>         NF_INET_PRE_ROUTING,
>         NF_INET_LOCAL_IN,
>         NF_INET_FORWARD,
>         NF_INET_LOCAL_OUT,
>         NF_INET_POST_ROUTING,
>         NF_INET_NUMHOOKS,
>         NF_INET_INGRESS = NF_INET_NUMHOOKS,
> };
> 
> So it seems like this "hook number" is more like "hook type", is my
> understanding correct?

What is 'hook type'?

> If so, wouldn't it be cleaner and more uniform
> with, say, cgroup network hooks to provide hook type as
> expected_attach_type? It would also allow to have a nicer interface in
> libbpf, by specifying that as part of SEC():
> 
> SEC("netfilter/pre_routing"), SEC("netfilter/local_in"), etc...

I don't understand how that would help.
Attachment needs a priority and a family (ipv4, arp, etc.).

If we allow the netdev type we'll also need an ifindex.
Daniel Xu's work will need to pass extra arguments ("please enable ip
defrag").

> Also, it seems like you actually didn't wire NETFILTER link support in
> libbpf completely. See bpf_link_create under tools/lib/bpf/bpf.c, it
> has to handle this new type of link as well. Existing tests seem a bit
> bare-bones for SEC("netfilter"), would it be possible to add something
> that will demonstrate it a bit better and will be actually executed at
> runtime and validated?

I can have a look.
Andrii Nakryiko April 27, 2023, 10:21 p.m. UTC | #3
On Thu, Apr 27, 2023 at 2:10 AM Florian Westphal <fw@strlen.de> wrote:
>
> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> > > @@ -1560,6 +1562,12 @@ union bpf_attr {
> > >                                  */
> > >                                 __u64           cookie;
> > >                         } tracing;
> > > +                       struct {
> > > +                               __u32           pf;
> > > +                               __u32           hooknum;
> >
> > catching up on stuff a bit...
> >
> > enum nf_inet_hooks {
> >         NF_INET_PRE_ROUTING,
> >         NF_INET_LOCAL_IN,
> >         NF_INET_FORWARD,
> >         NF_INET_LOCAL_OUT,
> >         NF_INET_POST_ROUTING,
> >         NF_INET_NUMHOOKS,
> >         NF_INET_INGRESS = NF_INET_NUMHOOKS,
> > };
> >
> > So it seems like this "hook number" is more like "hook type", is my
> > understanding correct?
>
> What is 'hook type'?

I meant that it's not some dynamically generated number that could
change from system to system; it's a fixed set of points at which
this BPF program can be triggered. The distinction I was trying to
make is that it's actually different in nature compared to, say, ifindex,
as it is fixed by the kernel.

>
> > If so, wouldn't it be cleaner and more uniform
> > with, say, cgroup network hooks to provide hook type as
> > expected_attach_type? It would also allow to have a nicer interface in
> > libbpf, by specifying that as part of SEC():
> >
> > SEC("netfilter/pre_routing"), SEC("netfilter/local_in"), etc...
>
> I don't understand how that would help.
> Attachment needs a priority and a family (ipv4, arp, etc.).
>
> If we allow netdev type we'll also need an ifindex.
> Daniel Xu work will need to pass extra arguments ("please enable ip
> defrag").

Ok, that's fine, if you think it doesn't make sense to pre-declare
that a given BPF program is supposed to be run only in, say,
PRE_ROUTING, then it's fine. We do declare this for other programs
(e.g., cgroup_skb/egress vs cgroup_skb/ingress), so it felt like this
might be a similar case.

>
> > Also, it seems like you actually didn't wire NETFILTER link support in
> > libbpf completely. See bpf_link_create under tools/lib/bpf/bpf.c, it
> > has to handle this new type of link as well. Existing tests seem a bit
> > bare-bones for SEC("netfilter"), would it be possible to add something
> > that will demonstrate it a bit better and will be actually executed at
> > runtime and validated?
>
> I can have a look.

It probably makes sense to add bpf_program__attach_netfilter() API as
well which will return `struct bpf_link *`. Right now libbpf support
for NETFILTER is very incomplete.
Quentin Deslandes April 28, 2023, 4:54 p.m. UTC | #4
On 28/04/2023 00:21, Andrii Nakryiko wrote:
> On Thu, Apr 27, 2023 at 2:10 AM Florian Westphal <fw@strlen.de> wrote:
>>
>> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>>>> @@ -1560,6 +1562,12 @@ union bpf_attr {
>>>>                                  */
>>>>                                 __u64           cookie;
>>>>                         } tracing;
>>>> +                       struct {
>>>> +                               __u32           pf;
>>>> +                               __u32           hooknum;
>>>
>>> catching up on stuff a bit...
>>>
>>> enum nf_inet_hooks {
>>>         NF_INET_PRE_ROUTING,
>>>         NF_INET_LOCAL_IN,
>>>         NF_INET_FORWARD,
>>>         NF_INET_LOCAL_OUT,
>>>         NF_INET_POST_ROUTING,
>>>         NF_INET_NUMHOOKS,
>>>         NF_INET_INGRESS = NF_INET_NUMHOOKS,
>>> };
>>>
>>> So it seems like this "hook number" is more like "hook type", is my
>>> understanding correct?
>>
>> What is 'hook type'?
> 
> I meant that it's not some dynamically generated number that could
> change from the system to system, it's a fixed set of point in which
> this BPF program can be triggered. The distinction I was trying to
> make that it's actually different in nature compared to, say, ifindex,
> as it is fixed by the kernel.

Doesn't this tie the program to a specific hook then? Let's say you
have a program counting the number of packets from a specific IP:
would you be able to attach it to both LOCAL_IN and FORWARD without
modifying it?

>>> If so, wouldn't it be cleaner and more uniform
>>> with, say, cgroup network hooks to provide hook type as
>>> expected_attach_type? It would also allow to have a nicer interface in
>>> libbpf, by specifying that as part of SEC():
>>>
>>> SEC("netfilter/pre_routing"), SEC("netfilter/local_in"), etc...
>>
>> I don't understand how that would help.
>> Attachment needs a priority and a family (ipv4, arp, etc.).
>>
>> If we allow netdev type we'll also need an ifindex.
>> Daniel Xu work will need to pass extra arguments ("please enable ip
>> defrag").
> 
> Ok, that's fine, if you think it doesn't make sense to pre-declare
> that a given BPF program is supposed to be run only in, say,
> PRE_ROUTING, then it's fine. We do declare this for other programs
> (e.g., cgroup_skb/egress vs cgroup_skb/ingress), so it felt like this
> might be a similar case.
> 
>>
>>> Also, it seems like you actually didn't wire NETFILTER link support in
>>> libbpf completely. See bpf_link_create under tools/lib/bpf/bpf.c, it
>>> has to handle this new type of link as well. Existing tests seem a bit
>>> bare-bones for SEC("netfilter"), would it be possible to add something
>>> that will demonstrate it a bit better and will be actually executed at
>>> runtime and validated?
>>
>> I can have a look.
> 
> It probably makes sense to add bpf_program__attach_netfilter() API as
> well which will return `struct bpf_link *`. Right now libbpf support
> for NETFILTER is very incomplete.
Andrii Nakryiko April 28, 2023, 9:18 p.m. UTC | #5
On Fri, Apr 28, 2023 at 9:54 AM Quentin Deslandes <qde@naccy.de> wrote:
>
> On 28/04/2023 00:21, Andrii Nakryiko wrote:
> > On Thu, Apr 27, 2023 at 2:10 AM Florian Westphal <fw@strlen.de> wrote:
> >>
> >> Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
> >>>> @@ -1560,6 +1562,12 @@ union bpf_attr {
> >>>>                                  */
> >>>>                                 __u64           cookie;
> >>>>                         } tracing;
> >>>> +                       struct {
> >>>> +                               __u32           pf;
> >>>> +                               __u32           hooknum;
> >>>
> >>> catching up on stuff a bit...
> >>>
> >>> enum nf_inet_hooks {
> >>>         NF_INET_PRE_ROUTING,
> >>>         NF_INET_LOCAL_IN,
> >>>         NF_INET_FORWARD,
> >>>         NF_INET_LOCAL_OUT,
> >>>         NF_INET_POST_ROUTING,
> >>>         NF_INET_NUMHOOKS,
> >>>         NF_INET_INGRESS = NF_INET_NUMHOOKS,
> >>> };
> >>>
> >>> So it seems like this "hook number" is more like "hook type", is my
> >>> understanding correct?
> >>
> >> What is 'hook type'?
> >
> > I meant that it's not some dynamically generated number that could
> > change from the system to system, it's a fixed set of point in which
> > this BPF program can be triggered. The distinction I was trying to
> > make that it's actually different in nature compared to, say, ifindex,
> > as it is fixed by the kernel.
>
> Doesn't this ties the program to a specific hook then? Let's say you
> have a program counting the number of packets from a specific IP, and
> would you be able to attach it to both LOCAL_IN and FORWARD without
> modifying it?

By default, yes (but you can work around that). From your and
Florian's replies it follows that these are not like
expected_attach_type, if I understand correctly. So I'm fine with
having them as attach arguments, not part of the program type and
attach type.

>
> >>> If so, wouldn't it be cleaner and more uniform
> >>> with, say, cgroup network hooks to provide hook type as
> >>> expected_attach_type? It would also allow to have a nicer interface in
> >>> libbpf, by specifying that as part of SEC():
> >>>
> >>> SEC("netfilter/pre_routing"), SEC("netfilter/local_in"), etc...
> >>
> >> I don't understand how that would help.
> >> Attachment needs a priority and a family (ipv4, arp, etc.).
> >>
> >> If we allow netdev type we'll also need an ifindex.
> >> Daniel Xu work will need to pass extra arguments ("please enable ip
> >> defrag").
> >
> > Ok, that's fine, if you think it doesn't make sense to pre-declare
> > that a given BPF program is supposed to be run only in, say,
> > PRE_ROUTING, then it's fine. We do declare this for other programs
> > (e.g., cgroup_skb/egress vs cgroup_skb/ingress), so it felt like this
> > might be a similar case.
> >
> >>
> >>> Also, it seems like you actually didn't wire NETFILTER link support in
> >>> libbpf completely. See bpf_link_create under tools/lib/bpf/bpf.c, it
> >>> has to handle this new type of link as well. Existing tests seem a bit
> >>> bare-bones for SEC("netfilter"), would it be possible to add something
> >>> that will demonstrate it a bit better and will be actually executed at
> >>> runtime and validated?
> >>
> >> I can have a look.
> >
> > It probably makes sense to add bpf_program__attach_netfilter() API as
> > well which will return `struct bpf_link *`. Right now libbpf support
> > for NETFILTER is very incomplete.
>
diff mbox series

Patch

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c8e03bcaecaa..0762444e3767 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -80,6 +80,7 @@  typedef unsigned int nf_hookfn(void *priv,
 enum nf_hook_ops_type {
 	NF_HOOK_OP_UNDEFINED,
 	NF_HOOK_OP_NF_TABLES,
+	NF_HOOK_OP_BPF,
 };
 
 struct nf_hook_ops {
diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h
new file mode 100644
index 000000000000..eeaeaf3d15de
--- /dev/null
+++ b/include/net/netfilter/nf_bpf_link.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+#else
+static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+#endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4b20a7269bee..1bb11a6ee667 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -986,6 +986,7 @@  enum bpf_prog_type {
 	BPF_PROG_TYPE_LSM,
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+	BPF_PROG_TYPE_NETFILTER,
 };
 
 enum bpf_attach_type {
@@ -1050,6 +1051,7 @@  enum bpf_link_type {
 	BPF_LINK_TYPE_PERF_EVENT = 7,
 	BPF_LINK_TYPE_KPROBE_MULTI = 8,
 	BPF_LINK_TYPE_STRUCT_OPS = 9,
+	BPF_LINK_TYPE_NETFILTER = 10,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -1560,6 +1562,12 @@  union bpf_attr {
 				 */
 				__u64		cookie;
 			} tracing;
+			struct {
+				__u32		pf;
+				__u32		hooknum;
+				__s32		priority;
+				__u32		flags;
+			} netfilter;
 		};
 	} link_create;
 
@@ -6410,6 +6418,12 @@  struct bpf_link_info {
 		struct {
 			__u32 map_id;
 		} struct_ops;
+		struct {
+			__u32 pf;
+			__u32 hooknum;
+			__s32 priority;
+			__u32 flags;
+		} netfilter;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bcf1a1920ddd..14f39c1e573e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,6 +35,7 @@ 
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
 #include <linux/trace_events.h>
+#include <net/netfilter/nf_bpf_link.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -2462,6 +2463,7 @@  static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
 	case BPF_PROG_TYPE_EXT: /* extends any prog */
+	case BPF_PROG_TYPE_NETFILTER:
 		return true;
 	case BPF_PROG_TYPE_CGROUP_SKB:
 		/* always unpriv */
@@ -4588,6 +4590,7 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 
 	switch (prog->type) {
 	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_NETFILTER:
 		break;
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_TRACEPOINT:
@@ -4654,6 +4657,9 @@  static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 	case BPF_PROG_TYPE_XDP:
 		ret = bpf_xdp_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_NETFILTER:
+		ret = bpf_nf_link_attach(attr, prog);
+		break;
 #endif
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_TRACEPOINT:
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d0bf630482c1..441d1f134110 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -30,6 +30,9 @@  config NETFILTER_FAMILY_BRIDGE
 config NETFILTER_FAMILY_ARP
 	bool
 
+config NETFILTER_BPF_LINK
+	def_bool BPF_SYSCALL
+
 config NETFILTER_NETLINK_HOOK
 	tristate "Netfilter base hook dump support"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5ffef1cd6143..d4958e7e7631 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -22,6 +22,7 @@  nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
 endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
new file mode 100644
index 000000000000..efa4f3390742
--- /dev/null
+++ b/net/netfilter/nf_bpf_link.c
@@ -0,0 +1,159 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_bpf_link.h>
+#include <uapi/linux/netfilter_ipv4.h>
+
+static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
+				    const struct nf_hook_state *s)
+{
+	return NF_ACCEPT;
+}
+
+struct bpf_nf_link {
+	struct bpf_link link;
+	struct nf_hook_ops hook_ops;
+	struct net *net;
+	u32 dead;
+};
+
+static void bpf_nf_link_release(struct bpf_link *link)
+{
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+	if (nf_link->dead)
+		return;
+
+	/* prevent hook-not-found warning splat from netfilter core when
+	 * .detach was already called
+	 */
+	if (!cmpxchg(&nf_link->dead, 0, 1))
+		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+}
+
+static void bpf_nf_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+	kfree(nf_link);
+}
+
+static int bpf_nf_link_detach(struct bpf_link *link)
+{
+	bpf_nf_link_release(link);
+	return 0;
+}
+
+static void bpf_nf_link_show_info(const struct bpf_link *link,
+				  struct seq_file *seq)
+{
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+	seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n",
+		   nf_link->hook_ops.pf, nf_link->hook_ops.hooknum,
+		   nf_link->hook_ops.priority);
+}
+
+static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
+				      struct bpf_link_info *info)
+{
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+	info->netfilter.pf = nf_link->hook_ops.pf;
+	info->netfilter.hooknum = nf_link->hook_ops.hooknum;
+	info->netfilter.priority = nf_link->hook_ops.priority;
+	info->netfilter.flags = 0;
+
+	return 0;
+}
+
+static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+			      struct bpf_prog *old_prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static const struct bpf_link_ops bpf_nf_link_lops = {
+	.release = bpf_nf_link_release,
+	.dealloc = bpf_nf_link_dealloc,
+	.detach = bpf_nf_link_detach,
+	.show_fdinfo = bpf_nf_link_show_info,
+	.fill_link_info = bpf_nf_link_fill_link_info,
+	.update_prog = bpf_nf_link_update,
+};
+
+static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
+{
+	switch (attr->link_create.netfilter.pf) {
+	case NFPROTO_IPV4:
+	case NFPROTO_IPV6:
+		if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS)
+			return -EPROTO;
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	if (attr->link_create.netfilter.flags)
+		return -EOPNOTSUPP;
+
+	/* make sure conntrack confirm is always last.
+	 *
+	 * In the future, if userspace can e.g. request defrag, then
+	 * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
+	 * should fail.
+	 */
+	switch (attr->link_create.netfilter.priority) {
+	case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
+	case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
+	}
+
+	return 0;
+}
+
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct net *net = current->nsproxy->net_ns;
+	struct bpf_link_primer link_primer;
+	struct bpf_nf_link *link;
+	int err;
+
+	if (attr->link_create.flags)
+		return -EINVAL;
+
+	err = bpf_nf_check_pf_and_hooks(attr);
+	if (err)
+		return err;
+
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link)
+		return -ENOMEM;
+
+	bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
+
+	link->hook_ops.hook = nf_hook_run_bpf;
+	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
+	link->hook_ops.priv = prog;
+
+	link->hook_ops.pf = attr->link_create.netfilter.pf;
+	link->hook_ops.priority = attr->link_create.netfilter.priority;
+	link->hook_ops.hooknum = attr->link_create.netfilter.hooknum;
+
+	link->net = net;
+	link->dead = false;
+
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
+		kfree(link);
+		return err;
+	}
+
+	err = nf_register_net_hook(net, &link->hook_ops);
+	if (err) {
+		bpf_link_cleanup(&link_primer);
+		return err;
+	}
+
+	return bpf_link_settle(&link_primer);
+}