diff mbox series

[RFC,nf-next,1/2] netfilter: bpf: support prog update

Message ID 1702467945-38866-2-git-send-email-alibuda@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series netfilter: bpf: support prog update | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 fail Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 fail Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 fail Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1117 this patch: 1119
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang success Errors and warnings before: 1143 this patch: 1143
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1144 this patch: 1146
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

D. Wythe Dec. 13, 2023, 11:45 a.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

To support the prog update, we need to ensure that the prog seen
within the hook is always valid. Considering that hooks are always
protected by rcu_read_lock(), which provide us the ability to use a
new RCU-protected context to access the prog.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 111 insertions(+), 13 deletions(-)

Comments

Florian Westphal Dec. 13, 2023, 10:24 p.m. UTC | #1
D. Wythe <alibuda@linux.alibaba.com> wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> To support the prog update, we need to ensure that the prog seen
> within the hook is always valid. Considering that hooks are always
> protected by rcu_read_lock(), which provide us the ability to use a
> new RCU-protected context to access the prog.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>  net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 111 insertions(+), 13 deletions(-)
> 
> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> index e502ec0..918c470 100644
> --- a/net/netfilter/nf_bpf_link.c
> +++ b/net/netfilter/nf_bpf_link.c
> @@ -8,17 +8,11 @@
>  #include <net/netfilter/nf_bpf_link.h>
>  #include <uapi/linux/netfilter_ipv4.h>
>  
> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> -				    const struct nf_hook_state *s)
> +struct bpf_nf_hook_ctx
>  {
> -	const struct bpf_prog *prog = bpf_prog;
> -	struct bpf_nf_ctx ctx = {
> -		.state = s,
> -		.skb = skb,
> -	};
> -
> -	return bpf_prog_run(prog, &ctx);
> -}
> +	struct bpf_prog *prog;
> +	struct rcu_head rcu;
> +};

I don't understand the need for this structure.  AFAICS bpf_prog_put()
will always release the program via call_rcu()?

If it doesn't, we are probably already in trouble as-is without this
patch, I don't think anything that prevents us from ending up calling already
released bpf prog, or releasing it while another cpu is still running it
if bpf_prog_put releases the actual underlying prog instantly.

A BPF expert could confirm bpf-prog-put-is-call-rcu.

>  struct bpf_nf_link {
>  	struct bpf_link link;
> @@ -26,8 +20,59 @@ struct bpf_nf_link {
>  	struct net *net;
>  	u32 dead;
>  	const struct nf_defrag_hook *defrag_hook;
> +	/* protect link update in parallel */
> +	struct mutex update_lock;
> +	struct bpf_nf_hook_ctx __rcu *hook_ctx;

What kind of replacements-per-second rate are you aiming for?
I think

static DEFINE_MUTEX(bpf_nf_mutex);

is enough.

Then bpf_nf_link gains

	struct bpf_prog __rcu *prog

and possibly a trailing struct rcu_head, see below.

> +static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
> +{
> +	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
> +}

Don't understand the need for call_rcu either, see below.

> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
> +				    const struct nf_hook_state *s)
> +{
> +	const struct bpf_nf_link *link = bpf_link;
> +	struct bpf_nf_hook_ctx *hook_ctx;
> +	struct bpf_nf_ctx ctx = {
> +		.state = s,
> +		.skb = skb,
> +	};
> +
> +	hook_ctx = rcu_dereference(link->hook_ctx);

This could then just rcu_deref link->prog.

> +	return bpf_prog_run(hook_ctx->prog, &ctx);
> +}
> +
>  #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>  static const struct nf_defrag_hook *
>  get_proto_defrag_hook(struct bpf_nf_link *link,
> @@ -120,6 +165,10 @@ static void bpf_nf_link_release(struct bpf_link *link)
>  	if (!cmpxchg(&nf_link->dead, 0, 1)) {
>  		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>  		bpf_nf_disable_defrag(nf_link);
> +		/* Wait for outstanding hook to complete before the
> +		 * link gets released.
> +		 */
> +		synchronize_rcu();
>  	}

Could you convert bpf_nf_link_dealloc to release via kfree_rcu instead?

> @@ -162,7 +212,42 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>  static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>  			      struct bpf_prog *old_prog)
>  {
> -	return -EOPNOTSUPP;
> +	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
> +	struct bpf_nf_hook_ctx *hook_ctx;
> +	int err = 0;
> +
> +	mutex_lock(&nf_link->update_lock);
> +

I think you need to check link->dead here too.

> +	/* bpf_nf_link_release() ensures that after its execution, there will be
> +	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
> +	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times."
> +	 */
> +	link->hook_ops.priv = link;

ATM we only need to make sure the bpf prog itself stays alive until after
all concurrent rcu critical sections have completed.

After this change, struct bpf_link gets passed instead, so we need to
keep that alive too.

Which works with synchronize_rcu, sure, but that seems a bit overkill here.
Alexei Starovoitov Dec. 14, 2023, 3:25 a.m. UTC | #2
On Wed, Dec 13, 2023 at 2:24 PM Florian Westphal <fw@strlen.de> wrote:
>
> D. Wythe <alibuda@linux.alibaba.com> wrote:
> > From: "D. Wythe" <alibuda@linux.alibaba.com>
> >
> > To support the prog update, we need to ensure that the prog seen
> > within the hook is always valid. Considering that hooks are always
> > protected by rcu_read_lock(), which provide us the ability to use a
> > new RCU-protected context to access the prog.
> >
> > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> > ---
> >  net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 111 insertions(+), 13 deletions(-)
> >
> > diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
> > index e502ec0..918c470 100644
> > --- a/net/netfilter/nf_bpf_link.c
> > +++ b/net/netfilter/nf_bpf_link.c
> > @@ -8,17 +8,11 @@
> >  #include <net/netfilter/nf_bpf_link.h>
> >  #include <uapi/linux/netfilter_ipv4.h>
> >
> > -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
> > -                                 const struct nf_hook_state *s)
> > +struct bpf_nf_hook_ctx
> >  {
> > -     const struct bpf_prog *prog = bpf_prog;
> > -     struct bpf_nf_ctx ctx = {
> > -             .state = s,
> > -             .skb = skb,
> > -     };
> > -
> > -     return bpf_prog_run(prog, &ctx);
> > -}
> > +     struct bpf_prog *prog;
> > +     struct rcu_head rcu;
> > +};
>
> I don't understand the need for this structure.  AFAICS bpf_prog_put()
> will always release the program via call_rcu()?
>
> If it doesn't, we are probably already in trouble as-is without this
> patch, I don't think anything that prevents us from ending up calling already
> released bpf prog, or releasing it while another cpu is still running it
> if bpf_prog_put releases the actual underlying prog instantly.
>
> A BPF expert could confirm bpf-prog-put-is-call-rcu.

+1
These patches look unnecessary.
It seems that they accidently fix something else.
D. Wythe Dec. 14, 2023, 5:31 a.m. UTC | #3
On 12/14/23 6:24 AM, Florian Westphal wrote:
> D. Wythe <alibuda@linux.alibaba.com> wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> To support the prog update, we need to ensure that the prog seen
>> within the hook is always valid. Considering that hooks are always
>> protected by rcu_read_lock(), which provide us the ability to use a
>> new RCU-protected context to access the prog.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/netfilter/nf_bpf_link.c | 124 +++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 111 insertions(+), 13 deletions(-)
>>
>> diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
>> index e502ec0..918c470 100644
>> --- a/net/netfilter/nf_bpf_link.c
>> +++ b/net/netfilter/nf_bpf_link.c
>> @@ -8,17 +8,11 @@
>>   #include <net/netfilter/nf_bpf_link.h>
>>   #include <uapi/linux/netfilter_ipv4.h>
>>   
>> -static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
>> -				    const struct nf_hook_state *s)
>> +struct bpf_nf_hook_ctx
>>   {
>> -	const struct bpf_prog *prog = bpf_prog;
>> -	struct bpf_nf_ctx ctx = {
>> -		.state = s,
>> -		.skb = skb,
>> -	};
>> -
>> -	return bpf_prog_run(prog, &ctx);
>> -}
>> +	struct bpf_prog *prog;
>> +	struct rcu_head rcu;
>> +};
> I don't understand the need for this structure.  AFAICS bpf_prog_put()
> will always release the program via call_rcu()?
>
> If it doesn't, we are probably already in trouble as-is without this
> patch, I don't think anything that prevents us from ending up calling already
> released bpf prog, or releasing it while another cpu is still running it
> if bpf_prog_put releases the actual underlying prog instantly.
>
> A BPF expert could confirm bpf-prog-put-is-call-rcu.

Hi Florian,

I must admit that I did not realize that bpf_prog is released
under RCU ...

>>   struct bpf_nf_link {
>>   	struct bpf_link link;
>> @@ -26,8 +20,59 @@ struct bpf_nf_link {
>>   	struct net *net;
>>   	u32 dead;
>>   	const struct nf_defrag_hook *defrag_hook;
>> +	/* protect link update in parallel */
>> +	struct mutex update_lock;
>> +	struct bpf_nf_hook_ctx __rcu *hook_ctx;
> What kind of replacements-per-second rate are you aiming for?
> I think
>
> static DEFINE_MUTEX(bpf_nf_mutex);
>
> is enough.

I'm okay with that.

>
> Then bpf_nf_link gains
>
> 	struct bpf_prog __rcu *prog
>
> and possibly a trailing struct rcu_head, see below.

Yes, that's what we need.

>> +static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
>> +{
>> +	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
>> +}
> Don't understand the need for call_rcu either, see below.
>
>> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
>> +				    const struct nf_hook_state *s)
>> +{
>> +	const struct bpf_nf_link *link = bpf_link;
>> +	struct bpf_nf_hook_ctx *hook_ctx;
>> +	struct bpf_nf_ctx ctx = {
>> +		.state = s,
>> +		.skb = skb,
>> +	};
>> +
>> +	hook_ctx = rcu_dereference(link->hook_ctx);
> This could then just rcu_deref link->prog.
>
>> +	return bpf_prog_run(hook_ctx->prog, &ctx);
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>>   static const struct nf_defrag_hook *
>>   get_proto_defrag_hook(struct bpf_nf_link *link,
>> @@ -120,6 +165,10 @@ static void bpf_nf_link_release(struct bpf_link *link)
>>   	if (!cmpxchg(&nf_link->dead, 0, 1)) {
>>   		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
>>   		bpf_nf_disable_defrag(nf_link);
>> +		/* Wait for outstanding hook to complete before the
>> +		 * link gets released.
>> +		 */
>> +		synchronize_rcu();
>>   	}
> Could you convert bpf_nf_link_dealloc to release via kfree_rcu instead?
>
Got it.
>> @@ -162,7 +212,42 @@ static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
>>   static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
>>   			      struct bpf_prog *old_prog)
>>   {
>> -	return -EOPNOTSUPP;
>> +	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
>> +	struct bpf_nf_hook_ctx *hook_ctx;
>> +	int err = 0;
>> +
>> +	mutex_lock(&nf_link->update_lock);
>> +
> I think you need to check link->dead here too.

Got that.
>
>> +	/* bpf_nf_link_release() ensures that after its execution, there will be
>> +	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
>> +	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times."
>> +	 */
>> +	link->hook_ops.priv = link;
> ATM we only need to make sure the bpf prog itself stays alive until after
> all concurrent rcu critical sections have completed.
>
> After this change, struct bpf_link gets passed instead, so we need to
> keep that alive too.
>
> Which works with synchronize_rcu, sure, but that seems a bit overkill here.

Got it! Thank you very much for your suggestion.
I will address those issues you mentioned in the next version.


Best wishes,
D. Wythe
Alexei Starovoitov Dec. 14, 2023, 5:50 a.m. UTC | #4
On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
> I will address those issues you mentioned in the next version.

Don't. There is no need for the next version.
None of these changes are necessary.
D. Wythe Dec. 14, 2023, 8:56 a.m. UTC | #5
On 12/14/23 1:50 PM, Alexei Starovoitov wrote:
> On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
>> I will address those issues you mentioned in the next version.
> Don't. There is no need for the next version.
> None of these changes are necessary.

Can I know the reason ?  Updating prog for active link is kind of 
important feature
for real application..

Best wishes,
D. Wythe
Alexei Starovoitov Dec. 14, 2023, 1:37 p.m. UTC | #6
On Thu, Dec 14, 2023 at 12:57 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
>
> On 12/14/23 1:50 PM, Alexei Starovoitov wrote:
> > On Wed, Dec 13, 2023 at 9:31 PM D. Wythe <alibuda@linux.alibaba.com> wrote:
> >> I will address those issues you mentioned in the next version.
> > Don't. There is no need for the next version.
> > None of these changes are necessary.
>
> Can I know the reason ?  Updating prog for active link is kind of
> important feature
> for real application..

yes. it's and it's working as expected. Do you see an issue?
D. Wythe Dec. 14, 2023, 3:56 p.m. UTC | #7
On 12/14/23 9:37 PM, Alexei Starovoitov wrote:
> yes. it's and it's working as expected. Do you see an issue?

Hi Alexei,

I see the issue here is that bpf_nf_link has not yet implemented 
prog_update,
which just simply returned -EOPNOTSUPP right now.

Do you mean that it is already implemented in the latest tree or
the not-supported was expected?

Thanks,
D. Wythe
Alexei Starovoitov Dec. 14, 2023, 4:02 p.m. UTC | #8
On Thu, Dec 14, 2023 at 7:56 AM D. Wythe <alibuda@linux.alibaba.com> wrote:
>
>
>
> On 12/14/23 9:37 PM, Alexei Starovoitov wrote:
> > yes. it's and it's working as expected. Do you see an issue?
>
> Hi Alexei,
>
> I see the issue here is that bpf_nf_link has not yet implemented
> prog_update,
> which just simply returned -EOPNOTSUPP right now.

I see. The commit log didn't make it clear.
Yes. That would be good to support.
D. Wythe Dec. 14, 2023, 4:10 p.m. UTC | #9
On 12/15/23 12:02 AM, Alexei Starovoitov wrote:
> I see. The commit log didn't make it clear.
> Yes. That would be good to support.

That's my bad. I will make the commit log more clear in the next version.
In any case, thanks very much for your feedback.

Besh wishes,
D. Wythe
diff mbox series

Patch

diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec0..918c470 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -8,17 +8,11 @@ 
 #include <net/netfilter/nf_bpf_link.h>
 #include <uapi/linux/netfilter_ipv4.h>
 
-static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
-				    const struct nf_hook_state *s)
+struct bpf_nf_hook_ctx
 {
-	const struct bpf_prog *prog = bpf_prog;
-	struct bpf_nf_ctx ctx = {
-		.state = s,
-		.skb = skb,
-	};
-
-	return bpf_prog_run(prog, &ctx);
-}
+	struct bpf_prog *prog;
+	struct rcu_head rcu;
+};
 
 struct bpf_nf_link {
 	struct bpf_link link;
@@ -26,8 +20,59 @@  struct bpf_nf_link {
 	struct net *net;
 	u32 dead;
 	const struct nf_defrag_hook *defrag_hook;
+	/* protect link update in parallel */
+	struct mutex update_lock;
+	struct bpf_nf_hook_ctx __rcu *hook_ctx;
 };
 
+static struct bpf_nf_hook_ctx *
+bpf_nf_hook_ctx_from_prog(struct bpf_prog *prog, gfp_t flags)
+{
+	struct bpf_nf_hook_ctx *hook_ctx;
+
+	hook_ctx = kmalloc(sizeof(*hook_ctx), flags);
+	if (hook_ctx) {
+		hook_ctx->prog = prog;
+		bpf_prog_inc(prog);
+	}
+	return hook_ctx;
+}
+
+static void bpf_nf_hook_ctx_free(struct bpf_nf_hook_ctx *hook_ctx)
+{
+	if (!hook_ctx)
+		return;
+	if (hook_ctx->prog)
+		bpf_prog_put(hook_ctx->prog);
+	kfree(hook_ctx);
+}
+
+static void __bpf_nf_hook_ctx_free_rcu(struct rcu_head *head)
+{
+	struct bpf_nf_hook_ctx *hook_ctx = container_of(head, struct bpf_nf_hook_ctx, rcu);
+
+	bpf_nf_hook_ctx_free(hook_ctx);
+}
+
+static void bpf_nf_hook_ctx_free_rcu(struct bpf_nf_hook_ctx *hook_ctx)
+{
+	call_rcu(&hook_ctx->rcu, __bpf_nf_hook_ctx_free_rcu);
+}
+
+static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
+				    const struct nf_hook_state *s)
+{
+	const struct bpf_nf_link *link = bpf_link;
+	struct bpf_nf_hook_ctx *hook_ctx;
+	struct bpf_nf_ctx ctx = {
+		.state = s,
+		.skb = skb,
+	};
+
+	hook_ctx = rcu_dereference(link->hook_ctx);
+	return bpf_prog_run(hook_ctx->prog, &ctx);
+}
+
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static const struct nf_defrag_hook *
 get_proto_defrag_hook(struct bpf_nf_link *link,
@@ -120,6 +165,10 @@  static void bpf_nf_link_release(struct bpf_link *link)
 	if (!cmpxchg(&nf_link->dead, 0, 1)) {
 		nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
 		bpf_nf_disable_defrag(nf_link);
+		/* Wait for outstanding hook to complete before the
+		 * link gets released.
+		 */
+		synchronize_rcu();
 	}
 }
 
@@ -127,6 +176,7 @@  static void bpf_nf_link_dealloc(struct bpf_link *link)
 {
 	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
 
+	bpf_nf_hook_ctx_free(nf_link->hook_ctx);
 	kfree(nf_link);
 }
 
@@ -162,7 +212,42 @@  static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
 static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 			      struct bpf_prog *old_prog)
 {
-	return -EOPNOTSUPP;
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+	struct bpf_nf_hook_ctx *hook_ctx;
+	int err = 0;
+
+	mutex_lock(&nf_link->update_lock);
+
+	/* target old_prog mismatch */
+	if (old_prog && link->prog != old_prog) {
+		err = -EPERM;
+		goto out;
+	}
+
+	old_prog = link->prog;
+	if (old_prog == new_prog) {
+		/* don't need update */
+		bpf_prog_put(new_prog);
+		goto out;
+	}
+
+	hook_ctx = bpf_nf_hook_ctx_from_prog(new_prog, GFP_USER);
+	if (!hook_ctx) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* replace and get the old one */
+	hook_ctx = rcu_replace_pointer(nf_link->hook_ctx, hook_ctx,
+				       lockdep_is_held(&nf_link->update_lock));
+	/* free old hook_ctx */
+	bpf_nf_hook_ctx_free_rcu(hook_ctx);
+
+	old_prog = xchg(&link->prog, new_prog);
+	bpf_prog_put(old_prog);
+out:
+	mutex_unlock(&nf_link->update_lock);
+	return err;
 }
 
 static const struct bpf_link_ops bpf_nf_link_lops = {
@@ -222,11 +307,22 @@  int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	if (!link)
 		return -ENOMEM;
 
+	link->hook_ctx = bpf_nf_hook_ctx_from_prog(prog, GFP_USER);
+	if (!link->hook_ctx) {
+		kfree(link);
+		return -ENOMEM;
+	}
+
 	bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
 
 	link->hook_ops.hook = nf_hook_run_bpf;
 	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
-	link->hook_ops.priv = prog;
+
+	/* bpf_nf_link_release() ensures that after its execution, there will be
+	 * no ongoing or upcoming execution of nf_hook_run_bpf() within any context.
+	 * Therefore, within nf_hook_run_bpf(), the link remains valid at all times."
+	 */
+	link->hook_ops.priv = link;
 
 	link->hook_ops.pf = attr->link_create.netfilter.pf;
 	link->hook_ops.priority = attr->link_create.netfilter.priority;
@@ -236,9 +332,11 @@  int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	link->dead = false;
 	link->defrag_hook = NULL;
 
+	mutex_init(&link->update_lock);
+
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err) {
-		kfree(link);
+		bpf_nf_link_dealloc(&link->link);
 		return err;
 	}