[net-next,v2,4/7] netfilter: flowtable: allow updating offloaded rules asynchronously

Message ID 20230113165548.2692720-5-vladbu@nvidia.com
State Superseded
Delegated to: Netdev Maintainers
Series Allow offloading of UDP NEW connections via act_ct

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 61 this patch: 61
netdev/cc_maintainers warning 4 maintainers not CCed: fw@strlen.de edumazet@google.com kadlec@netfilter.org coreteam@netfilter.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 61 this patch: 61
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 76 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vlad Buslov Jan. 13, 2023, 4:55 p.m. UTC
The following patches in the series need to update a flowtable rule
several times during its lifetime in order to synchronize the hardware
offload with the actual ct status. However, reusing the existing
'refresh' logic in act_ct would cause the data path to potentially
schedule a significant number of spurious tasks on the 'add' workqueue,
since it is executed per packet. Instead, introduce a new flow 'update'
flag and use it to schedule an async flow refresh in the flowtable gc,
which will only be executed once per gc iteration.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
---
 include/net/netfilter/nf_flow_table.h |  3 ++-
 net/netfilter/nf_flow_table_core.c    | 20 +++++++++++++++-----
 net/netfilter/nf_flow_table_offload.c |  5 +++--
 3 files changed, 20 insertions(+), 8 deletions(-)
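
For context, a minimal sketch of the calling pattern this enables (not
part of this patch -- the act_ct caller lands later in the series, and
the helper name below is invented for illustration):

#include <net/netfilter/nf_flow_table.h>

/* Hypothetical per-packet caller: rather than queueing FLOW_CLS_REPLACE
 * work for every packet, only mark the flow. The flowtable gc tests and
 * clears NF_FLOW_HW_UPDATE once per iteration and performs the refresh
 * asynchronously via __flow_offload_refresh().
 */
static inline void flow_offload_request_update(struct flow_offload *flow)
{
	set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
}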

Comments

Marcelo Ricardo Leitner Jan. 17, 2023, 3:28 p.m. UTC | #1
On Fri, Jan 13, 2023 at 05:55:45PM +0100, Vlad Buslov wrote:
> [...]
> @@ -435,6 +442,9 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
>  		} else {
>  			flow_offload_del(flow_table, flow);
>  		}
> +	} else if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) {
> +		if (!__flow_offload_refresh(flow_table, flow))
> +			set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
>  	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
>  		nf_flow_offload_stats(flow_table, flow);

AFAICT even after this patchset it is possible to have both flags set
at the same time.
With that, this would cause the stats to skip a beat.
This would be better:

-	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
-		nf_flow_offload_stats(flow_table, flow);
+	} else {
+		if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags))
+			if (!__flow_offload_refresh(flow_table, flow))
+				set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
+		if (test_bit(NF_FLOW_HW, &flow->flags))
+			nf_flow_offload_stats(flow_table, flow);
 	}

But a flow cannot have 2 pending actions at a time.
Then maybe an update to nf_flow_offload_tuple() to make it handle the
stats implicitly?
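
To make the "skip a beat" scenario concrete, this is the branch order
the patch introduces in nf_flow_offload_gc_step() (excerpt from this
patch, with comments added for illustration):

	} else if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) {
		/* Taken even when NF_FLOW_HW is also set, so the stats
		 * branch below is skipped for this gc iteration.
		 */
		if (!__flow_offload_refresh(flow_table, flow))
			set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
		nf_flow_offload_stats(flow_table, flow);
	}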

Vlad Buslov Jan. 17, 2023, 5:33 p.m. UTC | #2
On Tue 17 Jan 2023 at 12:28, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> wrote:
> On Fri, Jan 13, 2023 at 05:55:45PM +0100, Vlad Buslov wrote:
>> [...]
>> @@ -435,6 +442,9 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
>>  		} else {
>>  			flow_offload_del(flow_table, flow);
>>  		}
>> +	} else if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) {
>> +		if (!__flow_offload_refresh(flow_table, flow))
>> +			set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
>>  	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
>>  		nf_flow_offload_stats(flow_table, flow);
>
> AFAICT even after this patchset it is possible to have both flags set
> at the same time.
> With that, this would cause the stats to skip a beat.
> This would be better:
>
> -	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
> -		nf_flow_offload_stats(flow_table, flow);
> +	} else {
> +		if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags))
> +			if (!__flow_offload_refresh(flow_table, flow))
> +				set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
> +		if (test_bit(NF_FLOW_HW, &flow->flags))
> +			nf_flow_offload_stats(flow_table, flow);
>  	}
>
> But a flow cannot have 2 pending actions at a time.

Yes. And timeouts are quite generous so there is IMO no problem in
skipping one iteration. It is not like this wq is high priority and we
can guarantee any exact update interval here anyway.

> Then maybe an update to nf_flow_offload_tuple() to make it handle the
> stats implicitly?

I considered this, but didn't want to over-complicate this series which
is tricky enough as it is.

Marcelo Ricardo Leitner Jan. 17, 2023, 5:47 p.m. UTC | #3
On Tue, Jan 17, 2023 at 07:33:31PM +0200, Vlad Buslov wrote:
> 
> On Tue 17 Jan 2023 at 12:28, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> wrote:
> > [...]
> > But a flow cannot have 2 pending actions at a time.
> 
> Yes. And timeouts are quite generous so there is IMO no problem in
> skipping one iteration. It is not like this wq is high priority and we
> can guarantee any exact update interval here anyway.

I cannot disagree, let's say :-)

Perhaps I'm just over-worried because of recent issues with ovs and
datapath flows, where it was evicting them because it saw no traffic
in 5s or so.

For example,
Subject: [ovs-dev] [PATCH v3] ofproto-dpif-upcall: Wait for valid hw flow stats before applying min-revalidate-pps

And we're still chasing a stall in the ovs revalidator that leads to
hiccups in datapath stats periodicity.

Yet, I'm not aware of such checks on top of CT entries.

> 
> > Then maybe an update to nf_flow_offload_tuple() to make it handle the
> > stats implicitly?
> 
> I considered this, but didn't want to over-complicate this series which
> is tricky enough as it is.

Makes sense.


Patch

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 88ab98ab41d9..e396424e2e68 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -165,6 +165,7 @@  enum nf_flow_flags {
 	NF_FLOW_HW_DEAD,
 	NF_FLOW_HW_PENDING,
 	NF_FLOW_HW_BIDIRECTIONAL,
+	NF_FLOW_HW_UPDATE,
 };
 
 enum flow_offload_type {
@@ -300,7 +301,7 @@  unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
 
-void nf_flow_offload_add(struct nf_flowtable *flowtable,
+bool nf_flow_offload_add(struct nf_flowtable *flowtable,
 			 struct flow_offload *flow);
 void nf_flow_offload_del(struct nf_flowtable *flowtable,
 			 struct flow_offload *flow);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..5b495e768655 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -316,21 +316,28 @@  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
 
+static bool __flow_offload_refresh(struct nf_flowtable *flow_table,
+				   struct flow_offload *flow)
+{
+	if (likely(!nf_flowtable_hw_offload(flow_table)))
+		return true;
+
+	return nf_flow_offload_add(flow_table, flow);
+}
+
 void flow_offload_refresh(struct nf_flowtable *flow_table,
 			  struct flow_offload *flow)
 {
 	u32 timeout;
 
 	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
-	if (timeout - READ_ONCE(flow->timeout) > HZ)
+	if (timeout - READ_ONCE(flow->timeout) > HZ &&
+	    !test_bit(NF_FLOW_HW_UPDATE, &flow->flags))
 		WRITE_ONCE(flow->timeout, timeout);
 	else
 		return;
 
-	if (likely(!nf_flowtable_hw_offload(flow_table)))
-		return;
-
-	nf_flow_offload_add(flow_table, flow);
+	__flow_offload_refresh(flow_table, flow);
 }
 EXPORT_SYMBOL_GPL(flow_offload_refresh);
 
@@ -435,6 +442,9 @@  static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
 		} else {
 			flow_offload_del(flow_table, flow);
 		}
+	} else if (test_and_clear_bit(NF_FLOW_HW_UPDATE, &flow->flags)) {
+		if (!__flow_offload_refresh(flow_table, flow))
+			set_bit(NF_FLOW_HW_UPDATE, &flow->flags);
 	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
 		nf_flow_offload_stats(flow_table, flow);
 	}
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 8b852f10fab4..103b2ca8d123 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -1036,16 +1036,17 @@  nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
 }
 
 
-void nf_flow_offload_add(struct nf_flowtable *flowtable,
+bool nf_flow_offload_add(struct nf_flowtable *flowtable,
 			 struct flow_offload *flow)
 {
 	struct flow_offload_work *offload;
 
 	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
 	if (!offload)
-		return;
+		return false;
 
 	flow_offload_queue_work(offload);
+	return true;
 }
 
 void nf_flow_offload_del(struct nf_flowtable *flowtable,