diff mbox series

[net-next,1/4] netfilter: conntrack: fix the gc rescheduling delay

Message ID 20220921095000.29569-2-fw@strlen.de (mailing list archive)
State Accepted
Commit 95eabdd207024312876d0ebed90b4c977e050e85
Delegated to: Netdev Maintainers
Headers show
Series [net-next,1/4] netfilter: conntrack: fix the gc rescheduling delay | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Pull request is its own cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/cc_maintainers fail 1 blamed authors not CCed: pablo@netfilter.org; 3 maintainers not CCed: pablo@netfilter.org coreteam@netfilter.org kadlec@netfilter.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 58 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Florian Westphal Sept. 21, 2022, 9:49 a.m. UTC
From: Antoine Tenart <atenart@kernel.org>

Commit 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
changed the eviction rescheduling to the use average expiry of scanned
entries (within 1-60s) by doing:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += expires;
      next_run /= 2;
  }

The issue is the above will make the average ('next_run' here) more
dependent on the last expiration values than the firsts (for sets > 2).
Depending on the expiration values used to compute the average, the
result can be quite different than what's expected. To fix this we can
do the following:

  for (...) {
      expires = clamp(nf_ct_expires(tmp), ...);
      next_run += (expires - next_run) / ++count;
  }

Fixes: 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_conntrack_core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Sept. 22, 2022, 2 a.m. UTC | #1
Hello:

This series was applied to netdev/net-next.git (master)
by Florian Westphal <fw@strlen.de>:

On Wed, 21 Sep 2022 11:49:57 +0200 you wrote:
> From: Antoine Tenart <atenart@kernel.org>
> 
> Commit 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
> changed the eviction rescheduling to the use average expiry of scanned
> entries (within 1-60s) by doing:
> 
>   for (...) {
>       expires = clamp(nf_ct_expires(tmp), ...);
>       next_run += expires;
>       next_run /= 2;
>   }
> 
> [...]

Here is the summary with links:
  - [net-next,1/4] netfilter: conntrack: fix the gc rescheduling delay
    https://git.kernel.org/netdev/net-next/c/95eabdd20702
  - [net-next,2/4] netfilter: conntrack: revisit the gc initial rescheduling bias
    https://git.kernel.org/netdev/net-next/c/2aa192757005
  - [net-next,3/4] headers: Remove some left-over license text in include/uapi/linux/netfilter/
    https://git.kernel.org/netdev/net-next/c/7b5541a932c2
  - [net-next,4/4] netfilter: rpfilter: Remove unused variable 'ret'.
    https://git.kernel.org/netdev/net-next/c/72f5c8980463

You are awesome, thank you!
diff mbox series

Patch

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c5851e1321e7..8efa6bd5703c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,6 +67,7 @@  struct conntrack_gc_work {
 	struct delayed_work	dwork;
 	u32			next_bucket;
 	u32			avg_timeout;
+	u32			count;
 	u32			start_time;
 	bool			exiting;
 	bool			early_drop;
@@ -1466,6 +1467,7 @@  static void gc_worker(struct work_struct *work)
 	unsigned int expired_count = 0;
 	unsigned long next_run;
 	s32 delta_time;
+	long count;
 
 	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
 
@@ -1475,10 +1477,12 @@  static void gc_worker(struct work_struct *work)
 
 	if (i == 0) {
 		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+		gc_work->count = 1;
 		gc_work->start_time = start_time;
 	}
 
 	next_run = gc_work->avg_timeout;
+	count = gc_work->count;
 
 	end_time = start_time + GC_SCAN_MAX_DURATION;
 
@@ -1498,8 +1502,8 @@  static void gc_worker(struct work_struct *work)
 
 		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
 			struct nf_conntrack_net *cnet;
-			unsigned long expires;
 			struct net *net;
+			long expires;
 
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 
@@ -1513,6 +1517,7 @@  static void gc_worker(struct work_struct *work)
 
 				gc_work->next_bucket = i;
 				gc_work->avg_timeout = next_run;
+				gc_work->count = count;
 
 				delta_time = nfct_time_stamp - gc_work->start_time;
 
@@ -1528,8 +1533,8 @@  static void gc_worker(struct work_struct *work)
 			}
 
 			expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+			expires = (expires - (long)next_run) / ++count;
 			next_run += expires;
-			next_run /= 2u;
 
 			if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
 				continue;
@@ -1570,6 +1575,7 @@  static void gc_worker(struct work_struct *work)
 		delta_time = nfct_time_stamp - end_time;
 		if (delta_time > 0 && i < hashsz) {
 			gc_work->avg_timeout = next_run;
+			gc_work->count = count;
 			gc_work->next_bucket = i;
 			next_run = 0;
 			goto early_exit;