From patchwork Fri Sep 16 09:29:40 2022
X-Patchwork-Submitter: Antoine Tenart
X-Patchwork-Id: 12978335
X-Patchwork-Delegate: kuba@kernel.org
From: Antoine Tenart
To: pablo@netfilter.org, kadlec@netfilter.org, fw@strlen.de
Cc: Antoine Tenart, netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH nf 1/2] netfilter: conntrack: fix the gc rescheduling delay
Date: Fri, 16 Sep 2022 11:29:40 +0200
Message-Id: <20220916092941.39121-2-atenart@kernel.org>
In-Reply-To: <20220916092941.39121-1-atenart@kernel.org>
References: <20220916092941.39121-1-atenart@kernel.org>
X-Mailing-List: netdev@vger.kernel.org

Commit 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
changed the eviction rescheduling to use the average expiry of scanned
entries (clamped within 1-60s), by doing:

    for (...) {
        expires = clamp(nf_ct_expires(tmp), ...);
        next_run += expires;
        next_run /= 2;
    }

The issue is that the above makes the average ('next_run' here) depend
more on the last expiration values than on the first ones (for sets of
more than two entries). Depending on the order of the expiration values
used to compute the average, the result can differ significantly from
what is expected.

To fix this we can do the following:

    for (...) {
        expires = clamp(nf_ct_expires(tmp), ...);
        next_run += (expires - next_run) / ++count;
    }

Fixes: 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
Cc: Florian Westphal
Signed-off-by: Antoine Tenart
---
 net/netfilter/nf_conntrack_core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1357a2729a4b..2e6d5f1e6d63 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -67,6 +67,7 @@ struct conntrack_gc_work {
 	struct delayed_work	dwork;
 	u32			next_bucket;
 	u32			avg_timeout;
+	u32			count;
 	u32			start_time;
 	bool			exiting;
 	bool			early_drop;
@@ -1466,6 +1467,7 @@ static void gc_worker(struct work_struct *work)
 	unsigned int expired_count = 0;
 	unsigned long next_run;
 	s32 delta_time;
+	long count;
 
 	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
@@ -1475,10 +1477,12 @@ static void gc_worker(struct work_struct *work)
 
 	if (i == 0) {
 		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
+		gc_work->count = 1;
 		gc_work->start_time = start_time;
 	}
 
 	next_run = gc_work->avg_timeout;
+	count = gc_work->count;
 
 	end_time = start_time + GC_SCAN_MAX_DURATION;
@@ -1498,8 +1502,8 @@ static void gc_worker(struct work_struct *work)
 		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
 			struct nf_conntrack_net *cnet;
-			unsigned long expires;
 			struct net *net;
+			long expires;
 
 			tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1513,6 +1517,7 @@ static void gc_worker(struct work_struct *work)
 				gc_work->next_bucket = i;
 				gc_work->avg_timeout = next_run;
+				gc_work->count = count;
 
 				delta_time = nfct_time_stamp - gc_work->start_time;
@@ -1528,8 +1533,8 @@ static void gc_worker(struct work_struct *work)
 			}
 
 			expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
+			expires = (expires - (long)next_run) / ++count;
 			next_run += expires;
-			next_run /= 2u;
 
 			if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
 				continue;
@@ -1570,6 +1575,7 @@ static void gc_worker(struct work_struct *work)
 	delta_time = nfct_time_stamp - end_time;
 	if (delta_time > 0 && i < hashsz) {
 		gc_work->avg_timeout = next_run;
+		gc_work->count = count;
 		gc_work->next_bucket = i;
 		next_run = 0;
 		goto early_exit;

From patchwork Fri Sep 16 09:29:41 2022
X-Patchwork-Submitter: Antoine Tenart
X-Patchwork-Id: 12978336
X-Patchwork-Delegate: kuba@kernel.org
From: Antoine Tenart
To: pablo@netfilter.org, kadlec@netfilter.org, fw@strlen.de
Cc: Antoine Tenart, netfilter-devel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH nf 2/2] netfilter: conntrack: revisit the gc initial rescheduling bias
Date: Fri, 16 Sep 2022 11:29:41 +0200
Message-Id: <20220916092941.39121-3-atenart@kernel.org>
In-Reply-To: <20220916092941.39121-1-atenart@kernel.org>
References: <20220916092941.39121-1-atenart@kernel.org>
X-Mailing-List: netdev@vger.kernel.org

The previous commit changed the way the rescheduling delay is computed,
which has a side effect: the bias now carries the same weight as every
other entry in the rescheduling delay. Because the initial interval is
very large (INT_MAX), the autotuning logic now only kicks in with very
large sets.

Revisit the GC initial bias to allow more frequent GC for smaller sets
while still avoiding wakeups when a machine is mostly idle. We're moving
from a large initial value to pretending we have 100 entries expiring at
the upper bound. This way a few entries with a small timeout won't
impact the rescheduling delay much, while non-idle machines will have
enough entries to lower the delay when needed. This also improves
readability, as the initial bias is now linked to what is computed
instead of being an arbitrarily large value.
Fixes: 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning")
Suggested-by: Florian Westphal
Signed-off-by: Antoine Tenart
---
 net/netfilter/nf_conntrack_core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e6d5f1e6d63..8f261cd5b3a5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -86,10 +86,12 @@ static DEFINE_MUTEX(nf_conntrack_mutex);
 /* clamp timeouts to this value (TCP unacked) */
 #define GC_SCAN_INTERVAL_CLAMP	(300ul * HZ)
 
-/* large initial bias so that we don't scan often just because we have
- * three entries with a 1s timeout.
+/* Initial bias pretending we have 100 entries at the upper bound so we don't
+ * wakeup often just because we have three entries with a 1s timeout while still
+ * allowing non-idle machines to wakeup more often when needed.
  */
-#define GC_SCAN_INTERVAL_INIT	INT_MAX
+#define GC_SCAN_INITIAL_COUNT	100
+#define GC_SCAN_INTERVAL_INIT	GC_SCAN_INTERVAL_MAX
 
 #define GC_SCAN_MAX_DURATION	msecs_to_jiffies(10)
 #define GC_SCAN_EXPIRED_MAX	(64000u / HZ)
@@ -1477,7 +1479,7 @@ static void gc_worker(struct work_struct *work)
 
 	if (i == 0) {
 		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
-		gc_work->count = 1;
+		gc_work->count = GC_SCAN_INITIAL_COUNT;
 		gc_work->start_time = start_time;
 	}