diff mbox series

[RFC,net-next,v2,1/2] net: Remove expired routes with separated timers.

Message ID 20230517183337.190591-2-kuifeng@meta.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series Mitigate the Issue of Expired Routes in Linux IPv6 Routing Tables | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 512 this patch: 512
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 515 this patch: 515
netdev/checkpatch fail ERROR: space required after that ',' (ctx:VxV) WARNING: From:/Signed-off-by: email address mismatch: 'From: Kui-Feng Lee <thinker.li@gmail.com>' != 'Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>' WARNING: Prefer using '"%s...", __func__' to using 'fib6_set_expires', this function's name, in a string
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kui-Feng Lee May 17, 2023, 6:33 p.m. UTC
FIB6 GC walks the tries of fib6_tables to remove expired routes.  Walking a
trie can be expensive when a table holds a large number of routes.
Creating a separate timer for each route that can expire avoids
this potential issue.

Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
---
 include/net/ip6_fib.h | 19 ++++------
 net/ipv6/addrconf.c   |  8 ++--
 net/ipv6/ip6_fib.c    | 88 ++++++++++++++++++++++++++++++++++++-------
 net/ipv6/ndisc.c      |  2 +-
 net/ipv6/route.c      |  6 +--
 5 files changed, 91 insertions(+), 32 deletions(-)

Comments

David Ahern May 18, 2023, 3:36 a.m. UTC | #1
On 5/17/23 12:33 PM, Kui-Feng Lee wrote:
> @@ -179,6 +181,7 @@ struct fib6_info {
>  
>  	refcount_t			fib6_ref;
>  	unsigned long			expires;
> +	struct fib6_info_timer		*timer;

If this solution moves forward as a separate timer per route with an
expiration, the timer-related info can be added inline. The current
fib6_info with a single nexthop is 264B, so it is already rounded up to
512B on the allocation.
diff mbox series

Patch

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 05e6f756feaf..850995306718 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -161,6 +161,8 @@  struct fib6_nh {
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 };
 
+struct fib6_info_timer;
+
 struct fib6_info {
 	struct fib6_table		*fib6_table;
 	struct fib6_info __rcu		*fib6_next;
@@ -179,6 +181,7 @@  struct fib6_info {
 
 	refcount_t			fib6_ref;
 	unsigned long			expires;
+	struct fib6_info_timer		*timer;
 	struct dst_metrics		*fib6_metrics;
 #define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]
 
@@ -247,18 +250,11 @@  static inline bool fib6_requires_src(const struct fib6_info *rt)
 	return rt->fib6_src.plen > 0;
 }
 
-static inline void fib6_clean_expires(struct fib6_info *f6i)
-{
-	f6i->fib6_flags &= ~RTF_EXPIRES;
-	f6i->expires = 0;
-}
+void fib6_clean_expires(struct fib6_info *f6i);
 
-static inline void fib6_set_expires(struct fib6_info *f6i,
-				    unsigned long expires)
-{
-	f6i->expires = expires;
-	f6i->fib6_flags |= RTF_EXPIRES;
-}
+void fib6_set_expires(struct net *net,
+		      struct fib6_info *f6i,
+		      unsigned long expires);
 
 static inline bool fib6_check_expired(const struct fib6_info *f6i)
 {
@@ -388,6 +384,7 @@  struct fib6_table {
 	struct inet_peer_base	tb6_peers;
 	unsigned int		flags;
 	unsigned int		fib_seq;
+	struct hlist_head	tb6_timer_hlist;
 #define RT6_TABLE_HAS_DFLT_ROUTER	BIT(0)
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3797917237d0..13e2366613c4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1254,7 +1254,8 @@  cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
 			ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
 		else {
 			if (!(f6i->fib6_flags & RTF_EXPIRES))
-				fib6_set_expires(f6i, expires);
+				fib6_set_expires(dev_net(ifp->idev->dev),
+						 f6i, expires);
 			fib6_info_release(f6i);
 		}
 	}
@@ -2762,7 +2763,8 @@  void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				rt = NULL;
 			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
-				fib6_set_expires(rt, jiffies + rt_expires);
+				fib6_set_expires(net, rt,
+						 jiffies + rt_expires);
 			} else {
 				fib6_clean_expires(rt);
 			}
@@ -4723,7 +4725,7 @@  static int modify_prefix_route(struct inet6_ifaddr *ifp,
 		if (!expires)
 			fib6_clean_expires(f6i);
 		else
-			fib6_set_expires(f6i, expires);
+			fib6_set_expires(dev_net(ifp->idev->dev), f6i, expires);
 
 		fib6_info_release(f6i);
 	}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2438da5ff6da..8a10a0355816 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -25,6 +25,7 @@ 
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/timer.h>
 
 #include <net/ip.h>
 #include <net/ipv6.h>
@@ -54,6 +55,12 @@  struct fib6_cleaner {
 #define FWS_INIT FWS_L
 #endif
 
+struct fib6_info_timer {	/* per-route expiry timer state */
+	struct timer_list timer;	/* runs f6i_gc_timer_cb() when it fires */
+	struct fib6_info *f6i;		/* route the callback passes to fib6_del() */
+	struct net *net;		/* handed to fib6_del() as info.nl_net */
+};
+
 static struct fib6_info *fib6_find_prefix(struct net *net,
 					 struct fib6_table *table,
 					 struct fib6_node *fn);
@@ -144,6 +151,66 @@  static __be32 addr_bit_set(const void *token, int fn_bit)
 	       addr[fn_bit >> 5];
 }
 
+static void f6i_gc_timer_cb(struct timer_list *t) /* route expiry timer */
+{
+	struct fib6_info_timer *timer;
+	struct nl_info info = {
+		.nlh = NULL,
+	};
+	struct fib6_info *f6i;
+	int res;
+
+	timer = from_timer(timer, t, timer);	/* wrapper embedding @t */
+	info.nl_net = timer->net;
+	f6i = timer->f6i;
+	spin_lock(&f6i->fib6_table->tb6_lock);	/* held across fib6_del() */
+
+	res = fib6_del(f6i, &info);
+	if (res != 0) {	/* failure is only reported when RT6_DEBUG >= 2 */
+#if RT6_DEBUG >= 2
+		pr_debug("%s: del failed: rt=%p@%p err=%d\n",
+			 __func__, f6i,
+			 rcu_access_pointer(f6i->fib6_node),
+			 res);
+#endif
+	}
+
+	spin_unlock(&f6i->fib6_table->tb6_lock);
+
+	fib6_info_release(f6i);	/* ref taken in fib6_set_expires() */
+}
+
+void fib6_clean_expires(struct fib6_info *f6i) /* cancel route expiry */
+{
+	f6i->fib6_flags &= ~RTF_EXPIRES;
+	f6i->expires = 0;
+	if (!f6i->timer)	/* no timer allocated for this route */
+		return;
+	if (try_to_del_timer_sync(&f6i->timer->timer) == 1)
+		fib6_info_release(f6i);	/* ref taken in fib6_set_expires() */
+}
+
+/* Flag @f6i as expiring at @expires (in jiffies) and (re)arm its timer.
+ * If the timer cannot be allocated the route stays flagged, untimed.
+ */
+void fib6_set_expires(struct net *net, struct fib6_info *f6i,
+		      unsigned long expires)
+{
+	f6i->expires = expires;
+	f6i->fib6_flags |= RTF_EXPIRES;
+	if (!f6i->timer) {
+		f6i->timer = kzalloc(sizeof(*f6i->timer), GFP_ATOMIC);
+		if (!f6i->timer)
+			return;	/* OOM must not panic the kernel */
+		f6i->timer->f6i = f6i;
+		f6i->timer->net = net;
+		timer_setup(&f6i->timer->timer, f6i_gc_timer_cb, 0);
+	}
+	fib6_info_hold(f6i);	/* reference owned by the pending timer */
+	if (mod_timer(&f6i->timer->timer, expires) == 1)
+		fib6_info_release(f6i);	/* timer was already pending */
+}
+
 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
 {
 	struct fib6_info *f6i;
@@ -175,6 +242,7 @@  void fib6_info_destroy_rcu(struct rcu_head *head)
 		fib6_nh_release(f6i->fib6_nh);
 
 	ip_fib_metrics_put(f6i->fib6_metrics);
+	kfree(f6i->timer);
 	kfree(f6i);
 }
 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -246,6 +314,7 @@  static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
 				   net->ipv6.fib6_null_entry);
 		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
 		inet_peer_base_init(&table->tb6_peers);
+		INIT_HLIST_HEAD(&table->tb6_timer_hlist);
 	}
 
 	return table;
@@ -1120,7 +1189,8 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 				if (!(rt->fib6_flags & RTF_EXPIRES))
 					fib6_clean_expires(iter);
 				else
-					fib6_set_expires(iter, rt->expires);
+					fib6_set_expires(info->nl_net,
+							 iter, rt->expires);
 
 				if (rt->fib6_pmtu)
 					fib6_metric_set(iter, RTAX_MTU,
@@ -2025,6 +2095,9 @@  int fib6_del(struct fib6_info *rt, struct nl_info *info)
 		if (rt == cur) {
 			if (fib6_requires_src(cur))
 				fib6_routes_require_src_dec(info->nl_net);
+			if (cur->timer &&
+			    try_to_del_timer_sync(&cur->timer->timer) == 1)
+				fib6_info_release(cur);
 			fib6_del_route(table, fn, rtp, info);
 			return 0;
 		}
@@ -2290,19 +2363,6 @@  static int fib6_age(struct fib6_info *rt, void *arg)
 	struct fib6_gc_args *gc_args = arg;
 	unsigned long now = jiffies;
 
-	/*
-	 *	check addrconf expiration here.
-	 *	Routes are expired even if they are in use.
-	 */
-
-	if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
-		if (time_after(now, rt->expires)) {
-			RT6_TRACE("expiring %p\n", rt);
-			return -1;
-		}
-		gc_args->more++;
-	}
-
 	/*	Also age clones in the exception table.
 	 *	Note, that clones are aged out
 	 *	only if they are not in use now.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a4..1d4cf7f73097 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1407,7 +1407,7 @@  static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
 	}
 
 	if (rt)
-		fib6_set_expires(rt, jiffies + (HZ * lifetime));
+		fib6_set_expires(net, rt, jiffies + (HZ * lifetime));
 	if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
 	    ra_msg->icmph.icmp6_hop_limit) {
 		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e3aec46bd466..87721a2a91b6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -990,7 +990,7 @@  int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 		if (!addrconf_finite_timeout(lifetime))
 			fib6_clean_expires(rt);
 		else
-			fib6_set_expires(rt, jiffies + HZ * lifetime);
+			fib6_set_expires(net, rt, jiffies + HZ * lifetime);
 
 		fib6_info_release(rt);
 	}
@@ -3755,8 +3755,8 @@  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 		rt->dst_nocount = true;
 
 	if (cfg->fc_flags & RTF_EXPIRES)
-		fib6_set_expires(rt, jiffies +
-				clock_t_to_jiffies(cfg->fc_expires));
+		fib6_set_expires(net, rt, jiffies +
+				 clock_t_to_jiffies(cfg->fc_expires));
 	else
 		fib6_clean_expires(rt);