diff mbox series

[net,V3] Exempt multicast addresses from five-second neighbor lifetime

Message ID 20201110172305.28056-1-jdike@akamai.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net,V3] Exempt multicast addresses from five-second neighbor lifetime | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 4960 this patch: 4960
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 64 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 5284 this patch: 5284
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Jeff Dike Nov. 10, 2020, 5:23 p.m. UTC
Commit 58956317c8de ("neighbor: Improve garbage collection")
guarantees neighbour table entries a five-second lifetime.  Processes
which make heavy use of multicast can fill the neighbour table with
multicast addresses in five seconds.  At that point, neighbour entries
can't be GC-ed because they aren't five seconds old yet, the kernel
log starts to fill up with "neighbor table overflow!" messages, and
sends start to fail.

This patch allows multicast addresses to be thrown out before they've
lived out their five seconds.  This makes room for non-multicast
addresses and makes messages to all addresses more reliable in these
circumstances.

Signed-off-by: Jeff Dike <jdike@akamai.com>
---
 include/net/neighbour.h | 1 +
 net/core/neighbour.c    | 2 ++
 net/ipv4/arp.c          | 6 ++++++
 net/ipv6/ndisc.c        | 7 +++++++
 4 files changed, 16 insertions(+)

Comments

Jakub Kicinski Nov. 12, 2020, 5:32 p.m. UTC | #1
On Tue, 10 Nov 2020 12:23:05 -0500 Jeff Dike wrote:
> Commit 58956317c8de ("neighbor: Improve garbage collection")
> guarantees neighbour table entries a five-second lifetime.  Processes
> which make heavy use of multicast can fill the neighbour table with
> multicast addresses in five seconds.  At that point, neighbour entries
> can't be GC-ed because they aren't five seconds old yet, the kernel
> log starts to fill up with "neighbor table overflow!" messages, and
> sends start to fail.
> 
> This patch allows multicast addresses to be thrown out before they've
> lived out their five seconds.  This makes room for non-multicast
> addresses and makes messages to all addresses more reliable in these
> circumstances.

We should add 

Fixes: 58956317c8de ("neighbor: Improve garbage collection")

right?

> Signed-off-by: Jeff Dike <jdike@akamai.com>

> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
> index 687971d83b4e..097aa8bf07ee 100644
> --- a/net/ipv4/arp.c
> +++ b/net/ipv4/arp.c
> @@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh);
>  static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
>  static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
>  static void parp_redo(struct sk_buff *skb);
> +static int arp_is_multicast(const void *pkey);
>  
>  static const struct neigh_ops arp_generic_ops = {
>  	.family =		AF_INET,
> @@ -156,6 +157,7 @@ struct neigh_table arp_tbl = {
>  	.key_eq		= arp_key_eq,
>  	.constructor	= arp_constructor,
>  	.proxy_redo	= parp_redo,
> +	.is_multicast   = arp_is_multicast,

extreme nit pick - please align the = sign using tabs like the
surrounding code does.

>  	.id		= "arp_cache",
>  	.parms		= {
>  		.tbl			= &arp_tbl,
> @@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb)
>  	arp_process(dev_net(skb->dev), NULL, skb);
>  }
>  
> +static int arp_is_multicast(const void *pkey)
> +{
> +	return ipv4_is_multicast(*((__be32 *)pkey));
> +}
>  
>  /*
>   *	Receive an arp request from the device layer.
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 27f29b957ee7..67457cfadcd2 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
>  static int pndisc_constructor(struct pneigh_entry *n);
>  static void pndisc_destructor(struct pneigh_entry *n);
>  static void pndisc_redo(struct sk_buff *skb);
> +static int ndisc_is_multicast(const void *pkey);
>  
>  static const struct neigh_ops ndisc_generic_ops = {
>  	.family =		AF_INET6,
> @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = {
>  	.pconstructor =	pndisc_constructor,
>  	.pdestructor =	pndisc_destructor,
>  	.proxy_redo =	pndisc_redo,
> +	.is_multicast = ndisc_is_multicast,

looks like the character after = is expected to be a tab, for better or
worse

>  	.allow_add  =   ndisc_allow_add,
>  	.id =		"ndisc_cache",
>  	.parms = {
> @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb)
>  	kfree_skb(skb);
>  }
>  
> +static int ndisc_is_multicast(const void *pkey)
> +{
> +	return ipv6_addr_is_multicast((struct in6_addr *)pkey);
> +}
> +
>  static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
>  {
>  	struct inet6_dev *idev = __in6_dev_get(skb->dev);
Jeff Dike Nov. 13, 2020, 1:59 a.m. UTC | #2
Hi Jakub,

Yes to all your suggestions.

Thanks for the review.

Jeff

On 11/12/20 12:32 PM, Jakub Kicinski wrote:
> On Tue, 10 Nov 2020 12:23:05 -0500 Jeff Dike wrote:
>> Commit 58956317c8de ("neighbor: Improve garbage collection")
>> guarantees neighbour table entries a five-second lifetime.  Processes
>> which make heavy use of multicast can fill the neighbour table with
>> multicast addresses in five seconds.  At that point, neighbour entries
>> can't be GC-ed because they aren't five seconds old yet, the kernel
>> log starts to fill up with "neighbor table overflow!" messages, and
>> sends start to fail.
>>
>> This patch allows multicast addresses to be thrown out before they've
>> lived out their five seconds.  This makes room for non-multicast
>> addresses and makes messages to all addresses more reliable in these
>> circumstances.
> 
> We should add 
> 
> Fixes: 58956317c8de ("neighbor: Improve garbage collection")
> 
> right?
> 
>> Signed-off-by: Jeff Dike <jdike@akamai.com>
> 
>> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
>> index 687971d83b4e..097aa8bf07ee 100644
>> --- a/net/ipv4/arp.c
>> +++ b/net/ipv4/arp.c
>> @@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh);
>>  static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
>>  static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
>>  static void parp_redo(struct sk_buff *skb);
>> +static int arp_is_multicast(const void *pkey);
>>  
>>  static const struct neigh_ops arp_generic_ops = {
>>  	.family =		AF_INET,
>> @@ -156,6 +157,7 @@ struct neigh_table arp_tbl = {
>>  	.key_eq		= arp_key_eq,
>>  	.constructor	= arp_constructor,
>>  	.proxy_redo	= parp_redo,
>> +	.is_multicast   = arp_is_multicast,
> 
> extreme nit pick - please align the = sign using tabs like the
> surrounding code does.
> 
>>  	.id		= "arp_cache",
>>  	.parms		= {
>>  		.tbl			= &arp_tbl,
>> @@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb)
>>  	arp_process(dev_net(skb->dev), NULL, skb);
>>  }
>>  
>> +static int arp_is_multicast(const void *pkey)
>> +{
>> +	return ipv4_is_multicast(*((__be32 *)pkey));
>> +}
>>  
>>  /*
>>   *	Receive an arp request from the device layer.
>> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
>> index 27f29b957ee7..67457cfadcd2 100644
>> --- a/net/ipv6/ndisc.c
>> +++ b/net/ipv6/ndisc.c
>> @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
>>  static int pndisc_constructor(struct pneigh_entry *n);
>>  static void pndisc_destructor(struct pneigh_entry *n);
>>  static void pndisc_redo(struct sk_buff *skb);
>> +static int ndisc_is_multicast(const void *pkey);
>>  
>>  static const struct neigh_ops ndisc_generic_ops = {
>>  	.family =		AF_INET6,
>> @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = {
>>  	.pconstructor =	pndisc_constructor,
>>  	.pdestructor =	pndisc_destructor,
>>  	.proxy_redo =	pndisc_redo,
>> +	.is_multicast = ndisc_is_multicast,
> 
> looks like the character after = is expected to be a tab, for better or
> worse
> 
>>  	.allow_add  =   ndisc_allow_add,
>>  	.id =		"ndisc_cache",
>>  	.parms = {
>> @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb)
>>  	kfree_skb(skb);
>>  }
>>  
>> +static int ndisc_is_multicast(const void *pkey)
>> +{
>> +	return ipv6_addr_is_multicast((struct in6_addr *)pkey);
>> +}
>> +
>>  static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
>>  {
>>  	struct inet6_dev *idev = __in6_dev_get(skb->dev);
>
diff mbox series

Patch

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 81ee17594c32..22ced1381ede 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -204,6 +204,7 @@  struct neigh_table {
 	int			(*pconstructor)(struct pneigh_entry *);
 	void			(*pdestructor)(struct pneigh_entry *);
 	void			(*proxy_redo)(struct sk_buff *skb);
+	int			(*is_multicast)(const void *pkey);
 	bool			(*allow_add)(const struct net_device *dev,
 					     struct netlink_ext_ack *extack);
 	char			*id;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e39e28b0a8d..9500d28a43b0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -235,6 +235,8 @@  static int neigh_forced_gc(struct neigh_table *tbl)
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
+			    (tbl->is_multicast &&
+			     tbl->is_multicast(n->primary_key)) ||
 			    time_after(tref, n->updated))
 				remove = true;
 			write_unlock(&n->lock);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 687971d83b4e..097aa8bf07ee 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -125,6 +125,7 @@  static int arp_constructor(struct neighbour *neigh);
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static void parp_redo(struct sk_buff *skb);
+static int arp_is_multicast(const void *pkey);
 
 static const struct neigh_ops arp_generic_ops = {
 	.family =		AF_INET,
@@ -156,6 +157,7 @@  struct neigh_table arp_tbl = {
 	.key_eq		= arp_key_eq,
 	.constructor	= arp_constructor,
 	.proxy_redo	= parp_redo,
+	.is_multicast   = arp_is_multicast,
 	.id		= "arp_cache",
 	.parms		= {
 		.tbl			= &arp_tbl,
@@ -928,6 +930,10 @@  static void parp_redo(struct sk_buff *skb)
 	arp_process(dev_net(skb->dev), NULL, skb);
 }
 
+static int arp_is_multicast(const void *pkey)
+{
+	return ipv4_is_multicast(*((__be32 *)pkey));
+}
 
 /*
  *	Receive an arp request from the device layer.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 27f29b957ee7..67457cfadcd2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -81,6 +81,7 @@  static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static int pndisc_constructor(struct pneigh_entry *n);
 static void pndisc_destructor(struct pneigh_entry *n);
 static void pndisc_redo(struct sk_buff *skb);
+static int ndisc_is_multicast(const void *pkey);
 
 static const struct neigh_ops ndisc_generic_ops = {
 	.family =		AF_INET6,
@@ -115,6 +116,7 @@  struct neigh_table nd_tbl = {
 	.pconstructor =	pndisc_constructor,
 	.pdestructor =	pndisc_destructor,
 	.proxy_redo =	pndisc_redo,
+	.is_multicast = ndisc_is_multicast,
 	.allow_add  =   ndisc_allow_add,
 	.id =		"ndisc_cache",
 	.parms = {
@@ -1706,6 +1708,11 @@  static void pndisc_redo(struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static int ndisc_is_multicast(const void *pkey)
+{
+	return ipv6_addr_is_multicast((struct in6_addr *)pkey);
+}
+
 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = __in6_dev_get(skb->dev);