diff mbox series

[RFC] Exempt multicast address from five-second neighbor lifetime

Message ID 0d7a29d2-499f-70ab-ee6f-ced4c9305181@akamai.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series [RFC] Exempt multicast address from five-second neighbor lifetime | expand

Commit Message

Jeff Dike Oct. 16, 2020, 2:54 p.m. UTC
Commit 58956317c8de guarantees arp table entries a five-second lifetime.  We have some apps which make heavy use of multicast, and these can cause the table to overflow by filling it with multicast addresses which can't be GC-ed until their five seconds are up.
This patch allows multicast addresses to be thrown out before they've lived out their five seconds.

Signed-off-by: Jeff Dike <jdike@akamai.com>

Comments

Jesse Brandeburg Oct. 16, 2020, 9:59 p.m. UTC | #1
Hi Jeff, 

Jeff Dike wrote:


Your subject should indicate net or net-next as the tree, please see:
https://www.kernel.org/doc/html/latest/networking/netdev-FAQ.html

> Commit 58956317c8de guarantees arp table entries a five-second lifetime.  We have some apps which make heavy use of multicast, and these can cause the table to overflow by filling it with multicast addresses which can't be GC-ed until their five seconds are up.
> This patch allows multicast addresses to be thrown out before they've lived out their five seconds.

Not sure how many patches you've submitted, but your commit message
should be wrapped at 68 or 72 characters or so.

> 
> Signed-off-by: Jeff Dike <jdike@akamai.com>

your triple-dash and a diffstat should be right here, did you hand edit
this mail instead of using git format-patch to generate it?

> 
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index 81ee17594c32..22ced1381ede 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -204,6 +204,7 @@ struct neigh_table {
>  	int			(*pconstructor)(struct pneigh_entry *);
>  	void			(*pdestructor)(struct pneigh_entry *);
>  	void			(*proxy_redo)(struct sk_buff *skb);
> +	int			(*is_multicast)(const void *pkey);
>  	bool			(*allow_add)(const struct net_device *dev,
>  					     struct netlink_ext_ack *extack);
>  	char			*id;
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 8e39e28b0a8d..9500d28a43b0 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
>  
>  			write_lock(&n->lock);
>  			if ((n->nud_state == NUD_FAILED) ||
> +			    (tbl->is_multicast &&
> +			     tbl->is_multicast(n->primary_key)) ||
>  			    time_after(tref, n->updated))
>  				remove = true;
>  			write_unlock(&n->lock);
> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
> index 687971d83b4e..110d6d408edc 100644
> --- a/net/ipv4/arp.c
> +++ b/net/ipv4/arp.c
> @@ -79,6 +79,7 @@
>  #include <linux/socket.h>
>  #include <linux/sockios.h>
>  #include <linux/errno.h>
> +#define __UAPI_DEF_IN_CLASS 1

Why is this added in the middle of the includes?

>  #include <linux/in.h>
>  #include <linux/mm.h>
>  #include <linux/inet.h>
> @@ -125,6 +126,7 @@ static int arp_constructor(struct neighbour *neigh);
>  static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
>  static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
>  static void parp_redo(struct sk_buff *skb);
> +static int arp_is_multicast(const void *pkey);
>  
>  static const struct neigh_ops arp_generic_ops = {
>  	.family =		AF_INET,
> @@ -156,6 +158,7 @@ struct neigh_table arp_tbl = {
>  	.key_eq		= arp_key_eq,
>  	.constructor	= arp_constructor,
>  	.proxy_redo	= parp_redo,
> +	.is_multicast   = arp_is_multicast,
>  	.id		= "arp_cache",
>  	.parms		= {
>  		.tbl			= &arp_tbl,
> @@ -928,6 +931,10 @@ static void parp_redo(struct sk_buff *skb)
>  	arp_process(dev_net(skb->dev), NULL, skb);
>  }
>  
> +static int arp_is_multicast(const void *pkey)
> +{
> +	return IN_MULTICAST(htonl(*((u32 *) pkey)));
> +}

Why not just move this function up and skip the declaration above?

>  
>  /*
>   *	Receive an arp request from the device layer.
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 27f29b957ee7..b42c9314cc4e 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
>  static int pndisc_constructor(struct pneigh_entry *n);
>  static void pndisc_destructor(struct pneigh_entry *n);
>  static void pndisc_redo(struct sk_buff *skb);
> +static int ndisc_is_multicast(const void *pkey);
>  
>  static const struct neigh_ops ndisc_generic_ops = {
>  	.family =		AF_INET6,
> @@ -115,6 +116,7 @@ struct neigh_table nd_tbl = {
>  	.pconstructor =	pndisc_constructor,
>  	.pdestructor =	pndisc_destructor,
>  	.proxy_redo =	pndisc_redo,
> +	.is_multicast = ndisc_is_multicast,
>  	.allow_add  =   ndisc_allow_add,
>  	.id =		"ndisc_cache",
>  	.parms = {
> @@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb)
>  	kfree_skb(skb);
>  }
>  
> +static int ndisc_is_multicast(const void *pkey)
> +{
> +	return (((struct in6_addr *) pkey)->in6_u.u6_addr8[0] & 0xf0) == 0xf0;
> +}
> +

Again, just move this up above the first usage?

Does the above work on big and little endian, just seems suspicious
even though you're using a byte offset? Also I suspect this will
trigger a warning with sparse or with W=2 about pointer alignment.
Jeff Dike Oct. 19, 2020, 2:41 p.m. UTC | #2
Hi Jesse,

> Your subject should indicate net or net-next as the tree, please see:

I was intending more to see if this is an intrinsically bad idea than for it to go directly into a tree right now.

> Not sure how many patches you've submitted, but your commit message
> should be wrapped at 68 or 72 characters or so.

Not my first patch, but the first sent through Thunderbird and Outlook.

> your triple-dash and a diffstat should be right here, did you hand edit
> this mail instead of using git format-patch to generate it?

Yup, the log message on my internal commit wouldn't make too much sense outside of Akamai, so I wrote this changelog in my MUA.
 
> Why is this added in the middle of the includes?

I needed to get IN_MULTICAST defined - this is one reason I don't expect this patch as it stands to go anywhere.  IN_MULTICAST seems intended just for userspace use, but there isn't any way to ask the same question in the kernel.  The same seems to be true of IPv6 multicast addresses.

>> +static int arp_is_multicast(const void *pkey)
>> +{
>> +	return IN_MULTICAST(htonl(*((u32 *) pkey)));
>> +}
> 
> Why not just move this function up and skip the declaration above?

Following existing practice in this file.  Similar functions are declared above the structure and defined below it.

>>  
>> +static int ndisc_is_multicast(const void *pkey)
>> +{
>> +	return (((struct in6_addr *) pkey)->in6_u.u6_addr8[0] & 0xf0) == 0xf0;
>> +}
>> +
> 
> Again, just move this up above the first usage?

Following existing practice again.
> 
> Does the above work on big and little endian, just seems suspicious
> even though you're using a byte offset? Also I suspect this will
> trigger a warning with sparse or with W=2 about pointer alignment.

I used the byte offsets on purpose for this reason.  Didn't check if sparse had any problems with it.

Jeff
diff mbox series

Patch

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 81ee17594c32..22ced1381ede 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -204,6 +204,7 @@  struct neigh_table {
 	int			(*pconstructor)(struct pneigh_entry *);
 	void			(*pdestructor)(struct pneigh_entry *);
 	void			(*proxy_redo)(struct sk_buff *skb);
+	int			(*is_multicast)(const void *pkey);
 	bool			(*allow_add)(const struct net_device *dev,
 					     struct netlink_ext_ack *extack);
 	char			*id;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e39e28b0a8d..9500d28a43b0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -235,6 +235,8 @@  static int neigh_forced_gc(struct neigh_table *tbl)
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
+			    (tbl->is_multicast &&
+			     tbl->is_multicast(n->primary_key)) ||
 			    time_after(tref, n->updated))
 				remove = true;
 			write_unlock(&n->lock);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 687971d83b4e..110d6d408edc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -79,6 +79,7 @@ 
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/errno.h>
+#define __UAPI_DEF_IN_CLASS 1
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/inet.h>
@@ -125,6 +126,7 @@  static int arp_constructor(struct neighbour *neigh);
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static void parp_redo(struct sk_buff *skb);
+static int arp_is_multicast(const void *pkey);
 
 static const struct neigh_ops arp_generic_ops = {
 	.family =		AF_INET,
@@ -156,6 +158,7 @@  struct neigh_table arp_tbl = {
 	.key_eq		= arp_key_eq,
 	.constructor	= arp_constructor,
 	.proxy_redo	= parp_redo,
+	.is_multicast   = arp_is_multicast,
 	.id		= "arp_cache",
 	.parms		= {
 		.tbl			= &arp_tbl,
@@ -928,6 +931,10 @@  static void parp_redo(struct sk_buff *skb)
 	arp_process(dev_net(skb->dev), NULL, skb);
 }
 
+static int arp_is_multicast(const void *pkey)
+{
+	return IN_MULTICAST(htonl(*((u32 *) pkey)));
+}
 
 /*
  *	Receive an arp request from the device layer.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 27f29b957ee7..b42c9314cc4e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -81,6 +81,7 @@  static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static int pndisc_constructor(struct pneigh_entry *n);
 static void pndisc_destructor(struct pneigh_entry *n);
 static void pndisc_redo(struct sk_buff *skb);
+static int ndisc_is_multicast(const void *pkey);
 
 static const struct neigh_ops ndisc_generic_ops = {
 	.family =		AF_INET6,
@@ -115,6 +116,7 @@  struct neigh_table nd_tbl = {
 	.pconstructor =	pndisc_constructor,
 	.pdestructor =	pndisc_destructor,
 	.proxy_redo =	pndisc_redo,
+	.is_multicast = ndisc_is_multicast,
 	.allow_add  =   ndisc_allow_add,
 	.id =		"ndisc_cache",
 	.parms = {
@@ -1706,6 +1708,11 @@  static void pndisc_redo(struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static int ndisc_is_multicast(const void *pkey)
+{
+	return (((struct in6_addr *) pkey)->in6_u.u6_addr8[0] & 0xf0) == 0xf0;
+}
+
 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = __in6_dev_get(skb->dev);