diff mbox

[PATCHv3,net-next,5/7] net: add confirm_neigh method to dst_ops

Message ID 1485899827-5212-6-git-send-email-ja@ssi.bg (mailing list archive)
State Not Applicable
Headers show

Commit Message

Julian Anastasov Jan. 31, 2017, 9:57 p.m. UTC
Add confirm_neigh method to dst_ops and use it from IPv4 and IPv6
to lookup and confirm the neighbour. Its usage via the new helper
dst_confirm_neigh() should be restricted to MSG_PROBE users for
performance reasons.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Eric Dumazet <edumazet@google.com>
---
 include/net/arp.h      | 16 ++++++++++++++++
 include/net/dst.h      |  7 +++++++
 include/net/dst_ops.h  |  2 ++
 include/net/ndisc.h    | 17 +++++++++++++++++
 net/ipv4/route.c       | 19 +++++++++++++++++++
 net/ipv6/route.c       | 16 ++++++++++++++++
 net/xfrm/xfrm_policy.c | 16 ++++++++++++++++
 7 files changed, 93 insertions(+)

Comments

Steffen Klassert Feb. 1, 2017, 7:44 a.m. UTC | #1
On Tue, Jan 31, 2017 at 11:57:05PM +0200, Julian Anastasov wrote:
>  
>  static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index 177e208..c010ee0 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -2856,6 +2856,20 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
>  	return dst->path->ops->neigh_lookup(dst, skb, daddr);
>  }
>  
> +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
> +{
> +	const struct dst_entry *path = dst->path;
> +
> +	if (path == dst) {

I think path can not be equal to dst here, otherwise we would
have an infinite recursion.

> +		dst->ops->confirm_neigh(dst, daddr);
> +	} else {
> +		/* daddr can be from different family and we need the
> +		 * tunnel address. How to get it?
> +		 */

This is only called on a xfrm_dst, so you should have dst->xfrm set.
You can get the daddr of this transform with:

xfrm_address_t *daddr = &xfrm->id.daddr;


> +		path->ops->confirm_neigh(path, NULL);

I think here it is better to go through the whole chain
of transformations with

child->ops->confirm_neigh(path, daddr);

> +	}
> +}
> +
>  int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
>  {
>  	int err = 0;
> @@ -2882,6 +2896,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
>  			dst_ops->link_failure = xfrm_link_failure;
>  		if (likely(dst_ops->neigh_lookup == NULL))
>  			dst_ops->neigh_lookup = xfrm_neigh_lookup;
> +		if (likely(!dst_ops->confirm_neigh))
> +			dst_ops->confirm_neigh = xfrm_confirm_neigh;

We also have address family depended dst_ops, look for
xfrm4_dst_ops_template/xfrm6_dst_ops_template.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov Feb. 1, 2017, 11:04 p.m. UTC | #2
Hello,

On Wed, 1 Feb 2017, Steffen Klassert wrote:

> On Tue, Jan 31, 2017 at 11:57:05PM +0200, Julian Anastasov wrote:
> >  
> > +static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
> > +{
> > +	const struct dst_entry *path = dst->path;
> > +
> > +	if (path == dst) {
> 
> I think path can not be equal to dst here, otherwise we would
> have an infinite recursion.

	Yep, I know that, this place remained unfinished.

> > +		dst->ops->confirm_neigh(dst, daddr);
> > +	} else {
> > +		/* daddr can be from different family and we need the
> > +		 * tunnel address. How to get it?
> > +		 */
> 
> This is only called on a xfrm_dst, so you should have dst->xfrm set.
> You can get the daddr of this transform with:
> 
> xfrm_address_t *daddr = &xfrm->id.daddr;

	I hope so but see below...

> > +		path->ops->confirm_neigh(path, NULL);
> 
> I think here it is better to go through the whole chain
> of transformations with
> 
> child->ops->confirm_neigh(path, daddr);

	It may sounds good. But only dst->path->ops->confirm_neigh
points to real IPv4/IPv6 function. And also, I guess, the
family can change while walking the chain, so we should be
careful while providing the original daddr (which comes from
sendmsg). I had the idea to walk all xforms to get the latest
tunnel address but this can be slow. Something like this?:

static void xfrm_confirm_neigh(const struct dst_entry *dst, const void 
*daddr)
{
	const struct dst_entry *path = dst->path;

	/* By default, daddr is from sendmsg() if we have no tunnels */
	for (;dst != path; dst = dst->child) {
		const struct xfrm_state *xfrm = dst->xfrm;

		/* Use address from last tunnel	*/
		if (xfrm->props.mode != XFRM_MODE_TRANSPORT)
			daddr = &xfrm->id.daddr;
	}
	path->ops->confirm_neigh(path, daddr);
}

	This should work as long as path and last tunnel are
from same family. Also, after checking xfrm_dst_lookup() I'm not
sure using just &xfrm->id.daddr is enough. Should we consider
more places for daddr value?

> >  int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
> >  {
> >  	int err = 0;
> > @@ -2882,6 +2896,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
> >  			dst_ops->link_failure = xfrm_link_failure;
> >  		if (likely(dst_ops->neigh_lookup == NULL))
> >  			dst_ops->neigh_lookup = xfrm_neigh_lookup;
> > +		if (likely(!dst_ops->confirm_neigh))
> > +			dst_ops->confirm_neigh = xfrm_confirm_neigh;
> 
> We also have address family depended dst_ops, look for
> xfrm4_dst_ops_template/xfrm6_dst_ops_template.

	For now I installed common handler, just like
xfrm_neigh_lookup. BTW this function has problem from
commit f894cbf847c9, it looks like dst is wrongly provided,
first arg should be dst->path.

	But as dst_ops contains the family, I think, we can know
what kind of daddr is provided initially (dst->ops->family).
So far, the above logic does not need to compare the families.
But as I don't know the code well, I'm not sure, my assumptions are:

- transports do not change the family
- tunnels may change the family
- last tunnel gets dst0->path route from its family

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steffen Klassert Feb. 3, 2017, 7:46 a.m. UTC | #3
On Thu, Feb 02, 2017 at 01:04:34AM +0200, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Wed, 1 Feb 2017, Steffen Klassert wrote:
> 
> > 
> > I think here it is better to go through the whole chain
> > of transformations with
> > 
> > child->ops->confirm_neigh(path, daddr);
> 
> 	It may sounds good. But only dst->path->ops->confirm_neigh
> points to real IPv4/IPv6 function. And also, I guess, the
> family can change while walking the chain, so we should be
> careful while providing the original daddr (which comes from
> sendmsg). I had the idea to walk all xforms to get the latest
> tunnel address but this can be slow. 

Is this a per packet call or is the information cached somewhere?

> Something like this?:
> 
> static void xfrm_confirm_neigh(const struct dst_entry *dst, const void 
> *daddr)
> {
> 	const struct dst_entry *path = dst->path;
> 
> 	/* By default, daddr is from sendmsg() if we have no tunnels */
> 	for (;dst != path; dst = dst->child) {
> 		const struct xfrm_state *xfrm = dst->xfrm;
> 
> 		/* Use address from last tunnel	*/
> 		if (xfrm->props.mode != XFRM_MODE_TRANSPORT)
> 			daddr = &xfrm->id.daddr;
> 	}
> 	path->ops->confirm_neigh(path, daddr);
> }

I thought about this (completely untested) one:

static void xfrm_confirm_neigh(const struct dst_entry *dst, const void
*daddr)

{
	const struct dst_entry *dst = dst->child;
	const struct xfrm_state *xfrm = dst->xfrm;

	if (xfrm)
		daddr = &xfrm->id.daddr;

	dst->ops->confirm_neigh(dst, daddr);
}

Only the last dst_entry in this call chain (path) sould
not have dst->xfrm set. So it finally calls path->ops->confirm_neigh
with the daddr of the last transformation. But your version
should do the same.

> 
> 	This should work as long as path and last tunnel are
> from same family.

Yes, the outer mode of the last transformation has the same
family as path.

> Also, after checking xfrm_dst_lookup() I'm not
> sure using just &xfrm->id.daddr is enough. Should we consider
> more places for daddr value?

Yes, indeed. We should do it like xfrm_dst_lookup() does it.

> 
> > >  int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
> > >  {
> > >  	int err = 0;
> > > @@ -2882,6 +2896,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
> > >  			dst_ops->link_failure = xfrm_link_failure;
> > >  		if (likely(dst_ops->neigh_lookup == NULL))
> > >  			dst_ops->neigh_lookup = xfrm_neigh_lookup;
> > > +		if (likely(!dst_ops->confirm_neigh))
> > > +			dst_ops->confirm_neigh = xfrm_confirm_neigh;
> > 
> > We also have address family depended dst_ops, look for
> > xfrm4_dst_ops_template/xfrm6_dst_ops_template.
> 
> 	For now I installed common handler, just like
> xfrm_neigh_lookup. BTW this function has problem from
> commit f894cbf847c9, it looks like dst is wrongly provided,
> first arg should be dst->path.

Yes, this should use dst->path of course. I really wonder why
nobody noticed this for the last five years.

> 
> 	But as dst_ops contains the family, I think, we can know
> what kind of daddr is provided initially (dst->ops->family).
> So far, the above logic does not need to compare the families.
> But as I don't know the code well, I'm not sure, my assumptions are:
> 
> - transports do not change the family
> - tunnels may change the family
> - last tunnel gets dst0->path route from its family

This is correct.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Julian Anastasov Feb. 3, 2017, 9:16 a.m. UTC | #4
Hello,

On Fri, 3 Feb 2017, Steffen Klassert wrote:

> On Thu, Feb 02, 2017 at 01:04:34AM +0200, Julian Anastasov wrote:
> > 
> > 	It may sounds good. But only dst->path->ops->confirm_neigh
> > points to real IPv4/IPv6 function. And also, I guess, the
> > family can change while walking the chain, so we should be
> > careful while providing the original daddr (which comes from
> > sendmsg). I had the idea to walk all xforms to get the latest
> > tunnel address but this can be slow. 
> 
> Is this a per packet call or is the information cached somewhere?

	It is for every packet that is sent with both
MSG_CONFIRM and MSG_PROBE, i.e. when nothing is sent
on the wire. It is used by patch 6 just for UDP, RAW, ICMP, L2TP.

> > Something like this?:
> > 
> > static void xfrm_confirm_neigh(const struct dst_entry *dst, const void 
> > *daddr)
> > {
> > 	const struct dst_entry *path = dst->path;
> > 
> > 	/* By default, daddr is from sendmsg() if we have no tunnels */
> > 	for (;dst != path; dst = dst->child) {
> > 		const struct xfrm_state *xfrm = dst->xfrm;
> > 
> > 		/* Use address from last tunnel	*/
> > 		if (xfrm->props.mode != XFRM_MODE_TRANSPORT)
> > 			daddr = &xfrm->id.daddr;
> > 	}
> > 	path->ops->confirm_neigh(path, daddr);
> > }
> 
> I thought about this (completely untested) one:
> 
> static void xfrm_confirm_neigh(const struct dst_entry *dst, const void
> *daddr)
> 
> {
> 	const struct dst_entry *dst = dst->child;

	When starting and dst arg is first xform, the above
assignment skips it. May be both lines should be swapped.

> 	const struct xfrm_state *xfrm = dst->xfrm;
> 
> 	if (xfrm)
> 		daddr = &xfrm->id.daddr;
> 
> 	dst->ops->confirm_neigh(dst, daddr);
> }
> 
> Only the last dst_entry in this call chain (path) sould
> not have dst->xfrm set. So it finally calls path->ops->confirm_neigh
> with the daddr of the last transformation. But your version
> should do the same.

	Above can be fixed but it is risky for the stack
usage when using recursion. In practice, there should not be
many xforms, though. Also, is id.daddr valid for transports?

> > 	This should work as long as path and last tunnel are
> > from same family.
> 
> Yes, the outer mode of the last transformation has the same
> family as path.
> 
> > Also, after checking xfrm_dst_lookup() I'm not
> > sure using just &xfrm->id.daddr is enough. Should we consider
> > more places for daddr value?
> 
> Yes, indeed. We should do it like xfrm_dst_lookup() does it.

	OK, I'll get logic from there. Should I use loop or
recursion?

Regards

--
Julian Anastasov <ja@ssi.bg>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steffen Klassert Feb. 3, 2017, 10:08 a.m. UTC | #5
On Fri, Feb 03, 2017 at 11:16:16AM +0200, Julian Anastasov wrote:
> 
> 	Hello,
> 
> On Fri, 3 Feb 2017, Steffen Klassert wrote:
> > 
> > I thought about this (completely untested) one:
> > 
> > static void xfrm_confirm_neigh(const struct dst_entry *dst, const void
> > *daddr)
> > 
> > {
> > 	const struct dst_entry *dst = dst->child;
> 
> 	When starting and dst arg is first xform, the above
> assignment skips it. May be both lines should be swapped.

Yes, that's better :)

> 
> > 	const struct xfrm_state *xfrm = dst->xfrm;
> > 
> > 	if (xfrm)
> > 		daddr = &xfrm->id.daddr;
> > 
> > 	dst->ops->confirm_neigh(dst, daddr);
> > }
> > 
> > Only the last dst_entry in this call chain (path) sould
> > not have dst->xfrm set. So it finally calls path->ops->confirm_neigh
> > with the daddr of the last transformation. But your version
> > should do the same.
> 
> 	Above can be fixed but it is risky for the stack
> usage when using recursion. In practice, there should not be
> many xforms, though. Also, is id.daddr valid for transports?

Yes, it is needed for the lookup. But id.daddr ist the same
as daddr of the packet on transport mode.

> 
> > > 	This should work as long as path and last tunnel are
> > > from same family.
> > 
> > Yes, the outer mode of the last transformation has the same
> > family as path.
> > 
> > > Also, after checking xfrm_dst_lookup() I'm not
> > > sure using just &xfrm->id.daddr is enough. Should we consider
> > > more places for daddr value?
> > 
> > Yes, indeed. We should do it like xfrm_dst_lookup() does it.
> 
> 	OK, I'll get logic from there. Should I use loop or
> recursion?

I don't have a strong opinion on that. Both should work,
choose whatever you prefer.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/arp.h b/include/net/arp.h
index 5e0f891..65619a2 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -35,6 +35,22 @@  static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
 	return n;
 }
 
+static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
+{
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+	n = __ipv4_neigh_lookup_noref(dev, key);
+	if (n) {
+		unsigned long now = jiffies;
+
+		/* avoid dirtying neighbour */
+		if (n->confirmed != now)
+			n->confirmed = now;
+	}
+	rcu_read_unlock_bh();
+}
+
 void arp_init(void);
 int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 void arp_send(int type, int ptype, __be32 dest_ip,
diff --git a/include/net/dst.h b/include/net/dst.h
index 6835d22..3a3b34b 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -477,6 +477,13 @@  static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst
 	return IS_ERR(n) ? NULL : n;
 }
 
+static inline void dst_confirm_neigh(const struct dst_entry *dst,
+				     const void *daddr)
+{
+	if (dst->ops->confirm_neigh)
+		dst->ops->confirm_neigh(dst, daddr);
+}
+
 static inline void dst_link_failure(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 8a2b66d..c84b328 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -33,6 +33,8 @@  struct dst_ops {
 	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
 						struct sk_buff *skb,
 						const void *daddr);
+	void			(*confirm_neigh)(const struct dst_entry *dst,
+						 const void *daddr);
 
 	struct kmem_cache	*kmem_cachep;
 
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index d562a2f..8a02146 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -391,6 +391,23 @@  static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
 	return n;
 }
 
+static inline void __ipv6_confirm_neigh(struct net_device *dev,
+					const void *pkey)
+{
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+	n = __ipv6_neigh_lookup_noref(dev, pkey);
+	if (n) {
+		unsigned long now = jiffies;
+
+		/* avoid dirtying neighbour */
+		if (n->confirmed != now)
+			n->confirmed = now;
+	}
+	rcu_read_unlock_bh();
+}
+
 int ndisc_init(void);
 int ndisc_late_init(void);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4b7c231..cb494a5 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -154,6 +154,7 @@  static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 					   struct sk_buff *skb,
 					   const void *daddr);
+static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
 
 static struct dst_ops ipv4_dst_ops = {
 	.family =		AF_INET,
@@ -168,6 +169,7 @@  static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	.redirect =		ip_do_redirect,
 	.local_out =		__ip_local_out,
 	.neigh_lookup =		ipv4_neigh_lookup,
+	.confirm_neigh =	ipv4_confirm_neigh,
 };
 
 #define ECN_OR_COST(class)	TC_PRIO_##class
@@ -461,6 +463,23 @@  static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&arp_tbl, pkey, dev);
 }
 
+static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+{
+	struct net_device *dev = dst->dev;
+	const __be32 *pkey = daddr;
+	const struct rtable *rt;
+
+	rt = (const struct rtable *)dst;
+	if (rt->rt_gateway)
+		pkey = (const __be32 *)&rt->rt_gateway;
+	else if (!daddr ||
+		 (rt->rt_flags &
+		  (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL)))
+		return;
+
+	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
+}
+
 #define IP_IDENTS_SZ 2048u
 
 static atomic_t *ip_idents __read_mostly;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4b1f0f9..c876940 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -217,6 +217,21 @@  static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 	return neigh_create(&nd_tbl, daddr, dst->dev);
 }
 
+static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+{
+	struct net_device *dev = dst->dev;
+	struct rt6_info *rt = (struct rt6_info *)dst;
+
+	daddr = choose_neigh_daddr(rt, NULL, daddr);
+	if (!daddr)
+		return;
+	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
+		return;
+	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
+		return;
+	__ipv6_confirm_neigh(dev, daddr);
+}
+
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
 	.gc			=	ip6_dst_gc,
@@ -233,6 +248,7 @@  static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
 	.redirect		=	rt6_do_redirect,
 	.local_out		=	__ip6_local_out,
 	.neigh_lookup		=	ip6_neigh_lookup,
+	.confirm_neigh		=	ip6_confirm_neigh,
 };
 
 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 177e208..c010ee0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2856,6 +2856,20 @@  static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
 	return dst->path->ops->neigh_lookup(dst, skb, daddr);
 }
 
+static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+{
+	const struct dst_entry *path = dst->path;
+
+	if (path == dst) {
+		dst->ops->confirm_neigh(dst, daddr);
+	} else {
+		/* daddr can be from different family and we need the
+		 * tunnel address. How to get it?
+		 */
+		path->ops->confirm_neigh(path, NULL);
+	}
+}
+
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	int err = 0;
@@ -2882,6 +2896,8 @@  int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->link_failure = xfrm_link_failure;
 		if (likely(dst_ops->neigh_lookup == NULL))
 			dst_ops->neigh_lookup = xfrm_neigh_lookup;
+		if (likely(!dst_ops->confirm_neigh))
+			dst_ops->confirm_neigh = xfrm_confirm_neigh;
 		if (likely(afinfo->garbage_collect == NULL))
 			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
 		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);