Message ID | 20190124144434.24910-1-kamalheib1@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Jason Gunthorpe |
Headers | show |
Series | [for-rc] IB/rxe: Improve loopback marking | expand |
On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > Currently a packet is marked for loopback only if the source and > destination addresses match, This is not enough when multiple gids are s/match,/equals. > present in rxe device's gid table and the traffic is from one gid to > another, this patch fixes this by marking the packet for loopback if the > destination address found in the rxe's netdevice address list. s/another, this patch fixes this/ another. Fix it > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > Signed-off-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > --- > drivers/infiniband/sw/rxe/rxe_net.c | 45 +++++++++++++++++++++++++++-- > 1 file changed, 43 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > index 8fd03ae20efc..6ee984b7793b 100644 > --- a/drivers/infiniband/sw/rxe/rxe_net.c > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > @@ -368,6 +368,25 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, > ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); > } > > +static inline bool same_rxe4(struct net_device *ndev, struct in_addr *daddr) > +{ > + struct in_device *in_dev; > + bool same_rxe = false; > + > + in_dev = in_dev_get(ndev); > + if (!in_dev) > + return false; > + > + for_ifa(in_dev) > + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) { > + same_rxe = true; > + break; > + } > + endfor_ifa(in_dev); > + in_dev_put(in_dev); > + return same_rxe; > +} > + > static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > struct rxe_av *av) > { > @@ -384,7 +403,7 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > return -EHOSTUNREACH; > } > > - if (!memcmp(saddr, daddr, sizeof(*daddr))) > + if (same_rxe4(skb->dev, daddr)) > pkt->mask |= RXE_LOOPBACK_MASK; > > prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), > @@ -397,6 +416,28 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > return 0; > } > > +static inline bool same_rxe6(struct net_device *ndev, struct in6_addr *daddr) > +{ > + struct inet6_dev *in6_dev; > + struct inet6_ifaddr *ifp; > + bool same_rxe = false; > + > + in6_dev = in6_dev_get(ndev); > + if (!in6_dev) > + return false; > + > + read_lock_bh(&in6_dev->lock); To me at first glance it is unclear why lock is not needed when traveling the v4 list. Suggesting to comment on that. > + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { > + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) { > + same_rxe = true; > + break; > + } > + } > + read_unlock_bh(&in6_dev->lock); > + in6_dev_put(in6_dev); > + return same_rxe; > +} > + > static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, > struct rxe_av *av) > { > @@ -411,7 +452,7 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, > return -EHOSTUNREACH; > } > > - if (!memcmp(saddr, daddr, sizeof(*daddr))) > + if (same_rxe6(skb->dev, daddr)) > pkt->mask |= RXE_LOOPBACK_MASK; Thanks Kamal! Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com> Will give it a short test today. > > prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), > -- > 2.20.1 >
On Thu, Jan 24, 2019 at 05:10:18PM +0200, Yuval Shaia wrote: > On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > > Currently a packet is marked for loopback only if the source and > > destination addresses match, This is not enough when multiple gids are > > s/match,/equals. > > > present in rxe device's gid table and the traffic is from one gid to > > another, this patch fixes this by marking the packet for loopback if the > > destination address found in the rxe's netdevice address list. > > s/another, this patch fixes this/ another. Fix it > > > > > Fixes: 8700e3e7c485 ("Soft RoCE driver") > > Signed-off-by: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > > --- > > drivers/infiniband/sw/rxe/rxe_net.c | 45 +++++++++++++++++++++++++++-- > > 1 file changed, 43 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c > > index 8fd03ae20efc..6ee984b7793b 100644 > > --- a/drivers/infiniband/sw/rxe/rxe_net.c > > +++ b/drivers/infiniband/sw/rxe/rxe_net.c > > @@ -368,6 +368,25 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, > > ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); > > } > > > > +static inline bool same_rxe4(struct net_device *ndev, struct in_addr *daddr) > > +{ > > + struct in_device *in_dev; > > + bool same_rxe = false; > > + > > + in_dev = in_dev_get(ndev); > > + if (!in_dev) > > + return false; > > + > > + for_ifa(in_dev) > > + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) { > > + same_rxe = true; > > + break; > > + } > > + endfor_ifa(in_dev); > > + in_dev_put(in_dev); > > + return same_rxe; > > +} > > + > > static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > > struct rxe_av *av) > > { > > @@ -384,7 +403,7 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > > return -EHOSTUNREACH; > > } > > > > - if (!memcmp(saddr, daddr, sizeof(*daddr))) > > + if (same_rxe4(skb->dev, daddr)) > > pkt->mask |= RXE_LOOPBACK_MASK; > > > > prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), > > @@ -397,6 +416,28 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, > > return 0; > > } > > > > +static inline bool same_rxe6(struct net_device *ndev, struct in6_addr *daddr) > > +{ > > + struct inet6_dev *in6_dev; > > + struct inet6_ifaddr *ifp; > > + bool same_rxe = false; > > + > > + in6_dev = in6_dev_get(ndev); > > + if (!in6_dev) > > + return false; > > + > > + read_lock_bh(&in6_dev->lock); > > To me at first glance it is unclear why lock is not needed when traveling > the v4 list. Suggesting to comment on that. > > > + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { > > + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) { > > + same_rxe = true; > > + break; > > + } > > + } > > + read_unlock_bh(&in6_dev->lock); > > + in6_dev_put(in6_dev); > > + return same_rxe; > > +} > > + > > static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, > > struct rxe_av *av) > > { > > @@ -411,7 +452,7 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, > > return -EHOSTUNREACH; > > } > > > > - if (!memcmp(saddr, daddr, sizeof(*daddr))) > > + if (same_rxe6(skb->dev, daddr)) > > pkt->mask |= RXE_LOOPBACK_MASK; > > Thanks Kamal! > > Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com> > > Will give it a short test today. Tested both RC and UD as well as with qemu guest with pvrdma device. Tested-by: Yuval Shaia <yuval.shaia@oracle.com> (btw, nice to finally move to 5.0 -:)) > > > > > prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), > > -- > > 2.20.1 > >
On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > Currently a packet is marked for loopback only if the source and > destination addresses match, This is not enough when multiple gids are > present in rxe device's gid table and the traffic is from one gid to > another, this patch fixes this by marking the packet for loopback if the > destination address found in the rxe's netdevice address list. Shouldn't loop back detection be based on the DMAC/SMAC being equal and nothing else? Jason
On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: > On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > > Currently a packet is marked for loopback only if the source and > > destination addresses match, This is not enough when multiple gids are > > present in rxe device's gid table and the traffic is from one gid to > > another, this patch fixes this by marking the packet for loopback if the > > destination address found in the rxe's netdevice address list. > > Shouldn't loop back detection be based on the DMAC/SMAC being equal > and nothing else? > > Jason No because even if DMAC != SMAC (and in our case dgid != sgid) we can still can end up on the same rxe device. As the commit message trying to explain, take for example one rxe device with two gids one send to the second, this should be processed as loopback, right? Yuval
On Thu, Jan 24, 2019 at 08:50:47PM +0200, Yuval Shaia wrote: > On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: > > On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > > > Currently a packet is marked for loopback only if the source and > > > destination addresses match, This is not enough when multiple gids are > > > present in rxe device's gid table and the traffic is from one gid to > > > another, this patch fixes this by marking the packet for loopback if the > > > destination address found in the rxe's netdevice address list. > > > > Shouldn't loop back detection be based on the DMAC/SMAC being equal > > and nothing else? > > > > Jason > > No because even if DMAC != SMAC (and in our case dgid != sgid) we can still > can end up on the same rxe device. > > As the commit message trying to explain, take for example one rxe device > with two gids one send to the second, this should be processed as loopback, > right? rxe sits on top of a netdev - so it has *EXACTLY ONE* mac address. Anything else is a bug. Jason
On Thu, Jan 24, 2019 at 11:59:38AM -0700, Jason Gunthorpe wrote: > On Thu, Jan 24, 2019 at 08:50:47PM +0200, Yuval Shaia wrote: > > On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: > > > On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > > > > Currently a packet is marked for loopback only if the source and > > > > destination addresses match, This is not enough when multiple gids are > > > > present in rxe device's gid table and the traffic is from one gid to > > > > another, this patch fixes this by marking the packet for loopback if the > > > > destination address found in the rxe's netdevice address list. > > > > > > Shouldn't loop back detection be based on the DMAC/SMAC being equal > > > and nothing else? > > > > > > Jason > > > > No because even if DMAC != SMAC (and in our case dgid != sgid) we can still > > can end up on the same rxe device. > > > > As the commit message trying to explain, take for example one rxe device > > with two gids one send to the second, this should be processed as loopback, > > right? > > rxe sits on top of a netdev - so it has *EXACTLY ONE* mac > address. Anything else is a bug. > > Jason ok, i think see your point. So you are saying that instead of traveling the routing table we just need to check if dmac is the same as smac, correct? Yuval
On Thu, Jan 24, 2019 at 09:02:56PM +0200, Yuval Shaia wrote: > On Thu, Jan 24, 2019 at 11:59:38AM -0700, Jason Gunthorpe wrote: > > On Thu, Jan 24, 2019 at 08:50:47PM +0200, Yuval Shaia wrote: > > > On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: > > > > On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > > > > > Currently a packet is marked for loopback only if the source and > > > > > destination addresses match, This is not enough when multiple gids are > > > > > present in rxe device's gid table and the traffic is from one gid to > > > > > another, this patch fixes this by marking the packet for loopback if the > > > > > destination address found in the rxe's netdevice address list. > > > > > > > > Shouldn't loop back detection be based on the DMAC/SMAC being equal > > > > and nothing else? > > > > > > > > Jason > > > > > > No because even if DMAC != SMAC (and in our case dgid != sgid) we can still > > > can end up on the same rxe device. > > > > > > As the commit message trying to explain, take for example one rxe device > > > with two gids one send to the second, this should be processed as loopback, > > > right? > > > > rxe sits on top of a netdev - so it has *EXACTLY ONE* mac > > address. Anything else is a bug. > > > > Jason > > ok, i think see your point. > So you are saying that instead of traveling the routing table we just need > to check if dmac is the same as smac, correct? dmac should be set to the 'exactly one' mac, so yes. Jsaon
On 1/24/19 9:10 PM, Jason Gunthorpe wrote: > On Thu, Jan 24, 2019 at 09:02:56PM +0200, Yuval Shaia wrote: >> On Thu, Jan 24, 2019 at 11:59:38AM -0700, Jason Gunthorpe wrote: >>> On Thu, Jan 24, 2019 at 08:50:47PM +0200, Yuval Shaia wrote: >>>> On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: >>>>> On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: >>>>>> Currently a packet is marked for loopback only if the source and >>>>>> destination addresses match, This is not enough when multiple gids are >>>>>> present in rxe device's gid table and the traffic is from one gid to >>>>>> another, this patch fixes this by marking the packet for loopback if the >>>>>> destination address found in the rxe's netdevice address list. >>>>> Shouldn't loop back detection be based on the DMAC/SMAC being equal >>>>> and nothing else? >>>>> >>>>> Jason >>>> No because even if DMAC != SMAC (and in our case dgid != sgid) we can still >>>> can end up on the same rxe device. >>>> >>>> As the commit message trying to explain, take for example one rxe device >>>> with two gids one send to the second, this should be processed as loopback, >>>> right? >>> rxe sits on top of a netdev - so it has *EXACTLY ONE* mac >>> address. Anything else is a bug. >>> >>> Jason >> ok, i think see your point. >> So you are saying that instead of traveling the routing table we just need >> to check if dmac is the same as smac, correct? > dmac should be set to the 'exactly one' mac, so yes. > > Jsaon Thanks Yuval and Jason. I'll prepare a new fix that will check if smac == dmac. Thanks, Kamal
On Sun, Jan 27, 2019 at 09:43:45AM +0200, Kamal Heib wrote: > > > On 1/24/19 9:10 PM, Jason Gunthorpe wrote: > > On Thu, Jan 24, 2019 at 09:02:56PM +0200, Yuval Shaia wrote: > >> On Thu, Jan 24, 2019 at 11:59:38AM -0700, Jason Gunthorpe wrote: > >>> On Thu, Jan 24, 2019 at 08:50:47PM +0200, Yuval Shaia wrote: > >>>> On Thu, Jan 24, 2019 at 11:30:48AM -0700, Jason Gunthorpe wrote: > >>>>> On Thu, Jan 24, 2019 at 04:44:34PM +0200, Kamal Heib wrote: > >>>>>> Currently a packet is marked for loopback only if the source and > >>>>>> destination addresses match, This is not enough when multiple gids are > >>>>>> present in rxe device's gid table and the traffic is from one gid to > >>>>>> another, this patch fixes this by marking the packet for loopback if the > >>>>>> destination address found in the rxe's netdevice address list. > >>>>> Shouldn't loop back detection be based on the DMAC/SMAC being equal > >>>>> and nothing else? > >>>>> > >>>>> Jason > >>>> No because even if DMAC != SMAC (and in our case dgid != sgid) we can still > >>>> can end up on the same rxe device. > >>>> > >>>> As the commit message trying to explain, take for example one rxe device > >>>> with two gids one send to the second, this should be processed as loopback, > >>>> right? > >>> rxe sits on top of a netdev - so it has *EXACTLY ONE* mac > >>> address. Anything else is a bug. > >>> > >>> Jason > >> ok, i think see your point. > >> So you are saying that instead of traveling the routing table we just need > >> to check if dmac is the same as smac, correct? > > dmac should be set to the 'exactly one' mac, so yes. > > > > Jsaon > > Thanks Yuval and Jason. > I'll prepare a new fix that will check if smac == dmac. > > Thanks, > Kamal Thanks, I was under the (probably wrong) assumption that dmac is yet unknown at this stage. Yuval
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 8fd03ae20efc..6ee984b7793b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -368,6 +368,25 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } +static inline bool same_rxe4(struct net_device *ndev, struct in_addr *daddr) +{ + struct in_device *in_dev; + bool same_rxe = false; + + in_dev = in_dev_get(ndev); + if (!in_dev) + return false; + + for_ifa(in_dev) + if (!memcmp(&ifa->ifa_address, daddr, sizeof(*daddr))) { + same_rxe = true; + break; + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + return same_rxe; +} + static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, struct rxe_av *av) { @@ -384,7 +403,7 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, return -EHOSTUNREACH; } - if (!memcmp(saddr, daddr, sizeof(*daddr))) + if (same_rxe4(skb->dev, daddr)) pkt->mask |= RXE_LOOPBACK_MASK; prepare_udp_hdr(skb, cpu_to_be16(qp->src_port), @@ -397,6 +416,28 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb, return 0; } +static inline bool same_rxe6(struct net_device *ndev, struct in6_addr *daddr) +{ + struct inet6_dev *in6_dev; + struct inet6_ifaddr *ifp; + bool same_rxe = false; + + in6_dev = in6_dev_get(ndev); + if (!in6_dev) + return false; + + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + if (!memcmp(&ifp->addr, daddr, sizeof(*daddr))) { + same_rxe = true; + break; + } + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + return same_rxe; +} + static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, struct rxe_av *av) { @@ -411,7 +452,7 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb, return -EHOSTUNREACH; } - if (!memcmp(saddr, daddr, sizeof(*daddr))) + if (same_rxe6(skb->dev, daddr)) pkt->mask |= RXE_LOOPBACK_MASK; prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),