diff mbox

RDMA/ipoib: Update paths on CLIENT_REREG/SM_CHANGE events

Message ID 72ab5c33597f19082cd98674a204b34c59e2b772.1526658945.git.dledford@redhat.com (mailing list archive)
State Accepted
Headers show

Commit Message

Doug Ledford May 18, 2018, 4 p.m. UTC
We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
through and marks paths invalid. But we weren't always checking for this
validity when we needed to, and so we could keep using a path marked
invalid.  What's more, once we establish a path with a valid ah, we put
a pointer to the ah in the neigh struct directly, so even if we mark the
path as invalid, as long as the neigh has a direct pointer to the ah, it
keeps using the old, outdated ah.

To fix this we do several things.

1) Put the valid flag in the ah instead of the path struct, so when we
put the ah pointer directly in the neigh struct, we can easily check the
validity of the ah on send events.
2) Check the neigh->ah and neigh->ah->valid elements in the needed
places, and if we have an ah, but it's invalid, then invoke a refresh of
the ah.
3) Fix the various places that check for path, but didn't check for
path->valid (now path->ah && path->ah->valid).

Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
		      SM change events")
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++++++++++-----
 2 files changed, 28 insertions(+), 7 deletions(-)

Comments

Doug Ledford May 21, 2018, 9:29 p.m. UTC | #1
On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote:
> We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> through and marks paths invalid. But we weren't always checking for this
> validity when we needed to, and so we could keep using a path marked
> invalid.  What's more, once we establish a path with a valid ah, we put
> a pointer to the ah in the neigh struct directly, so even if we mark the
> path as invalid, as long as the neigh has a direct pointer to the ah, it
> keeps using the old, outdated ah.
> 
> To fix this we do several things.
> 
> 1) Put the valid flag in the ah instead of the path struct, so when we
> put the ah pointer directly in the neigh struct, we can easily check the
> validity of the ah on send events.
> 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> places, and if we have an ah, but it's invalid, then invoke a refresh of
> the ah.
> 3) Fix the various places that check for path, but didn't check for
> path->valid (now path->ah && path->ah->valid).
> 
> Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
> Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> 		      SM change events")
> Signed-off-by: Doug Ledford <dledford@redhat.com>
> ---

Evgenii, have you had a chance to see if this patch resolves your issue
properly?

>  drivers/infiniband/ulp/ipoib/ipoib.h      |  2 +-
>  drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++++++++++-----
>  2 files changed, 28 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 308e0ce49289..a50b062ed13e 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -415,6 +415,7 @@ struct ipoib_ah {
>  	struct list_head   list;
>  	struct kref	   ref;
>  	unsigned	   last_send;
> +	int  		   valid;
>  };
>  
>  struct ipoib_path {
> @@ -431,7 +432,6 @@ struct ipoib_path {
>  
>  	struct rb_node	      rb_node;
>  	struct list_head      list;
> -	int  		      valid;
>  };
>  
>  struct ipoib_neigh {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index cf291f90b58f..788bb9573f1f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
>  		ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
>  			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
>  			  path->pathrec.dgid.raw);
> -		path->valid =  0;
> +		if (path->ah)
> +			path->ah->valid = 0;
>  	}
>  
>  	spin_unlock_irq(&priv->lock);
> @@ -833,7 +834,7 @@ static void path_rec_completion(int status,
>  			while ((skb = __skb_dequeue(&neigh->queue)))
>  				__skb_queue_tail(&skqueue, skb);
>  		}
> -		path->valid = 1;
> +		path->ah->valid = 1;
>  	}
>  
>  	path->query = NULL;
> @@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev,
>  	return 0;
>  }
>  
> +static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
> +			       struct net_device *dev)
> +{
> +	struct ipoib_dev_priv *priv = ipoib_priv(dev);
> +	struct ipoib_path *path;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&priv->lock, flags);
> +
> +	path = __path_find(dev, daddr + 4);
> +	if (!path)
> +		goto out;
> +	if (!path->query)
> +		path_rec_start(dev, path);
> +out:
> +	spin_unlock_irqrestore(&priv->lock, flags);
> +}
> +
>  static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>  					  struct net_device *dev)
>  {
> @@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>  
>  	list_add_tail(&neigh->list, &path->neigh_list);
>  
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>  		kref_get(&path->ah->ref);
>  		neigh->ah = path->ah;
>  
> @@ -1034,7 +1053,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>  		goto drop_and_unlock;
>  
>  	path = __path_find(dev, phdr->hwaddr + 4);
> -	if (!path || !path->valid) {
> +	if (!path || !path->ah || !path->ah->valid) {
>  		int new_path = 0;
>  
>  		if (!path) {
> @@ -1069,7 +1088,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>  		return;
>  	}
>  
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>  		ipoib_dbg(priv, "Send unicast ARP to %08x\n",
>  			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
>  
> @@ -1161,10 +1180,12 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
>  			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
>  			goto unref;
>  		}
> -	} else if (neigh->ah) {
> +	} else if (neigh->ah && neigh->ah->valid) {
>  		neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
>  						IPOIB_QPN(phdr->hwaddr));
>  		goto unref;
> +	} else if (neigh->ah) {
> +		neigh_refresh_path(neigh, phdr->hwaddr, dev);
>  	}
>  
>  	if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
Leon Romanovsky May 22, 2018, 6:03 a.m. UTC | #2
On Mon, May 21, 2018 at 05:29:23PM -0400, Doug Ledford wrote:
> On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote:
> > We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> > through and marks paths invalid. But we weren't always checking for this
> > validity when we needed to, and so we could keep using a path marked
> > invalid.  What's more, once we establish a path with a valid ah, we put
> > a pointer to the ah in the neigh struct directly, so even if we mark the
> > path as invalid, as long as the neigh has a direct pointer to the ah, it
> > keeps using the old, outdated ah.
> >
> > To fix this we do several things.
> >
> > 1) Put the valid flag in the ah instead of the path struct, so when we
> > put the ah pointer directly in the neigh struct, we can easily check the
> > validity of the ah on send events.
> > 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> > places, and if we have an ah, but it's invalid, then invoke a refresh of
> > the ah.
> > 3) Fix the various places that check for path, but didn't check for
> > path->valid (now path->ah && path->ah->valid).
> >
> > Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
> > Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> > 		      SM change events")
> > Signed-off-by: Doug Ledford <dledford@redhat.com>
> > ---
>
> Evgenii, have you had a chance to see if this patch resolves your issue
> properly?

Doug,

Can you please line-up the fixes line before you are merging it?
It breaks various grep scripts.

Thanks
Evgenii Smirnov May 22, 2018, 1:37 p.m. UTC | #3
I did the tests with changing LIDs and switching the SM. This patch 
solves the issue for both RC and UD modes.

I've also came to the same conclusion, that the valid flag should be 
moved to the ah struct, but was too slow to come up with a patch. Also, 
while looking at this issue, I've noticed some strange behavior in 
unicast_arp_send function. I'll send a patch regarding that in a 
separate email, could you also take a look on it?


On 18/05/18 18:00, Doug Ledford wrote:
> We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> through and marks paths invalid. But we weren't always checking for this
> validity when we needed to, and so we could keep using a path marked
> invalid.  What's more, once we establish a path with a valid ah, we put
> a pointer to the ah in the neigh struct directly, so even if we mark the
> path as invalid, as long as the neigh has a direct pointer to the ah, it
> keeps using the old, outdated ah.
>
> To fix this we do several things.
>
> 1) Put the valid flag in the ah instead of the path struct, so when we
> put the ah pointer directly in the neigh struct, we can easily check the
> validity of the ah on send events.
> 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> places, and if we have an ah, but it's invalid, then invoke a refresh of
> the ah.
> 3) Fix the various places that check for path, but didn't check for
> path->valid (now path->ah && path->ah->valid).
>
> Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
> Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> 		      SM change events")
> Signed-off-by: Doug Ledford <dledford@redhat.com>
> ---
>   drivers/infiniband/ulp/ipoib/ipoib.h      |  2 +-
>   drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 ++++++++++++++++++-----
>   2 files changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index 308e0ce49289..a50b062ed13e 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -415,6 +415,7 @@ struct ipoib_ah {
>   	struct list_head   list;
>   	struct kref	   ref;
>   	unsigned	   last_send;
> +	int  		   valid;
>   };
>   
>   struct ipoib_path {
> @@ -431,7 +432,6 @@ struct ipoib_path {
>   
>   	struct rb_node	      rb_node;
>   	struct list_head      list;
> -	int  		      valid;
>   };
>   
>   struct ipoib_neigh {
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index cf291f90b58f..788bb9573f1f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
>   		ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
>   			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
>   			  path->pathrec.dgid.raw);
> -		path->valid =  0;
> +		if (path->ah)
> +			path->ah->valid = 0;
>   	}
>   
>   	spin_unlock_irq(&priv->lock);
> @@ -833,7 +834,7 @@ static void path_rec_completion(int status,
>   			while ((skb = __skb_dequeue(&neigh->queue)))
>   				__skb_queue_tail(&skqueue, skb);
>   		}
> -		path->valid = 1;
> +		path->ah->valid = 1;
>   	}
>   
>   	path->query = NULL;
> @@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev,
>   	return 0;
>   }
>   
> +static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
> +			       struct net_device *dev)
> +{
> +	struct ipoib_dev_priv *priv = ipoib_priv(dev);
> +	struct ipoib_path *path;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&priv->lock, flags);
> +
> +	path = __path_find(dev, daddr + 4);
> +	if (!path)
> +		goto out;
> +	if (!path->query)
> +		path_rec_start(dev, path);
> +out:
> +	spin_unlock_irqrestore(&priv->lock, flags);
> +}
> +
>   static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>   					  struct net_device *dev)
>   {
> @@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
>   
>   	list_add_tail(&neigh->list, &path->neigh_list);
>   
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>   		kref_get(&path->ah->ref);
>   		neigh->ah = path->ah;
>   
> @@ -1034,7 +1053,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>   		goto drop_and_unlock;
>   
>   	path = __path_find(dev, phdr->hwaddr + 4);
> -	if (!path || !path->valid) {
> +	if (!path || !path->ah || !path->ah->valid) {
>   		int new_path = 0;
>   
>   		if (!path) {
> @@ -1069,7 +1088,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
>   		return;
>   	}
>   
> -	if (path->ah) {
> +	if (path->ah && path->ah->valid) {
>   		ipoib_dbg(priv, "Send unicast ARP to %08x\n",
>   			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
>   
> @@ -1161,10 +1180,12 @@ static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
>   			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
>   			goto unref;
>   		}
> -	} else if (neigh->ah) {
> +	} else if (neigh->ah && neigh->ah->valid) {
>   		neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
>   						IPOIB_QPN(phdr->hwaddr));
>   		goto unref;
> +	} else if (neigh->ah) {
> +		neigh_refresh_path(neigh, phdr->hwaddr, dev);
>   	}
>   
>   	if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford May 22, 2018, 5:18 p.m. UTC | #4
On Tue, 2018-05-22 at 09:03 +0300, Leon Romanovsky wrote:
> On Mon, May 21, 2018 at 05:29:23PM -0400, Doug Ledford wrote:
> > On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote:
> > > We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> > > through and marks paths invalid. But we weren't always checking for this
> > > validity when we needed to, and so we could keep using a path marked
> > > invalid.  What's more, once we establish a path with a valid ah, we put
> > > a pointer to the ah in the neigh struct directly, so even if we mark the
> > > path as invalid, as long as the neigh has a direct pointer to the ah, it
> > > keeps using the old, outdated ah.
> > > 
> > > To fix this we do several things.
> > > 
> > > 1) Put the valid flag in the ah instead of the path struct, so when we
> > > put the ah pointer directly in the neigh struct, we can easily check the
> > > validity of the ah on send events.
> > > 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> > > places, and if we have an ah, but it's invalid, then invoke a refresh of
> > > the ah.
> > > 3) Fix the various places that check for path, but didn't check for
> > > path->valid (now path->ah && path->ah->valid).
> > > 
> > > Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
> > > Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> > > 		      SM change events")
> > > Signed-off-by: Doug Ledford <dledford@redhat.com>
> > > ---
> > 
> > Evgenii, have you had a chance to see if this patch resolves your issue
> > properly?
> 
> Doug,
> 
> Can you please line-up the fixes line before you are merging it?
> It breaks various grep scripts.
> 
> Thanks

Fixed and pushed to for-next.  Thanks.
Doug Ledford May 22, 2018, 5:20 p.m. UTC | #5
On Tue, 2018-05-22 at 15:37 +0200, Evgenii Smirnov wrote:
> Also, 
> while looking at this issue, I've noticed some strange behavior in 
> unicast_arp_send function. I'll send a patch regarding that in a 
> separate email, could you also take a look on it?

Sure, I'll do that now while this stuff is still fresh in my head ;-)
Leon Romanovsky May 22, 2018, 5:29 p.m. UTC | #6
On Tue, May 22, 2018 at 01:18:52PM -0400, Doug Ledford wrote:
> On Tue, 2018-05-22 at 09:03 +0300, Leon Romanovsky wrote:
> > On Mon, May 21, 2018 at 05:29:23PM -0400, Doug Ledford wrote:
> > > On Fri, 2018-05-18 at 12:00 -0400, Doug Ledford wrote:
> > > > We do a light flush on CLIENT_REREG and SM_CHANGE events.  This goes
> > > > through and marks paths invalid. But we weren't always checking for this
> > > > validity when we needed to, and so we could keep using a path marked
> > > > invalid.  What's more, once we establish a path with a valid ah, we put
> > > > a pointer to the ah in the neigh struct directly, so even if we mark the
> > > > path as invalid, as long as the neigh has a direct pointer to the ah, it
> > > > keeps using the old, outdated ah.
> > > >
> > > > To fix this we do several things.
> > > >
> > > > 1) Put the valid flag in the ah instead of the path struct, so when we
> > > > put the ah pointer directly in the neigh struct, we can easily check the
> > > > validity of the ah on send events.
> > > > 2) Check the neigh->ah and neigh->ah->valid elements in the needed
> > > > places, and if we have an ah, but it's invalid, then invoke a refresh of
> > > > the ah.
> > > > 3) Fix the various places that check for path, but didn't check for
> > > > path->valid (now path->ah && path->ah->valid).
> > > >
> > > > Reported-by: Evgenii Smirnov <evgenii.smirnov@profitbricks.com>
> > > > Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on
> > > > 		      SM change events")
> > > > Signed-off-by: Doug Ledford <dledford@redhat.com>
> > > > ---
> > >
> > > Evgenii, have you had a chance to see if this patch resolves your issue
> > > properly?
> >
> > Doug,
> >
> > Can you please line-up the fixes line before you are merging it?
> > It breaks various grep scripts.
> >
> > Thanks
>
> Fixed and pushed to for-next.  Thanks.

Thanks

>
> --
> Doug Ledford <dledford@redhat.com>
>     GPG KeyID: B826A3330E572FDD
>     Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57 2FDD
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 308e0ce49289..a50b062ed13e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -415,6 +415,7 @@  struct ipoib_ah {
 	struct list_head   list;
 	struct kref	   ref;
 	unsigned	   last_send;
+	int  		   valid;
 };
 
 struct ipoib_path {
@@ -431,7 +432,6 @@  struct ipoib_path {
 
 	struct rb_node	      rb_node;
 	struct list_head      list;
-	int  		      valid;
 };
 
 struct ipoib_neigh {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf291f90b58f..788bb9573f1f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -697,7 +697,8 @@  void ipoib_mark_paths_invalid(struct net_device *dev)
 		ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
 			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
 			  path->pathrec.dgid.raw);
-		path->valid =  0;
+		if (path->ah)
+			path->ah->valid = 0;
 	}
 
 	spin_unlock_irq(&priv->lock);
@@ -833,7 +834,7 @@  static void path_rec_completion(int status,
 			while ((skb = __skb_dequeue(&neigh->queue)))
 				__skb_queue_tail(&skqueue, skb);
 		}
-		path->valid = 1;
+		path->ah->valid = 1;
 	}
 
 	path->query = NULL;
@@ -926,6 +927,24 @@  static int path_rec_start(struct net_device *dev,
 	return 0;
 }
 
+static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
+			       struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct ipoib_path *path;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	path = __path_find(dev, daddr + 4);
+	if (!path)
+		goto out;
+	if (!path->query)
+		path_rec_start(dev, path);
+out:
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
 static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
 					  struct net_device *dev)
 {
@@ -963,7 +982,7 @@  static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
 	list_add_tail(&neigh->list, &path->neigh_list);
 
-	if (path->ah) {
+	if (path->ah && path->ah->valid) {
 		kref_get(&path->ah->ref);
 		neigh->ah = path->ah;
 
@@ -1034,7 +1053,7 @@  static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 		goto drop_and_unlock;
 
 	path = __path_find(dev, phdr->hwaddr + 4);
-	if (!path || !path->valid) {
+	if (!path || !path->ah || !path->ah->valid) {
 		int new_path = 0;
 
 		if (!path) {
@@ -1069,7 +1088,7 @@  static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 		return;
 	}
 
-	if (path->ah) {
+	if (path->ah && path->ah->valid) {
 		ipoib_dbg(priv, "Send unicast ARP to %08x\n",
 			  be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
 
@@ -1161,10 +1180,12 @@  static netdev_tx_t ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
 			goto unref;
 		}
-	} else if (neigh->ah) {
+	} else if (neigh->ah && neigh->ah->valid) {
 		neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
 						IPOIB_QPN(phdr->hwaddr));
 		goto unref;
+	} else if (neigh->ah) {
+		neigh_refresh_path(neigh, phdr->hwaddr, dev);
 	}
 
 	if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {