@@ -530,6 +530,16 @@ static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
goto out;
}
}
+
+ /* If we're asked to lock down the primary NID we shouldn't be
+ * deleting it
+ */
+ if (lp->lp_state & LNET_PEER_LOCK_PRIMARY &&
+ primary_nid == nid) {
+ rc = -EPERM;
+ goto out;
+ }
+
lpni = lnet_find_peer_ni_locked(nid);
if (!lpni) {
rc = -ENOENT;
@@ -1388,13 +1398,18 @@ struct lnet_peer_ni *
* down then this discovery can introduce long delays into the mount
* process, so skip it if it isn't necessary.
*/
- while (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) {
+ if (!lnet_peer_discovery_disabled && !lnet_peer_is_uptodate(lp)) {
spin_lock(&lp->lp_lock);
/* force a full discovery cycle */
- lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH;
+ lp->lp_state |= LNET_PEER_FORCE_PING | LNET_PEER_FORCE_PUSH |
+ LNET_PEER_LOCK_PRIMARY;
spin_unlock(&lp->lp_lock);
- rc = lnet_discover_peer_locked(lpni, cpt, true);
+ /* start discovery in the background. Messages to that
+ * peer will not go through until the discovery is
+ * complete
+ */
+ rc = lnet_discover_peer_locked(lpni, cpt, false);
if (rc)
goto out_decref;
/* The lpni (or lp) for this NID may have changed and our ref is
@@ -1408,14 +1423,6 @@ struct lnet_peer_ni *
goto out_unlock;
}
lp = lpni->lpni_peer_net->lpn_peer;
-
- /* If we find that the peer has discovery disabled then we will
- * not modify whatever primary NID is currently set for this
- * peer. Thus, we can break out of this loop even if the peer
- * is not fully up to date.
- */
- if (lnet_is_discovery_disabled(lp))
- break;
}
primary_nid = lp->lp_primary_nid;
out_decref:
@@ -1522,6 +1529,8 @@ struct lnet_peer_net *
lnet_peer_clr_non_mr_pref_nids(lp);
}
}
+ if (flags & LNET_PEER_LOCK_PRIMARY)
+ lp->lp_state |= LNET_PEER_LOCK_PRIMARY;
spin_unlock(&lp->lp_lock);
lp->lp_nnis++;
@@ -1676,9 +1685,27 @@ struct lnet_peer_net *
}
/* If this is the primary NID, destroy the peer. */
if (lnet_peer_ni_is_primary(lpni)) {
- struct lnet_peer *rtr_lp =
+ struct lnet_peer *lp2 =
lpni->lpni_peer_net->lpn_peer;
- int rtr_refcount = rtr_lp->lp_rtr_refcount;
+ int rtr_refcount = lp2->lp_rtr_refcount;
+
+ /* If the new peer that this NID belongs to is
+ * a primary NID for another peer which we're
+ * suppose to preserve the Primary for then we
+ * don't want to mess with it. But the
+ * configuration is wrong at this point, so we
+ * should flag both of these peers as in a bad
+ * state
+ */
+ if (lp2->lp_state & LNET_PEER_LOCK_PRIMARY) {
+ spin_lock(&lp->lp_lock);
+ lp->lp_state |= LNET_PEER_BAD_CONFIG;
+ spin_unlock(&lp->lp_lock);
+ spin_lock(&lp2->lp_lock);
+ lp2->lp_state |= LNET_PEER_BAD_CONFIG;
+ spin_unlock(&lp2->lp_lock);
+ goto out_free_lpni;
+ }
/* if we're trying to delete a router it means
* we're moving this peer NI to a new peer so must
@@ -1686,9 +1713,9 @@ struct lnet_peer_net *
*/
if (rtr_refcount > 0) {
flags |= LNET_PEER_RTR_NI_FORCE_DEL;
- lnet_rtr_transfer_to_peer(rtr_lp, lp);
+ lnet_rtr_transfer_to_peer(lp2, lp);
}
- lnet_peer_del(lpni->lpni_peer_net->lpn_peer);
+ lnet_peer_del(lp2);
lnet_peer_ni_decref_locked(lpni);
lpni = lnet_peer_ni_alloc(nid);
if (!lpni) {
@@ -1746,7 +1773,8 @@ struct lnet_peer_net *
if (lp->lp_primary_nid == nid)
goto out;
- lp->lp_primary_nid = nid;
+ if (!(lp->lp_state & LNET_PEER_LOCK_PRIMARY))
+ lp->lp_primary_nid = nid;
rc = lnet_peer_add_nid(lp, nid, flags);
if (rc) {
@@ -1754,8 +1782,17 @@ struct lnet_peer_net *
goto out;
}
out:
+ /* if this is a configured peer or the primary for that peer has
+ * been locked, then we don't want to flag this scenario as
+ * a failure
+ */
+ if (lp->lp_state & LNET_PEER_CONFIGURED ||
+ lp->lp_state & LNET_PEER_LOCK_PRIMARY)
+ return 0;
+
CDEBUG(D_NET, "peer %s NID %s: %d\n",
libcfs_nid2str(old), libcfs_nid2str(nid), rc);
+
return rc;
}