@@ -893,6 +893,8 @@ int lnet_get_peer_ni_info(u32 peer_index, u64 *nid,
{
if (!(lp->lp_state & LNET_PEER_MULTI_RAIL))
return false;
+ if (lp->lp_state & LNET_PEER_MARK_DELETED)
+ return false;
if (lp->lp_state & LNET_PEER_FORCE_PUSH)
return true;
if (lp->lp_state & LNET_PEER_NO_DISCOVERY)
@@ -750,7 +750,9 @@ struct lnet_peer {
#define LNET_PEER_RTR_DISCOVERED BIT(17)
/* peer is marked for deletion */
-#define LNET_PEER_MARK_DELETION BIT(18)
+#define LNET_PEER_MARK_DELETION BIT(18)
+/* lnet_peer_del()/lnet_peer_del_locked() has been called on the peer */
+#define LNET_PEER_MARK_DELETED BIT(19)
struct lnet_peer_net {
/* chain on lp_peer_nets */
@@ -450,6 +450,10 @@ void lnet_peer_uninit(void)
CDEBUG(D_NET, "peer %s\n", libcfs_nid2str(peer->lp_primary_nid));
+ spin_lock(&peer->lp_lock);
+ peer->lp_state |= LNET_PEER_MARK_DELETED;
+ spin_unlock(&peer->lp_lock);
+
lpni = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
while (lpni) {
lpni2 = lnet_get_next_peer_ni_locked(peer, NULL, lpni);
@@ -462,9 +466,40 @@ void lnet_peer_uninit(void)
return rc2;
}
+/* Cancel any Ping or Push that discovery is waiting on by unlinking the
+ * relevant MDs. The lnet_discovery_event_handler() will proceed from
+ * here and complete the cleanup.
+ *
+ * Used when discovering this peer is taking too long, and by
+ * lnet_peer_del() before it calls lnet_peer_del_locked().
+ *
+ * A handle is taken only for an operation whose LNET_PEER_PING_SENT or
+ * LNET_PEER_PUSH_SENT bit is set in lp_state. Under lp_lock the handle
+ * is copied to a local and the copy in the peer is invalidated; the
+ * LNetMDUnlink() calls are made after lp_lock has been dropped.
+ */
+static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
+{
+ struct lnet_handle_md ping_mdh;
+ struct lnet_handle_md push_mdh;
+
+ LNetInvalidateMDHandle(&ping_mdh);
+ LNetInvalidateMDHandle(&push_mdh);
+
+ spin_lock(&lp->lp_lock);
+ if (lp->lp_state & LNET_PEER_PING_SENT) {
+ ping_mdh = lp->lp_ping_mdh;
+ LNetInvalidateMDHandle(&lp->lp_ping_mdh);
+ }
+ if (lp->lp_state & LNET_PEER_PUSH_SENT) {
+ push_mdh = lp->lp_push_mdh;
+ LNetInvalidateMDHandle(&lp->lp_push_mdh);
+ }
+ spin_unlock(&lp->lp_lock);
+
+ if (!LNetMDHandleIsInvalid(ping_mdh))
+ LNetMDUnlink(ping_mdh);
+ if (!LNetMDHandleIsInvalid(push_mdh))
+ LNetMDUnlink(push_mdh);
+}
+
static int
lnet_peer_del(struct lnet_peer *peer)
{
+ lnet_peer_cancel_discovery(peer);
lnet_net_lock(LNET_LOCK_EX);
lnet_peer_del_locked(peer);
lnet_net_unlock(LNET_LOCK_EX);
@@ -2955,6 +2990,10 @@ static int lnet_peer_deletion(struct lnet_peer *lp)
CDEBUG(D_NET, "peer %s(%p) state %#x\n",
libcfs_nid2str(lp->lp_primary_nid), lp, lp->lp_state);
+ /* no-op if lnet_peer_del() has already been called on this peer */
+ if (lp->lp_state & LNET_PEER_MARK_DELETED)
+ return 0;
+
if (the_lnet.ln_dc_state != LNET_DC_STATE_RUNNING)
return -ESHUTDOWN;
@@ -3382,37 +3421,6 @@ static void lnet_peer_discovery_error(struct lnet_peer *lp, int error)
}
/*
- * Discovering this peer is taking too long. Cancel any Ping or Push
- * that discovery is waiting on by unlinking the relevant MDs. The
- * lnet_discovery_event_handler() will proceed from here and complete
- * the cleanup.
- */
-static void lnet_peer_cancel_discovery(struct lnet_peer *lp)
-{
- struct lnet_handle_md ping_mdh;
- struct lnet_handle_md push_mdh;
-
- LNetInvalidateMDHandle(&ping_mdh);
- LNetInvalidateMDHandle(&push_mdh);
-
- spin_lock(&lp->lp_lock);
- if (lp->lp_state & LNET_PEER_PING_SENT) {
- ping_mdh = lp->lp_ping_mdh;
- LNetInvalidateMDHandle(&lp->lp_ping_mdh);
- }
- if (lp->lp_state & LNET_PEER_PUSH_SENT) {
- push_mdh = lp->lp_push_mdh;
- LNetInvalidateMDHandle(&lp->lp_push_mdh);
- }
- spin_unlock(&lp->lp_lock);
-
- if (!LNetMDHandleIsInvalid(ping_mdh))
- LNetMDUnlink(ping_mdh);
- if (!LNetMDHandleIsInvalid(push_mdh))
- LNetMDUnlink(push_mdh);
-}
-
-/*
* Wait for work to be queued or some other change that must be
* attended to. Returns non-zero if the discovery thread should shut
* down.
@@ -3566,7 +3574,8 @@ static int lnet_peer_discovery(void *arg)
CDEBUG(D_NET, "peer %s(%p) state %#x\n",
libcfs_nid2str(lp->lp_primary_nid), lp,
lp->lp_state);
- if (lp->lp_state & LNET_PEER_MARK_DELETION)
+ if (lp->lp_state & (LNET_PEER_MARK_DELETION |
+ LNET_PEER_MARK_DELETED))
rc = lnet_peer_deletion(lp);
else if (lp->lp_state & LNET_PEER_DATA_PRESENT)
rc = lnet_peer_data_present(lp);