@@ -1161,6 +1161,11 @@ struct lnet {
/* recovery eq handler */
struct lnet_handle_eq ln_mt_eqh;
+ /*
+ * Completed when the discovery and monitor threads can enter their
+ * work loops
+ */
+ struct completion ln_started;
};
#endif
@@ -1062,6 +1062,7 @@ struct lnet_libhandle *
INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
init_waitqueue_head(&the_lnet.ln_dc_waitq);
LNetInvalidateEQHandle(&the_lnet.ln_mt_eqh);
+ init_completion(&the_lnet.ln_started);
rc = lnet_descriptor_setup();
if (rc != 0)
@@ -2583,6 +2584,8 @@ void lnet_lib_exit(void)
mutex_unlock(&the_lnet.ln_api_mutex);
+ complete_all(&the_lnet.ln_started);
+
/* wait for all routers to start */
lnet_wait_router_start();
@@ -3529,6 +3529,7 @@ void lnet_monitor_thr_stop(void)
lnet_build_msg_event(msg, LNET_EVENT_PUT);
+ wait_for_completion(&the_lnet.ln_started);
/*
* Must I ACK? If so I'll grab the ack_wmd out of the header and put
* it back into the ACK during lnet_finalize()
@@ -3258,6 +3258,8 @@ static int lnet_peer_discovery(void *arg)
struct lnet_peer *lp;
int rc;
+ wait_for_completion(&the_lnet.ln_started);
+
CDEBUG(D_NET, "started\n");
for (;;) {
@@ -3429,7 +3431,14 @@ void lnet_peer_discovery_stop(void)
LASSERT(the_lnet.ln_dc_state == LNET_DC_STATE_RUNNING);
the_lnet.ln_dc_state = LNET_DC_STATE_STOPPING;
- wake_up(&the_lnet.ln_dc_waitq);
+
+ /* In the LNetNIInit() path we may be stopping discovery before it
+ * has entered its work loop, i.e. while it still waits on ln_started
+ */
+ if (!completion_done(&the_lnet.ln_started))
+ complete(&the_lnet.ln_started);
+ else
+ wake_up(&the_lnet.ln_dc_waitq);
wait_event(the_lnet.ln_dc_waitq,
the_lnet.ln_dc_state == LNET_DC_STATE_SHUTDOWN);