@@ -127,7 +127,7 @@
return LNET_NI_STATUS_UP;
else if (atomic_read(&ni->ni_fatal_error_on))
return LNET_NI_STATUS_DOWN;
- else if (ni->ni_status)
+ else if (the_lnet.ln_routing && ni->ni_status)
return *ni->ni_status;
else
return LNET_NI_STATUS_UP;
@@ -1216,4 +1216,5 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
old ? "up" : "down",
alive ? "up" : "down");
}
+void lnet_update_ping_buffer(void); /* takes no arguments, returns nothing; defined and exported in the LNet core part of this patch */
#endif
@@ -2382,15 +2382,23 @@ static int kiblnd_port_get_attr(struct kib_hca_dev *hdev)
static inline void
kiblnd_set_ni_fatal_on(struct kib_hca_dev *hdev, int val)
{
- struct kib_net *net;
+ struct kib_net *net;
+ u32 ni_state_before; /* value atomic_xchg() hands back, i.e. the flag as it was before this store */
+ bool update_ping_buf = false; /* becomes true once any NI's flag differed from val */
/* for health check */
list_for_each_entry(net, &hdev->ibh_dev->ibd_nets, ibn_list) {
if (val)
CDEBUG(D_NETERROR, "Fatal device error for NI %s\n",
libcfs_nidstr(&net->ibn_ni->ni_nid));
- atomic_set(&net->ibn_ni->ni_fatal_error_on, val);
+ ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+ val); /* store val and capture the previous flag in one atomic step */
+ if (!update_ping_buf && val != ni_state_before)
+ update_ping_buf = true; /* latched: later unchanged NIs do not clear it */
}
+
+ if (update_ping_buf)
+ lnet_update_ping_buffer(); /* at most one call per invocation, after every net on the device was visited */
}
void
@@ -2748,6 +2756,8 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
bool link_down = !(operstate == IF_OPER_UP);
struct in_device *in_dev;
bool found_ip = false;
+ u32 ni_state_before;
+ bool update_ping_buf = false;
const struct in_ifaddr *ifa;
event_kibdev = kiblnd_dev_search(dev->name);
@@ -2757,7 +2767,6 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
list_for_each_entry_safe(net, cnxt, &event_kibdev->ibd_nets, ibn_list) {
found_ip = false;
-
ni = net->ibn_ni;
in_dev = __in_dev_get_rtnl(dev);
@@ -2766,8 +2775,9 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
dev->name);
CDEBUG(D_NET, "%s: set link fatal state to 1\n",
libcfs_nidstr(&net->ibn_ni->ni_nid));
- atomic_set(&ni->ni_fatal_error_on, 1);
- continue;
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ 1);
+ goto ni_done;
}
in_dev_for_each_ifa_rtnl(ifa, in_dev) {
if (htonl(event_kibdev->ibd_ifip) == ifa->ifa_local)
@@ -2779,22 +2789,31 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
dev->name);
CDEBUG(D_NET, "%s: set link fatal state to 1\n",
libcfs_nidstr(&net->ibn_ni->ni_nid));
- atomic_set(&ni->ni_fatal_error_on, 1);
- continue;
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ 1);
+ goto ni_done;
}
if (link_down) {
CDEBUG(D_NET, "%s: set link fatal state to 1\n",
libcfs_nidstr(&net->ibn_ni->ni_nid));
- atomic_set(&ni->ni_fatal_error_on, link_down);
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ link_down);
} else {
CDEBUG(D_NET, "%s: set link fatal state to %u\n",
libcfs_nidstr(&net->ibn_ni->ni_nid),
(kiblnd_get_link_status(dev) == 0));
- atomic_set(&ni->ni_fatal_error_on,
- (kiblnd_get_link_status(dev) == 0));
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ (kiblnd_get_link_status(dev) == 0));
}
+ni_done:
+ if (!update_ping_buf &&
+ (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+ update_ping_buf = true;
}
+
+ if (update_ping_buf)
+ lnet_update_ping_buffer();
out:
return 0;
}
@@ -2806,6 +2825,8 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
struct kib_net *net;
struct kib_net *cnxt;
struct net_device *event_netdev = ifa->ifa_dev->dev;
+ u32 ni_state_before;
+ bool update_ping_buf = false;
event_kibdev = kiblnd_dev_search(event_netdev->name);
@@ -2820,9 +2841,15 @@ void kiblnd_destroy_dev(struct kib_dev *dev)
CDEBUG(D_NET, "%s: set link fatal state to %u\n",
libcfs_nidstr(&net->ibn_ni->ni_nid),
(event == NETDEV_DOWN));
- atomic_set(&net->ibn_ni->ni_fatal_error_on,
- (event == NETDEV_DOWN));
+ ni_state_before = atomic_xchg(&net->ibn_ni->ni_fatal_error_on,
+ (event == NETDEV_DOWN));
+ if (!update_ping_buf &&
+ ((event == NETDEV_DOWN) != ni_state_before))
+ update_ping_buf = true;
}
+
+ if (update_ping_buf)
+ lnet_update_ping_buffer();
out:
return 0;
}
@@ -2000,6 +2000,8 @@ static int ksocknal_get_link_status(struct net_device *dev)
bool found_ip = false;
struct ksock_interface *ksi = NULL;
struct sockaddr_in *sa;
+ u32 ni_state_before;
+ bool update_ping_buf = false;
const struct in_ifaddr *ifa;
ifindex = dev->ifindex;
@@ -2045,8 +2047,9 @@ static int ksocknal_get_link_status(struct net_device *dev)
CDEBUG(D_NET, "Interface %s has no IPv4 status.\n",
dev->name);
CDEBUG(D_NET, "set link fatal state to 1\n");
- atomic_set(&ni->ni_fatal_error_on, 1);
- continue;
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ 1);
+ goto ni_done;
}
in_dev_for_each_ifa_rtnl(ifa, in_dev) {
if (sa->sin_addr.s_addr == ifa->ifa_local)
@@ -2057,20 +2060,29 @@ static int ksocknal_get_link_status(struct net_device *dev)
CDEBUG(D_NET, "Interface %s has no matching ip\n",
dev->name);
CDEBUG(D_NET, "set link fatal state to 1\n");
- atomic_set(&ni->ni_fatal_error_on, 1);
- continue;
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ 1);
+ goto ni_done;
}
if (link_down) {
CDEBUG(D_NET, "set link fatal state to 1\n");
- atomic_set(&ni->ni_fatal_error_on, link_down);
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ 1);
} else {
CDEBUG(D_NET, "set link fatal state to %u\n",
(ksocknal_get_link_status(dev) == 0));
- atomic_set(&ni->ni_fatal_error_on,
- (ksocknal_get_link_status(dev) == 0));
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ (ksocknal_get_link_status(dev) == 0));
}
+ni_done:
+ if (!update_ping_buf &&
+ (atomic_read(&ni->ni_fatal_error_on) != ni_state_before))
+ update_ping_buf = true;
}
+
+ if (update_ping_buf)
+ lnet_update_ping_buffer();
out:
return 0;
}
@@ -2086,6 +2098,8 @@ static int ksocknal_get_link_status(struct net_device *dev)
int ifindex;
struct ksock_interface *ksi = NULL;
struct sockaddr_in *sa;
+ u32 ni_state_before;
+ bool update_ping_buf = false;
if (!ksocknal_data.ksnd_nnets)
goto out;
@@ -2106,10 +2120,16 @@ static int ksocknal_get_link_status(struct net_device *dev)
CDEBUG(D_NET, "set link fatal state to %u\n",
(event == NETDEV_DOWN));
ni = net->ksnn_ni;
- atomic_set(&ni->ni_fatal_error_on,
- (event == NETDEV_DOWN));
+ ni_state_before = atomic_xchg(&ni->ni_fatal_error_on,
+ (event == NETDEV_DOWN));
+ if (!update_ping_buf &&
+ ((event == NETDEV_DOWN) != ni_state_before))
+ update_ping_buf = true;
}
}
+
+ if (update_ping_buf)
+ lnet_update_ping_buffer();
out:
return 0;
}
@@ -3841,6 +3841,26 @@ int lnet_dyn_del_ni(struct lnet_nid *nid)
return rc;
}
+void lnet_update_ping_buffer(void) /* runs lnet_ping_target_setup() and, on success, lnet_ping_target_update() under ln_api_mutex */
+{
+ struct lnet_ping_buffer *pbuf; /* filled in by lnet_ping_target_setup() */
+ struct lnet_handle_md ping_mdh; /* filled in by lnet_ping_target_setup() */
+
+ if (the_lnet.ln_routing) /* NOTE(review): read before ln_api_mutex is taken — confirm that is intended */
+ return; /* nothing is done while the_lnet.ln_routing is set */
+
+ mutex_lock(&the_lnet.ln_api_mutex); /* NOTE(review): mutex_lock() may sleep — confirm every caller runs in process context */
+
+ if (!lnet_ping_target_setup(&pbuf, &ping_mdh,
+ LNET_PING_INFO_HDR_SIZE +
+ lnet_get_ni_bytes(), /* size argument: header size plus lnet_get_ni_bytes() */
+ false)) /* a zero return is treated as success — presumably 0 / -errno, TODO confirm */
+ lnet_ping_target_update(pbuf, ping_mdh); /* only reached when setup returned 0; a failure is silently ignored */
+
+ mutex_unlock(&the_lnet.ln_api_mutex);
+}
+EXPORT_SYMBOL(lnet_update_ping_buffer); /* exported; the kiblnd_* and ksocknal_* hunks in this patch call it */
+
void lnet_incr_dlc_seq(void)
{
atomic_inc(&lnet_dlc_seq_no);
@@ -3079,6 +3079,15 @@ int ping_info_count_entries(struct lnet_ping_buffer *pbuf)
return nnis;
}
+static inline void handle_disc_lpni_health(struct lnet_peer_ni *lpni) /* adjusts a peer NI's health according to its lpni_ns_status */
+{
+ if (lpni->lpni_ns_status == LNET_NI_STATUS_DOWN)
+ lnet_handle_remote_failure_locked(lpni); /* NOTE(review): the _locked suffix suggests the caller must hold a lock — confirm which one */
+ else if (lpni->lpni_ns_status == LNET_NI_STATUS_UP &&
+ !lpni->lpni_last_alive) /* zero lpni_last_alive — presumably "never seen alive", TODO confirm */
+ atomic_set(&lpni->lpni_healthv, LNET_MAX_HEALTH_VALUE); /* health is set straight to the maximum value */
+} /* any other combination leaves the peer NI untouched */
+
/*
* Build a peer from incoming data.
*
@@ -3118,6 +3127,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
int i;
int j;
int rc;
+ u32 old_st;
flags = LNET_PEER_DISCOVERED;
if (pbuf->pb_info.pi_features & LNET_PING_FEAT_MULTI_RAIL)
@@ -3194,7 +3204,10 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
*/
lpni = lnet_peer_ni_find_locked(&curnis[i]);
if (lpni) {
+ old_st = lpni->lpni_ns_status;
lpni->lpni_ns_status = *stp;
+ if (old_st != lpni->lpni_ns_status)
+ handle_disc_lpni_health(lpni);
lnet_peer_ni_decref_locked(lpni);
}
break;
@@ -3224,6 +3237,7 @@ static int lnet_peer_merge_data(struct lnet_peer *lp,
lpni = lnet_peer_ni_find_locked(&addnis[i].ns_nid);
if (lpni) {
lpni->lpni_ns_status = addnis[i].ns_status;
+ handle_disc_lpni_health(lpni);
lnet_peer_ni_decref_locked(lpni);
}
}