@@ -684,6 +684,45 @@ struct lnet_ping_buffer {
#define LNET_PING_INFO_TO_BUFFER(PINFO) \
container_of((PINFO), struct lnet_ping_buffer, pb_info)
+/* Number of bytes that one status entry for @nid takes up.
+ *
+ * A nid4 address uses a whole struct lnet_ni_status.  Anything else uses
+ * the part of struct lnet_ni_large_status that precedes ns_nid plus
+ * NID_BYTES(nid), rounded up to a multiple of 4.
+ */
+static inline int
+lnet_ping_sts_size(const struct lnet_nid *nid)
+{
+	int nob = sizeof(struct lnet_ni_status);
+
+	if (!nid_is_nid4(nid)) {
+		nob = offsetof(struct lnet_ni_large_status, ns_nid) +
+		      NID_BYTES(nid);
+		nob = round_up(nob, 4);
+	}
+
+	return nob;
+}
+
+/* Step from the large status entry @nis to whatever follows it.  The
+ * stride is the size of @nis itself, derived from the NID it contains.
+ */
+static inline struct lnet_ni_large_status *
+lnet_ping_sts_next(const struct lnet_ni_large_status *nis)
+{
+	int stride = lnet_ping_sts_size(&nis->ns_nid);
+
+	return (void *)nis + stride;
+}
+
+/* NOTE: despite its name, this helper answers "does @pi describe at most
+ * two entries?".  It returns true for a small ping info and false once
+ * more entries are present.
+ */
+static inline bool
+lnet_ping_at_least_two_entries(const struct lnet_ping_info *pi)
+{
+ /* Return true if we have at most two entries. There is always at
+ * least one, a 4-byte lo0 interface.
+ */
+ struct lnet_ni_large_status *lns;
+
+ /* Only nid4 entries present: pi_nnis counts all of them. */
+ if ((pi->pi_features & LNET_PING_FEAT_LARGE_ADDR) == 0)
+ return pi->pi_nnis <= 2;
+ /* There is at least 1 large-address entry */
+ if (pi->pi_nnis != 1)
+ return false;
+ /* With a single nid4 entry, the first large entry starts at the
+ * address of pi_ni[1].  Step over it and check whether the ping
+ * info ends there, i.e. whether no further large entry follows.
+ */
+ lns = (void *)&pi->pi_ni[1];
+ lns = lnet_ping_sts_next(lns);
+
+ return ((void *)pi + lnet_ping_info_size(pi) <= (void *)lns);
+}
+
struct lnet_nid_list {
struct list_head nl_list;
struct lnet_nid nl_nid;
@@ -247,7 +247,6 @@ struct lnet_counters_common {
__u64 lcc_drop_length;
} __attribute__((packed));
-
#define LNET_NI_STATUS_UP 0x15aac0de
#define LNET_NI_STATUS_DOWN 0xdeadface
#define LNET_NI_STATUS_INVALID 0x00000000
@@ -255,19 +254,32 @@ struct lnet_counters_common {
struct lnet_ni_status {
lnet_nid_t ns_nid;
__u32 ns_status;
- __u32 ns_unused;
+ __u32 ns_msg_size; /* represents ping buffer size if message
+ * contains large NID addresses.
+ * lnet_ping_info_size() reads it from
+ * pi_ni[0] when LNET_PING_FEAT_LARGE_ADDR
+ * is set in pi_features.
+ */
} __attribute__((packed));
-/*
- * NB: value of these features equal to LNET_PROTO_PING_VERSION_x
+/* When this appears in lnet_ping_info, it will be large
+ * enough to hold whatever nid is present, rounded up
+ * to a multiple of 4 bytes.
+ * NOTE: all users MUST check ns_nid.nid_size is usable.
+ *
+ * Entries are therefore variable length: use lnet_ping_sts_size() /
+ * lnet_ping_sts_next() to walk them rather than pointer arithmetic on
+ * this struct type.
+ */
+struct lnet_ni_large_status {
+ /* ns_status comes first here, unlike struct lnet_ni_status */
+ __u32 ns_status;
+ struct lnet_nid ns_nid;
+} __attribute__((packed));
+
+/* NB: value of these features equal to LNET_PROTO_PING_VERSION_x
* of old LNet, so there shouldn't be any compatibility issue
*/
#define LNET_PING_FEAT_INVAL (0) /* no feature */
#define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */
#define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */
-#define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing enabled */
-#define LNET_PING_FEAT_MULTI_RAIL (1 << 3) /* Multi-Rail aware */
+#define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* Routing enabled */
+#define LNET_PING_FEAT_MULTI_RAIL (1 << 3) /* Multi-Rail aware */
#define LNET_PING_FEAT_DISCOVERY (1 << 4) /* Supports Discovery */
+#define LNET_PING_FEAT_LARGE_ADDR (1 << 5) /* Large addr nids present */
+#define LNET_PING_FEAT_PRIMARY_LARGE (1 << 6) /* Primary is first Large addr */
/*
* All ping feature bits fit to hit the wire.
@@ -277,17 +289,26 @@ struct lnet_ni_status {
* New feature bits can be added, just be aware that this does change the
* over-the-wire protocol.
*/
-#define LNET_PING_FEAT_BITS (LNET_PING_FEAT_BASE | \
- LNET_PING_FEAT_NI_STATUS | \
- LNET_PING_FEAT_RTE_DISABLED | \
- LNET_PING_FEAT_MULTI_RAIL | \
- LNET_PING_FEAT_DISCOVERY)
-
+#define LNET_PING_FEAT_BITS (LNET_PING_FEAT_BASE | \
+ LNET_PING_FEAT_NI_STATUS | \
+ LNET_PING_FEAT_RTE_DISABLED | \
+ LNET_PING_FEAT_MULTI_RAIL | \
+ LNET_PING_FEAT_DISCOVERY | \
+ LNET_PING_FEAT_LARGE_ADDR | \
+ LNET_PING_FEAT_PRIMARY_LARGE)
+
+/* NOTE:
+ * The first address in pi_ni *must* be the loop-back nid: LNET_NID_LO_0
+ * The second address must be the primary nid for the host unless
+ * LNET_PING_FEAT_PRIMARY_LARGE is set, then the first large address
+ * is the preferred primary. However nodes that do not recognise that
+ * flag will quietly ignore it.
+ *
+ * pi_features holds LNET_PING_FEAT_* bits.  When LNET_PING_FEAT_LARGE_ADDR
+ * is set, struct lnet_ni_large_status entries follow the pi_nnis
+ * struct lnet_ni_status entries and pi_ni[0].ns_msg_size gives the total
+ * size in bytes.
+ */
struct lnet_ping_info {
__u32 pi_magic;
__u32 pi_features;
lnet_pid_t pi_pid;
- __u32 pi_nnis;
+ __u32 pi_nnis; /* number of nid4 entries */
struct lnet_ni_status pi_ni[0];
} __attribute__((packed));
@@ -297,7 +318,14 @@ struct lnet_ping_info {
offsetof(struct lnet_ping_info, pi_ni[LNET_INTERFACES_MIN])
#define LNET_PING_INFO_LONI(PINFO) ((PINFO)->pi_ni[0].ns_nid)
#define LNET_PING_INFO_SEQNO(PINFO) ((PINFO)->pi_ni[0].ns_status)
-#define lnet_ping_info_size(pinfo) \
- offsetof(struct lnet_ping_info, pi_ni[(pinfo)->pi_nnis])
+/* If LNET_PING_FEAT_LARGE_ADDR set, pi_nnis is the number of nid4 entries
+ * and pi_ni[0].ns_msg_size is the total number of bytes, including header and
+ * lnet_ni_large_status entries which follow the lnet_ni_status entries.
+ * This must be a multiple of 4.
+ *
+ * In the large-address case the low two bits of ns_msg_size are masked
+ * off, so the result is always a multiple of 4.  Otherwise the size is
+ * computed from pi_nnis alone.
+ * The macro names its argument more than once: pass an expression without
+ * side effects.
+ */
+#define lnet_ping_info_size(pinfo) \
+ (((pinfo)->pi_features & LNET_PING_FEAT_LARGE_ADDR) \
+ ? ((pinfo)->pi_ni[0].ns_msg_size & ~3) \
+ : offsetof(struct lnet_ping_info, pi_ni[(pinfo)->pi_nnis]))
#endif
@@ -823,8 +823,15 @@ static void lnet_assert_wire_constants(void)
BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_nid) != 8);
BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_status) != 8);
BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_status) != 4);
- BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_unused) != 12);
- BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_unused) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_ni_status, ns_msg_size) != 12);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_ni_status *)0)->ns_msg_size) != 4);
+
+ /* Checks for struct lnet_ni_large_status */
+ BUILD_BUG_ON((int)sizeof(struct lnet_ni_large_status) != 24);
+ BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_status) != 0);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_status) != 4);
+ BUILD_BUG_ON((int)offsetof(struct lnet_ni_large_status, ns_nid) != 4);
+ BUILD_BUG_ON((int)sizeof(((struct lnet_ni_large_status *)0)->ns_nid) != 20);
/* Checks for struct lnet_ping_info and related constants */
BUILD_BUG_ON(LNET_PROTO_PING_MAGIC != 0x70696E67);
@@ -834,7 +841,9 @@ static void lnet_assert_wire_constants(void)
BUILD_BUG_ON(LNET_PING_FEAT_RTE_DISABLED != 4);
BUILD_BUG_ON(LNET_PING_FEAT_MULTI_RAIL != 8);
BUILD_BUG_ON(LNET_PING_FEAT_DISCOVERY != 16);
- BUILD_BUG_ON(LNET_PING_FEAT_BITS != 31);
+ BUILD_BUG_ON(LNET_PING_FEAT_LARGE_ADDR != 32);
+ BUILD_BUG_ON(LNET_PING_FEAT_PRIMARY_LARGE != 64);
+ BUILD_BUG_ON(LNET_PING_FEAT_BITS != 127);
/* Checks for struct lnet_ping_info */
BUILD_BUG_ON((int)sizeof(struct lnet_ping_info) != 16);
@@ -1770,21 +1779,7 @@ struct lnet_ping_buffer *
int bytes = 0;
list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
- if (nid_is_nid4(&ni->ni_nid))
- bytes += sizeof(struct lnet_ni_status);
-
- return bytes;
-}
-
-static inline int
-lnet_get_net_ni_bytes_pre(struct lnet_net *net)
-{
- struct lnet_ni *ni;
- int bytes = 0;
-
- list_for_each_entry(ni, &net->net_ni_added, ni_netlist)
- if (nid_is_nid4(&ni->ni_nid))
- bytes += sizeof(struct lnet_ni_status);
+ bytes += lnet_ping_sts_size(&ni->ni_nid);
return bytes;
}
@@ -1800,9 +1795,7 @@ struct lnet_ping_buffer *
list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
list_for_each_entry(ni, &net->net_ni_list, ni_netlist)
- if (nid_is_nid4(&ni->ni_nid))
- bytes += sizeof(struct lnet_ni_status);
-
+ bytes += lnet_ping_sts_size(&ni->ni_nid);
}
lnet_net_unlock(0);
@@ -1813,6 +1806,7 @@ struct lnet_ping_buffer *
void
lnet_swap_pinginfo(struct lnet_ping_buffer *pbuf)
{
+ struct lnet_ni_large_status *lstat, *lend;
struct lnet_ni_status *stat, *end;
int nnis;
int i;
@@ -1827,6 +1821,19 @@ struct lnet_ping_buffer *
for (i = 0; i < nnis && stat + 1 <= end; i++, stat++) {
__swab64s(&stat->ns_nid);
__swab32s(&stat->ns_status);
+ if (i == 0)
+ /* Might be total size */
+ __swab32s(&stat->ns_msg_size);
+ }
+ if (!(pbuf->pb_info.pi_features & LNET_PING_FEAT_LARGE_ADDR))
+ return;
+
+ lstat = (struct lnet_ni_large_status *)stat;
+ lend = (void *)end;
+ while (lstat + 1 <= lend) {
+ __swab32s(&lstat->ns_status);
+ /* struct lnet_nid never needs to be swabbed */
+ lstat = lnet_ping_sts_next(lstat);
}
}
@@ -1954,6 +1961,7 @@ struct lnet_ping_buffer *
static void
lnet_ping_target_install_locked(struct lnet_ping_buffer *pbuf)
{
+ struct lnet_ni_large_status *lns, *lend;
struct lnet_ni_status *ns, *end;
struct lnet_ni *ni;
struct lnet_net *net;
@@ -1964,8 +1972,14 @@ struct lnet_ping_buffer *
end = (void *)&pbuf->pb_info + pbuf->pb_nbytes;
list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
- if (!nid_is_nid4(&ni->ni_nid))
+ if (!nid_is_nid4(&ni->ni_nid)) {
+ if (ns == &pbuf->pb_info.pi_ni[1]) {
+ /* This is primary, and it is long */
+ pbuf->pb_info.pi_features |=
+ LNET_PING_FEAT_PRIMARY_LARGE;
+ }
continue;
+ }
LASSERT(ns + 1 <= end);
ns->ns_nid = lnet_nid_to_nid4(&ni->ni_nid);
@@ -1979,6 +1993,31 @@ struct lnet_ping_buffer *
}
}
+	/* Second pass: fill in the large (non-nid4) entries.  They start
+	 * where the nid4 cursor 'ns' stopped and are walked with
+	 * lnet_ping_sts_next(), since each entry's length depends on its NID.
+	 */
+	lns = (void *)ns;
+	lend = (void *)end;
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+			if (nid_is_nid4(&ni->ni_nid))
+				continue;
+			LASSERT(lns + 1 <= lend);
+
+			lns->ns_nid = ni->ni_nid;
+
+			lnet_ni_lock(ni);
+			/* The status belongs in this large entry.  Storing it
+			 * through the nid4 cursor 'ns' would hit a different
+			 * offset (ns_status follows the 8-byte nid there) and
+			 * overwrite part of the ns_nid copied just above,
+			 * while leaving the slot that ni->ni_status points
+			 * at uninitialised.
+			 */
+			lns->ns_status = lnet_ni_get_status_locked(ni);
+			ni->ni_status = &lns->ns_status;
+			lnet_ni_unlock(ni);
+
+			lns = lnet_ping_sts_next(lns);
+		}
+	}
+	if ((void *)lns > (void *)ns) {
+		/* At least one large entry was written: record the total
+		 * info size in the first nid4 entry and advertise the
+		 * large-address feature.
+		 */
+		pbuf->pb_info.pi_ni[0].ns_msg_size =
+			(void *)lns - (void *)&pbuf->pb_info;
+		pbuf->pb_info.pi_features |= LNET_PING_FEAT_LARGE_ADDR;
+	}
+
/* We (ab)use the ns_status of the loopback interface to
* transmit the sequence number. The first interface listed
* must be the loopback interface.
@@ -3397,7 +3436,6 @@ static int lnet_add_net_common(struct lnet_net *net,
struct lnet_ping_buffer *pbuf;
struct lnet_remotenet *rnet;
struct lnet_ni *ni;
- int net_ni_bytes;
u32 net_id;
int rc;
@@ -3415,39 +3453,32 @@ static int lnet_add_net_common(struct lnet_net *net,
return -EUSERS;
}
- /*
- * make sure you calculate the correct number of slots in the ping
+ if (tun)
+ memcpy(&net->net_tunables,
+ &tun->lt_cmn, sizeof(net->net_tunables));
+ else
+ memset(&net->net_tunables, -1, sizeof(net->net_tunables));
+
+ net_id = net->net_id;
+
+ rc = lnet_startup_lndnet(net, (tun ? &tun->lt_tun : NULL));
+ if (rc < 0)
+ return rc;
+
+ /* make sure you calculate the correct number of slots in the ping
* buffer. Since the ping info is a flattened list of all the NIs,
* we should allocate enough slots to accomodate the number of NIs
* which will be added.
- *
- * since ni hasn't been configured yet, use
- * lnet_get_net_ni_bytes_pre() which checks the net_ni_added list
*/
- net_ni_bytes = lnet_get_net_ni_bytes_pre(net);
-
rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
LNET_PING_INFO_HDR_SIZE +
- net_ni_bytes + lnet_get_ni_bytes(),
+ lnet_get_ni_bytes(),
false);
if (rc < 0) {
- lnet_net_free(net);
+ lnet_shutdown_lndnet(net);
return rc;
}
- if (tun)
- memcpy(&net->net_tunables,
- &tun->lt_cmn, sizeof(net->net_tunables));
- else
- memset(&net->net_tunables, -1, sizeof(net->net_tunables));
-
- net_id = net->net_id;
-
- rc = lnet_startup_lndnet(net, (tun ?
- &tun->lt_tun : NULL));
- if (rc < 0)
- goto failed;
-
lnet_net_lock(LNET_LOCK_EX);
net = lnet_get_net_locked(net_id);
LASSERT(net);
@@ -3678,7 +3709,7 @@ int lnet_dyn_del_ni(struct lnet_nid *nid)
rc = lnet_ping_target_setup(&pbuf, &ping_mdh,
(LNET_PING_INFO_HDR_SIZE +
lnet_get_ni_bytes() -
- sizeof(pbuf->pb_info.pi_ni[0])),
+ lnet_ping_sts_size(&ni->ni_nid)),
false);
if (rc != 0)
goto unlock_api_mutex;
@@ -5428,10 +5459,12 @@ static int lnet_ping(struct lnet_process_id id4, struct lnet_nid *src_nid,
goto fail_ping_buffer_decref;
}
- /* Test if smaller than lnet_pinginfo with no pi_ni status info */
- if (nob < LNET_PING_INFO_HDR_SIZE) {
+ /* Test if smaller than lnet_pinginfo with just one pi_ni status info.
+ * That one might contain size when large nids are used.
+ */
+ if (nob < LNET_PING_INFO_SIZE(1)) {
CERROR("%s: Short reply %d(%lu min)\n",
- libcfs_idstr(&id), nob, LNET_PING_INFO_HDR_SIZE);
+ libcfs_idstr(&id), nob, LNET_PING_INFO_SIZE(1));
goto fail_ping_buffer_decref;
}
@@ -831,7 +831,7 @@
* I only have a single (non-lolnd) interface.
*/
pi = &the_lnet.ln_ping_target->pb_info;
- if (pi->pi_nnis <= 2) {
+ if (lnet_ping_at_least_two_entries(pi)) {
handle_local_health = false;
attempt_local_resend = false;
}