@@ -312,8 +312,33 @@ struct lnet_lnd {
/* accept a new connection */
int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock);
+
+ /* get dma_dev priority */
+ unsigned int (*lnd_get_dev_prio)(struct lnet_ni *ni,
+ unsigned int dev_idx);
};
+/* FIXME: The abstraction for GPU page support (PCI peer-to-peer)
+ * was designed only for the external NVIDIA driver, and designed
+ * very poorly. Once DRI / TTM supports peer-to-peer we can redo
+ * this properly.
+ */
+/* Placeholder: ignores both @dev and @dev_idx and always reports
+ * UINT_MAX. lnet_get_best_ni() prefers the numerically smaller
+ * priority, so UINT_MAX never beats another value.
+ */
+static inline unsigned int
+lnet_get_dev_prio(struct device *dev, unsigned int dev_idx)
+{
+	return UINT_MAX;
+}
+
+/* Placeholder: ignores @page and always answers false, i.e. no page
+ * is ever reported as RDMA-only.
+ */
+static inline bool
+lnet_is_rdma_only_page(struct page *page)
+{
+	return false;
+}
+
+/* Placeholder: ignores @page and always reports device index 0.
+ *
+ * lnet_get_best_ni() only asks for an index after
+ * lnet_is_rdma_only_page() has returned true for the same page.
+ */
+static inline unsigned int
+lnet_get_dev_idx(struct page *page)
+{
+	return 0;
+}
+
struct lnet_tx_queue {
int tq_credits; /* # tx credits free */
int tq_credits_min; /* lowest it's been */
@@ -2953,6 +2953,7 @@ static int kiblnd_startup(struct lnet_ni *ni)
.lnd_ctl = kiblnd_ctl,
.lnd_send = kiblnd_send,
.lnd_recv = kiblnd_recv,
+ .lnd_get_dev_prio = kiblnd_get_dev_prio,
};
static void ko2inlnd_assert_wire_constants(void)
@@ -858,18 +858,18 @@ static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
#define KIBLND_UNMAP_ADDR(p, m, a) (a)
-static inline int kiblnd_dma_map_sg(struct ib_device *dev,
+/* DMA-map @nents entries of @sg for @direction on the ib_device
+ * referenced by @hdev (hdev->ibh_ibdev). Returns the value of
+ * ib_dma_map_sg() unchanged.
+ */
+static inline int kiblnd_dma_map_sg(struct kib_hca_dev *hdev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- return ib_dma_map_sg(dev, sg, nents, direction);
+ return ib_dma_map_sg(hdev->ibh_ibdev, sg, nents, direction);
}
-static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
+/* Counterpart of kiblnd_dma_map_sg(): passes @sg, @nents and
+ * @direction to ib_dma_unmap_sg() on the ib_device referenced by
+ * @hdev (hdev->ibh_ibdev).
+ */
+static inline void kiblnd_dma_unmap_sg(struct kib_hca_dev *hdev,
struct scatterlist *sg, int nents,
enum dma_data_direction direction)
{
- ib_dma_unmap_sg(dev, sg, nents, direction);
+ ib_dma_unmap_sg(hdev->ibh_ibdev, sg, nents, direction);
}
static inline u64 kiblnd_sg_dma_address(struct ib_device *dev,
@@ -959,3 +959,4 @@ void kiblnd_pack_msg(struct lnet_ni *ni, struct kib_msg *msg, int version,
int kiblnd_send(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg);
int kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
int delayed, struct iov_iter *to, unsigned int rlen);
+unsigned int kiblnd_get_dev_prio(struct lnet_ni *ni, unsigned int dev_idx);
@@ -615,7 +615,7 @@ static void kiblnd_unmap_tx(struct kib_tx *tx)
kiblnd_fmr_pool_unmap(&tx->tx_fmr, tx->tx_status);
if (tx->tx_nfrags) {
- kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev->ibh_ibdev,
+ kiblnd_dma_unmap_sg(tx->tx_pool->tpo_hdev,
tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir);
tx->tx_nfrags = 0;
}
@@ -636,7 +636,7 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
tx->tx_nfrags = nfrags;
- rd->rd_nfrags = kiblnd_dma_map_sg(hdev->ibh_ibdev, tx->tx_frags,
+ rd->rd_nfrags = kiblnd_dma_map_sg(hdev, tx->tx_frags,
tx->tx_nfrags, tx->tx_dmadir);
for (i = 0, nob = 0; i < rd->rd_nfrags; i++) {
@@ -1721,6 +1721,18 @@ static int kiblnd_map_tx(struct lnet_ni *ni, struct kib_tx *tx,
lnet_finalize(lntmsg, -EIO);
}
+/* lnd_get_dev_prio handler for o2iblnd.
+ *
+ * Resolves the DMA device behind the NI's IB HCA and hands it, together
+ * with @dev_idx, to lnet_get_dev_prio(). When the NI carries no kib_net
+ * (ni->ni_data is NULL) a NULL device is passed instead.
+ */
+unsigned int
+kiblnd_get_dev_prio(struct lnet_ni *ni, unsigned int dev_idx)
+{
+	struct kib_net *net = ni->ni_data;
+	struct ib_device *ibdev;
+
+	/* No o2ib state attached to this NI: query without a device */
+	if (!net)
+		return lnet_get_dev_prio(NULL, dev_idx);
+
+	ibdev = net->ibn_dev->ibd_hdev->ibh_ibdev;
+
+	return lnet_get_dev_prio(ibdev->dma_device, dev_idx);
+}
+
+
int
kiblnd_recv(struct lnet_ni *ni, void *private, struct lnet_msg *lntmsg,
int delayed, struct iov_iter *to, unsigned int rlen)
@@ -1420,16 +1420,38 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
return best_route;
}
+/* Forward @ni and @dev_idx to the lnd_get_dev_prio hook of the LND
+ * that owns @ni.
+ *
+ * Returns UINT_MAX when @dev_idx is UINT_MAX, or when @ni, its net,
+ * the net's LND or the hook itself is missing; otherwise returns
+ * whatever the hook reports.
+ */
+static inline unsigned int
+lnet_dev_prio_of_md(struct lnet_ni *ni, unsigned int dev_idx)
+{
+	const struct lnet_lnd *lnd;
+
+	/* UINT_MAX is the "no device index" marker: nothing to look up */
+	if (dev_idx == UINT_MAX)
+		return UINT_MAX;
+
+	if (!ni || !ni->ni_net)
+		return UINT_MAX;
+
+	lnd = ni->ni_net->net_lnd;
+	if (!lnd || !lnd->lnd_get_dev_prio)
+		return UINT_MAX;
+
+	return lnd->lnd_get_dev_prio(ni, dev_idx);
+}
+
static struct lnet_ni *
lnet_get_best_ni(struct lnet_net *local_net, struct lnet_ni *best_ni,
struct lnet_peer *peer, struct lnet_peer_net *peer_net,
- int md_cpt)
+ struct lnet_msg *msg, int md_cpt)
{
- struct lnet_ni *ni = NULL;
+ struct lnet_libmd *md = msg->msg_md;
+ unsigned int offset = msg->msg_offset;
unsigned int shortest_distance;
+ struct lnet_ni *ni = NULL;
int best_credits;
int best_healthv;
u32 best_sel_prio;
+ unsigned int best_dev_prio;
+ unsigned int dev_idx = UINT_MAX;
+ struct page *page = lnet_get_first_page(md, offset);
+
+ msg->msg_rdma_force = lnet_is_rdma_only_page(page);
+ if (msg->msg_rdma_force)
+ dev_idx = lnet_get_dev_idx(page);
/* If there is no peer_ni that we can send to on this network,
* then there is no point in looking for a new best_ni here.
@@ -1440,9 +1462,11 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
if (!best_ni) {
best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
shortest_distance = UINT_MAX;
+ best_dev_prio = UINT_MAX;
best_credits = INT_MIN;
best_healthv = 0;
} else {
+ best_dev_prio = lnet_dev_prio_of_md(best_ni, dev_idx);
shortest_distance = cfs_cpt_distance(lnet_cpt_table(), md_cpt,
best_ni->ni_dev_cpt);
best_credits = atomic_read(&best_ni->ni_tx_credits);
@@ -1456,6 +1480,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
int ni_healthv;
int ni_fatal;
u32 ni_sel_prio;
+ unsigned int ni_dev_prio;
ni_credits = atomic_read(&ni->ni_tx_credits);
ni_healthv = atomic_read(&ni->ni_healthv);
@@ -1471,6 +1496,8 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
md_cpt,
ni->ni_dev_cpt);
+ ni_dev_prio = lnet_dev_prio_of_md(ni, dev_idx);
+
/*
* All distances smaller than the NUMA range
* are treated equally.
@@ -1478,22 +1505,21 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
if (distance < lnet_numa_range)
distance = lnet_numa_range;
- /*
- * Select on health, shorter distance, available
- * credits, then round-robin.
+ /* Select on health, selection policy, direct dma prio,
+ * shorter distance, available credits, then round-robin.
*/
if (ni_fatal)
continue;
if (best_ni)
CDEBUG(D_NET,
- "compare ni %s [c:%d, d:%d, s:%d, p:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u]\n",
+ "compare ni %s [c:%d, d:%d, s:%d, p:%u, g:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u, g:%u]\n",
libcfs_nid2str(ni->ni_nid), ni_credits, distance,
- ni->ni_seq, ni_sel_prio,
+ ni->ni_seq, ni_sel_prio, ni_dev_prio,
(best_ni) ? libcfs_nid2str(best_ni->ni_nid)
: "not selected", best_credits, shortest_distance,
(best_ni) ? best_ni->ni_seq : 0,
- best_sel_prio);
+ best_sel_prio, best_dev_prio);
else
goto select_ni;
@@ -1507,6 +1533,11 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
else if (ni_sel_prio < best_sel_prio)
goto select_ni;
+ if (ni_dev_prio > best_dev_prio)
+ continue;
+ else if (ni_dev_prio < best_dev_prio)
+ goto select_ni;
+
if (distance > shortest_distance)
continue;
else if (distance < shortest_distance)
@@ -1522,6 +1553,7 @@ void lnet_usr_translate_stats(struct lnet_ioctl_element_msg_stats *msg_stats,
select_ni:
best_sel_prio = ni_sel_prio;
+ best_dev_prio = ni_dev_prio;
shortest_distance = distance;
best_healthv = ni_healthv;
best_ni = ni;
@@ -1812,6 +1844,7 @@ struct lnet_ni *
lnet_find_best_ni_on_spec_net(struct lnet_ni *cur_best_ni,
struct lnet_peer *peer,
struct lnet_peer_net *peer_net,
+ struct lnet_msg *msg,
int cpt)
{
struct lnet_net *local_net;
@@ -1829,7 +1862,7 @@ struct lnet_ni *
* 3. Round Robin
*/
best_ni = lnet_get_best_ni(local_net, cur_best_ni,
- peer, peer_net, cpt);
+ peer, peer_net, msg, cpt);
return best_ni;
}
@@ -2064,6 +2097,7 @@ struct lnet_ni *
if (!sd->sd_best_ni) {
lpn = gwni->lpni_peer_net;
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw, lpn,
+ sd->sd_msg,
sd->sd_md_cpt);
if (!sd->sd_best_ni) {
CERROR("Internal Error. Expected local ni on %s but non found :%s\n",
@@ -2143,7 +2177,7 @@ struct lnet_ni *
struct lnet_ni *
lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt,
- bool discovery)
+ struct lnet_msg *msg, bool discovery)
{
struct lnet_peer_net *lpn = NULL;
struct lnet_peer_net *best_lpn = NULL;
@@ -2237,8 +2271,8 @@ struct lnet_ni *
/* Select the best NI on the same net as best_lpn chosen
* above
*/
- best_ni = lnet_find_best_ni_on_spec_net(NULL, peer,
- best_lpn, md_cpt);
+ best_ni = lnet_find_best_ni_on_spec_net(NULL, peer, best_lpn,
+ msg, md_cpt);
}
return best_ni;
@@ -2298,6 +2332,7 @@ struct lnet_ni *
best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
+ sd->sd_msg,
sd->sd_md_cpt);
/* If there is no best_ni we don't have a route */
if (!best_ni) {
@@ -2350,6 +2385,7 @@ struct lnet_ni *
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL,
sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
+ sd->sd_msg,
sd->sd_md_cpt);
if (!sd->sd_best_ni) {
CERROR("Unable to forward message to %s. No local NI available\n",
@@ -2382,6 +2418,7 @@ struct lnet_ni *
sd->sd_best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
+ sd->sd_msg,
sd->sd_md_cpt);
if (!sd->sd_best_ni) {
@@ -2403,6 +2440,7 @@ struct lnet_ni *
*/
sd->sd_best_ni = lnet_find_best_ni_on_local_net(sd->sd_peer,
sd->sd_md_cpt,
+ sd->sd_msg,
lnet_msg_discovery(sd->sd_msg));
if (sd->sd_best_ni) {
sd->sd_best_lpni =