@@ -98,6 +98,8 @@ struct rxe_mmap_info *rxe_create_mmap_info(struct rxe_dev *dev, u32 size,
int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
+void rxe_set_mr_lkey(struct rxe_mr *mr);
+
enum copy_direction {
to_mr_obj,
from_mr_obj,
@@ -137,6 +139,7 @@ void rxe_mr_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
/* rxe_mw.c */
+void rxe_set_mw_rkey(struct rxe_mw *mw);
struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
@@ -34,6 +34,23 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* choose a unique, non-zero random number for the lkey; the msb
+ * is cleared so that mr lkeys cannot collide with mw rkeys, which
+ * have the msb set
+ */
+void rxe_set_mr_lkey(struct rxe_mr *mr)
+{
+ int ret;
+ u32 lkey;
+
+next_lkey:
+ get_random_bytes(&lkey, sizeof(lkey));
+ lkey &= 0x7fffffff;
+ if (unlikely(lkey == 0))
+ goto next_lkey;
+ ret = rxe_add_key(mr, &lkey);
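+ /* rxe_add_key() returns -EAGAIN if this key is already
+ * present in the pool's rb tree; try a new random value
+ */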
+ if (unlikely(ret == -EAGAIN))
+ goto next_lkey;
+}
+
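+/* TODO: the lfsr key generator below is now unused and can be
+ * deleted; it is superseded by rxe_set_mr_lkey()
+ */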
+#if 0
/*
* lfsr (linear feedback shift register) with period 255
*/
@@ -50,6 +67,7 @@ static u8 rxe_get_key(void)
return key;
}
+#endif
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
@@ -76,16 +94,16 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
- u32 lkey = mr->pelem.index << 8 | rxe_get_key();
- u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+ rxe_set_mr_lkey(mr);
- if (mr->pelem.pool->type == RXE_TYPE_MR) {
- mr->ibmr.lkey = lkey;
- mr->ibmr.rkey = rkey;
- }
+ if (access & IB_ACCESS_REMOTE)
+ mr->ibmr.rkey = mr->ibmr.lkey;
+ else
+ mr->ibmr.rkey = 0;
- mr->lkey = lkey;
- mr->rkey = rkey;
+ /* TODO: don't carry two copies of the keys */
+ mr->lkey = mr->ibmr.lkey;
+ mr->rkey = mr->ibmr.rkey;
mr->state = RXE_MEM_STATE_INVALID;
mr->type = RXE_MEM_TYPE_NONE;
mr->map_shift = ilog2(RXE_BUF_PER_MAP);
@@ -155,9 +173,9 @@ void rxe_mr_init_dma(struct rxe_pd *pd,
mr->type = RXE_MEM_TYPE_DMA;
}
-int rxe_mr_init_user(struct rxe_pd *pd, u64 start,
- u64 length, u64 iova, int access, struct ib_udata *udata,
- struct rxe_mr *mr)
+int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length,
+ u64 iova, int access, struct ib_udata *udata,
+ struct rxe_mr *mr)
{
struct rxe_map **map;
struct rxe_phys_buf *buf = NULL;
@@ -233,15 +251,15 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start,
return err;
}
-int rxe_mr_init_fast(struct rxe_pd *pd,
- int max_pages, struct rxe_mr *mr)
+int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages,
+ struct rxe_mr *mr)
{
int err;
rxe_mr_init(0, mr);
/* In fastreg, we also set the rkey */
- mr->ibmr.rkey = mr->ibmr.lkey;
+ mr->ibmr.rkey = mr->ibmr.lkey;
+ mr->rkey = mr->ibmr.rkey;
err = rxe_mr_alloc(mr, max_pages);
if (err)
@@ -564,18 +582,17 @@ int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
* (4) verify that mr state is valid
*/
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
- enum lookup_type type)
+ enum lookup_type type)
{
struct rxe_mr *mr;
struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
- int index = key >> 8;
- mr = rxe_pool_get_index(&rxe->mr_pool, index);
+ mr = rxe_pool_get_key(&rxe->mr_pool, &key);
if (!mr)
return NULL;
- if (unlikely((type == lookup_local && mr->lkey != key) ||
- (type == lookup_remote && mr->rkey != key) ||
+ if (unlikely((type == lookup_local && mr->ibmr.lkey != key) ||
+ (type == lookup_remote && mr->ibmr.rkey != key) ||
mr->pd != pd ||
(access && !(access & mr->access)) ||
mr->state != RXE_MEM_STATE_VALID)) {
@@ -35,49 +35,95 @@
#include "rxe.h"
#include "rxe_loc.h"
+/* choose a unique, non-zero random number for the rkey; the msb
+ * is set so that mw rkeys cannot collide with mr lkeys, which
+ * have the msb cleared
+ */
+void rxe_set_mw_rkey(struct rxe_mw *mw)
+{
+ int ret;
+ u32 rkey;
+
+next_rkey:
+ get_random_bytes(&rkey, sizeof(rkey));
+ rkey |= 0x80000000; /* the msb guarantees a non-zero rkey */
+ ret = rxe_add_key(mw, &rkey);
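+ /* -EAGAIN means the rkey is already present in the
+ * pool's rb tree; try a new random value
+ */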
+ if (unlikely(ret == -EAGAIN))
+ goto next_rkey;
+}
+
/* place holder alloc and dealloc routines
- * need to add cross references between qp and mr with mw
+ * TODO add cross references between qp and mr with mw
* and cleanup when one side is deleted. Enough to make
* verbs function correctly for now */
struct ib_mw *rxe_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
struct ib_udata *udata)
{
+ int ret;
+ struct rxe_mw *mw;
struct rxe_pd *pd = to_rpd(ibpd);
struct rxe_dev *rxe = to_rdev(ibpd->device);
- struct rxe_mw *mw;
- u32 rkey;
- u8 key;
+ struct rxe_alloc_mw_resp __user *uresp;
+
+ if (udata && udata->outlen < sizeof(*uresp)) {
+ ret = -EINVAL;
+ goto err1;
+ }
if (unlikely((type != IB_MW_TYPE_1) &&
- (type != IB_MW_TYPE_2)))
- return ERR_PTR(-EINVAL);
+ (type != IB_MW_TYPE_2))) {
+ ret = -EINVAL;
+ goto err1;
+ }
rxe_add_ref(pd);
mw = rxe_alloc(&rxe->mw_pool);
if (!mw) {
rxe_drop_ref(pd);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto err1;
}
- /* pick a random key part as a starting point */
rxe_add_index(mw);
- get_random_bytes(&key, sizeof(key));
- rkey = mw->pelem.index << 8 | key;
+ rxe_set_mw_rkey(mw);
+
+ pr_info("rxe_alloc_mw: index = 0x%08x, rkey = 0x%08x\n",
+ mw->pelem.index, mw->ibmw.rkey);
spin_lock_init(&mw->lock);
+
+ if (type == IB_MW_TYPE_2)
+ mw->state = RXE_MW_STATE_FREE;
+ else
+ mw->state = RXE_MW_STATE_VALID;
+
mw->qp = NULL;
mw->mr = NULL;
mw->addr = 0;
mw->length = 0;
mw->ibmw.pd = ibpd;
mw->ibmw.type = type;
- mw->ibmw.rkey = rkey;
- mw->state = (type == IB_MW_TYPE_2) ?
- RXE_MW_STATE_FREE :
- RXE_MW_STATE_VALID;
+
+ if (udata) {
+ uresp = udata->outbuf;
+ if (copy_to_user(&uresp->index, &mw->pelem.index,
+ sizeof(uresp->index))) {
+ ret = -EFAULT;
+ goto err2;
+ }
+ }
return &mw->ibmw;
+err2:
+ rxe_drop_key(mw);
+ rxe_drop_index(mw);
+ rxe_drop_ref(mw);
+ rxe_drop_ref(pd);
+err1:
+ return ERR_PTR(ret);
}
int rxe_dealloc_mw(struct ib_mw *ibmw)
@@ -90,8 +136,9 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
mw->state = RXE_MW_STATE_INVALID;
spin_unlock_irqrestore(&mw->lock, flags);
- rxe_drop_ref(pd);
+ rxe_drop_key(mw);
rxe_drop_index(mw);
+ rxe_drop_ref(pd);
rxe_drop_ref(mw);
return 0;
@@ -99,6 +146,41 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
- pr_err("rxe_bind_mw: not implemented\n");
- return -ENOSYS;
+ struct rxe_mw *mw;
+ struct rxe_mr *mr;
+
+ pr_info("rxe_bind_mw: called\n");
+
+ if (qp->is_user) {
+ /* TODO: look up mw and mr from the indices carried
+ * in the user wqe (wr.wr.umw)
+ */
+ } else {
+ mw = to_rmw(wqe->wr.wr.kmw.ibmw);
+ mr = to_rmr(wqe->wr.wr.kmw.ibmr);
+ }
+
+ /* TODO: complete the bind operation. The user wqe
+ * (wr.wr.umw) carries addr, length, mr_index, mw_index,
+ * rkey and access, while the kernel wqe (wr.wr.kmw)
+ * carries ibmr and ibmw pointers in place of the indices.
+ * Of the rxe_mw fields, qp, mr, access, addr and length
+ * are set at bind time; pelem, ibmw, pd, type and lock are
+ * set at alloc time; state is updated by both.
+ */
+ return 0;
}
@@ -34,10 +34,6 @@
#include "rxe.h"
#include "rxe_loc.h"
-/* info about object pools
- * note that mr and mw share a single index space
- * so that one can map an lkey to the correct type of object
- */
struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
[RXE_TYPE_UC] = {
.name = "rxe-uc",
@@ -79,16 +75,22 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
.name = "rxe-mr",
.size = sizeof(struct rxe_mr),
.cleanup = rxe_mr_cleanup,
- .flags = RXE_POOL_INDEX,
+ .flags = RXE_POOL_INDEX | RXE_POOL_KEY,
.max_index = RXE_MAX_MR_INDEX,
.min_index = RXE_MIN_MR_INDEX,
+ .key_offset = offsetof(struct rxe_mr, ibmr.lkey),
+ .key_size = sizeof(u32),
},
[RXE_TYPE_MW] = {
.name = "rxe-mw",
.size = sizeof(struct rxe_mw),
- .flags = RXE_POOL_INDEX,
+ .flags = RXE_POOL_INDEX | RXE_POOL_KEY,
.max_index = RXE_MAX_MW_INDEX,
.min_index = RXE_MIN_MW_INDEX,
+ .key_offset = offsetof(struct rxe_mw, ibmw.rkey),
+ .key_size = sizeof(u32),
},
[RXE_TYPE_MC_GRP] = {
.name = "rxe-mc_grp",
@@ -308,8 +310,9 @@ static void insert_index(struct rxe_pool *pool, struct rxe_pool_entry *new)
return;
}
-static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
+static int insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
{
+ int ret;
struct rb_node **link = &pool->key.tree.rb_node;
struct rb_node *parent = NULL;
struct rxe_pool_entry *elem;
@@ -323,7 +326,7 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
(u8 *)new + pool->key.key_offset, pool->key.key_size);
if (cmp == 0) {
- pr_warn("key already exists!\n");
+ ret = -EAGAIN;
goto out;
}
@@ -335,20 +338,25 @@ static void insert_key(struct rxe_pool *pool, struct rxe_pool_entry *new)
rb_link_node(&new->key_node, parent, link);
rb_insert_color(&new->key_node, &pool->key.tree);
+
+ ret = 0;
out:
- return;
+ return ret;
}
-void rxe_add_key(void *arg, void *key)
+int rxe_add_key(void *arg, void *key)
{
+ int ret;
struct rxe_pool_entry *elem = arg;
struct rxe_pool *pool = elem->pool;
unsigned long flags;
write_lock_irqsave(&pool->pool_lock, flags);
memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size);
- insert_key(pool, elem);
+ ret = insert_key(pool, elem);
write_unlock_irqrestore(&pool->pool_lock, flags);
+
+ return ret;
}
void rxe_drop_key(void *arg)
@@ -156,7 +156,7 @@ void rxe_drop_index(void *elem);
/* assign a key to a keyed object and insert object into
* pool's rb tree
*/
-void rxe_add_key(void *elem, void *key);
+int rxe_add_key(void *elem, void *key);
/* remove elem from rb tree */
void rxe_drop_key(void *elem);
@@ -911,9 +911,20 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
int access, struct ib_udata *udata)
{
int err;
+ struct rxe_mr *mr;
struct rxe_dev *rxe = to_rdev(ibpd->device);
struct rxe_pd *pd = to_rpd(ibpd);
- struct rxe_mr *mr;
+ struct rxe_reg_mr_resp __user *uresp = NULL;
+
+ if (udata) {
+ if (udata->outlen < sizeof(*uresp)) {
+ err = -EINVAL;
+ goto err2;
+ }
+ uresp = udata->outbuf;
+ }
+
mr = rxe_alloc(&rxe->mr_pool);
if (!mr) {
@@ -923,19 +934,28 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
rxe_add_index(mr);
rxe_add_ref(pd);

err = rxe_mr_init_user(pd, start, length, iova,
access, udata, mr);
if (err)
goto err3;
+ pr_info("rxe_reg_user_mr: index = 0x%08x, rkey = 0x%08x\n",
+ mr->pelem.index, mr->ibmr.rkey);
+
+ if (uresp) {
+ if (copy_to_user(&uresp->index, &mr->pelem.index,
+ sizeof(uresp->index))) {
+ err = -EFAULT;
+ goto err3;
+ }
+ }
+
return &mr->ibmr;
err3:
- rxe_drop_ref(pd);
rxe_drop_index(mr);
rxe_drop_ref(mr);
+ rxe_drop_ref(pd);
err2:
return ERR_PTR(err);
}
@@ -96,12 +96,28 @@ struct rxe_send_wr {
struct {
__aligned_u64 addr;
__aligned_u64 length;
- __u32 mr_rkey;
- __u32 mw_rkey;
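+ /* pad1 and pad2 keep this layout identical to kmw
+ * below, where ibmr and ibmw are 64 bits wide
+ */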
+ __u32 mr_index;
+ __u32 pad1;
+ __u32 mw_index;
+ __u32 pad2;
__u32 rkey;
__u32 access;
- } bind_mw;
- /* reg is only used by the kernel and is not part of the uapi */
+ } umw;
+ /* the following structs are used only by the kernel and
+ * are not part of the user API
+ */
+ struct {
+ __aligned_u64 addr;
+ __aligned_u64 length;
+ union {
+ struct ib_mr *ibmr;
+ __aligned_u64 reserved1;
+ };
+ union {
+ struct ib_mw *ibmw;
+ __aligned_u64 reserved2;
+ };
+ __u32 rkey;
+ __u32 access;
+ } kmw;
struct {
union {
struct ib_mr *mr;
@@ -183,4 +199,14 @@ struct rxe_modify_srq_cmd {
__aligned_u64 mmap_info_addr;
};
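+
+/* the index of a newly created mr or mw is returned to user
+ * space so that wqes can refer to the object by index (see umw
+ * above)
+ */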
+struct rxe_reg_mr_resp {
+ __u32 index;
+ __u32 reserved;
+};
+
+struct rxe_alloc_mw_resp {
+ __u32 index;
+ __u32 reserved;
+};
+
#endif /* RDMA_USER_RXE_H */
Finished decoupling indices and keys for MW and MR objects. User
space can now refer to an object by its index, while the kernel
looks the object up by its lkey or rkey. Tweaked the user/kernel
ABI for rxe WQEs to use indices instead of rkeys to identify MWs
and MRs. Type 1 MWs can now be bound with the ibv_bind_mw API.

Signed-off-by: Bob Pearson <rpearson@hpe.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |   3 +
 drivers/infiniband/sw/rxe/rxe_mr.c    |  55 +++++++-----
 drivers/infiniband/sw/rxe/rxe_mw.c    | 116 ++++++++++++++++++++++----
 drivers/infiniband/sw/rxe/rxe_pool.c  |  30 ++++---
 drivers/infiniband/sw/rxe/rxe_pool.h  |   2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c |  28 ++++++-
 include/uapi/rdma/rdma_user_rxe.h     |  34 +++++++-
 7 files changed, 212 insertions(+), 56 deletions(-)
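
For reference, a type 1 MW bind from user space goes through the
plain libibverbs call; the application does not build a bind wqe
itself. Below is a minimal sketch; the helper name bind_type1_mw
is made up for illustration, and pd, a connected qp, and an mr
registered with IBV_ACCESS_MW_BIND are assumed to exist:

#include <stdint.h>
#include <stddef.h>
#include <infiniband/verbs.h>

/* bind a type 1 MW over [buf, buf + len) and hand back the rkey */
static int bind_type1_mw(struct ibv_pd *pd, struct ibv_qp *qp,
			 struct ibv_mr *mr, void *buf, size_t len,
			 uint32_t *rkey)
{
	struct ibv_mw *mw;
	struct ibv_mw_bind bind = {
		.wr_id = 1,
		.send_flags = IBV_SEND_SIGNALED,
		.bind_info = {
			.mr = mr,
			.addr = (uint64_t)(uintptr_t)buf,
			.length = len,
			.mw_access_flags = IBV_ACCESS_REMOTE_READ |
					   IBV_ACCESS_REMOTE_WRITE,
		},
	};
	int ret;

	mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
	if (!mw)
		return -1;

	/* the bind is posted on the qp's send queue; poll the
	 * send cq for its completion before handing the rkey to
	 * a peer
	 */
	ret = ibv_bind_mw(qp, mw, &bind);
	if (ret) {
		ibv_dealloc_mw(mw);
		return ret;
	}

	*rkey = mw->rkey;	/* updated by a successful bind */
	return 0;
}

On success, ibv_bind_mw() updates mw->rkey with the key a remote
peer must use for RDMA access through the window.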