Message ID | 20190326125433.475-5-kamalheib1@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | pvrdma: Add support for SRQ | expand |
On Tue, Mar 26, 2019 at 02:54:33PM +0200, Kamal Heib wrote: > Implement the pvrdma device commands for supporting SRQ > > Signed-off-by: Kamal Heib <kamalheib1@gmail.com> > --- > hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++ > hw/rdma/vmw/pvrdma_main.c | 16 ++++ > hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++- > hw/rdma/vmw/pvrdma_qp_ops.h | 1 + > 4 files changed, 209 insertions(+), 1 deletion(-) > > diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c > index 510062f05476..fb075048c90c 100644 > --- a/hw/rdma/vmw/pvrdma_cmd.c > +++ b/hw/rdma/vmw/pvrdma_cmd.c > @@ -615,6 +615,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, > return 0; > } > > +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring, > + uint64_t pdir_dma, uint32_t max_wr, > + uint32_t max_sge, uint32_t nchunks) > +{ > + uint64_t *dir = NULL, *tbl = NULL; > + PvrdmaRing *r; > + int rc = -EINVAL; > + char ring_name[MAX_RING_NAME_SZ]; > + uint32_t wqe_sz; > + > + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { > + rdma_error_report("Got invalid page count for SRQ ring: %d", > + nchunks); > + return rc; > + } > + > + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); > + if (!dir) { > + rdma_error_report("Failed to map to SRQ page directory"); > + goto out; > + } > + > + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); > + if (!tbl) { > + rdma_error_report("Failed to map to SRQ page table"); > + goto out; > + } > + > + r = g_malloc(sizeof(*r)); > + *ring = r; > + > + r->ring_state = (struct pvrdma_ring *) > + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); > + if (!r->ring_state) { > + rdma_error_report("Failed to map tp SRQ ring state"); > + goto out_free_ring_mem; > + } > + > + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + > + sizeof(struct pvrdma_sge) * max_sge - 1); > + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma); > + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr, > + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1); > + if (rc) { > + goto out_unmap_ring_state; > + } > + > + goto out; > + > +out_unmap_ring_state: > + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE); > + > +out_free_ring_mem: > + g_free(r); > + > +out: > + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); > + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); > + > + return rc; > +} > + > +static void destroy_srq_ring(PvrdmaRing *ring) > +{ > + pvrdma_ring_free(ring); > + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); > + g_free(ring); > +} > + > +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, > + union pvrdma_cmd_resp *rsp) > +{ > + struct pvrdma_cmd_create_srq *cmd = &req->create_srq; > + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp; > + PvrdmaRing *ring = NULL; > + int rc; > + > + memset(resp, 0, sizeof(*resp)); > + > + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, > + cmd->attrs.max_wr, cmd->attrs.max_sge, > + cmd->nchunks); > + if (rc) { > + return rc; > + } > + > + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle, > + cmd->attrs.max_wr, cmd->attrs.max_sge, > + cmd->attrs.srq_limit, &resp->srqn, ring); > + if (rc) { > + destroy_srq_ring(ring); > + return rc; > + } > + > + return 0; > +} > + > +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, > + union pvrdma_cmd_resp *rsp) > +{ > + struct pvrdma_cmd_query_srq *cmd = &req->query_srq; > + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp; > + > + memset(resp, 0, sizeof(*resp)); > + > + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle, > + (struct ibv_srq_attr *)&resp->attrs); > +} > + > +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, > + union pvrdma_cmd_resp *rsp) > +{ > + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq; > + > + /* Only support SRQ limit */ > + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) || > + (cmd->attr_mask & IBV_SRQ_MAX_WR)) > + return -EINVAL; > + > + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle, > + (struct ibv_srq_attr *)&cmd->attrs, > + cmd->attr_mask); > +} > + > +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, > + union pvrdma_cmd_resp *rsp) > +{ > + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq; > + RdmaRmSRQ *srq; > + PvrdmaRing *ring; > + > + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle); > + if (!srq) { > + return -EINVAL; > + } > + > + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle); > + ring = (PvrdmaRing *)srq->opaque; Use after free. (i know that destroy_qp suffers from the same problem, appreciate if you could take care of it too). > + destroy_srq_ring(ring); > + > + return 0; > +} > + > struct cmd_handler { > uint32_t cmd; > uint32_t ack; > @@ -640,6 +783,10 @@ static struct cmd_handler cmd_handlers[] = { > {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, > {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, > {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, > + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq}, > + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq}, > + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq}, > + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq}, Thanks for adding more power to this device!! > }; > > int pvrdma_exec_cmd(PVRDMADev *dev) > diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c > index 0b46561bad11..e0d01a3199c1 100644 > --- a/hw/rdma/vmw/pvrdma_main.c > +++ b/hw/rdma/vmw/pvrdma_main.c > @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = { > DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, > dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), > DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), > + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), > DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), > DEFINE_PROP_END_OF_LIST(), > }; > @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev) > dsr->caps.max_mr = dev->dev_attr.max_mr; > dsr->caps.max_pd = dev->dev_attr.max_pd; > dsr->caps.max_ah = dev->dev_attr.max_ah; > + dsr->caps.max_srq = dev->dev_attr.max_srq; > + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; > + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; > dsr->caps.gid_tbl_len = MAX_GIDS; > dsr->caps.sys_image_guid = 0; > dsr->caps.node_guid = dev->node_guid; > @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, > pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); > } > break; > + case PVRDMA_UAR_SRQ_OFFSET: > + if (val & PVRDMA_UAR_SRQ_RECV) { > + trace_pvrdma_uar_write(addr, val, "QP", "SRQ", > + val & PVRDMA_UAR_HANDLE_MASK, 0); > + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); Indent is needed here. > + } > + break; > default: > rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, > addr, val); > @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev) > > dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - > TARGET_PAGE_SIZE; /* First page is ring state */ > + > + dev->dev_attr.max_srq_wr = pg_tbl_bytes / > + ((sizeof(struct pvrdma_rq_wqe_hdr) + > + sizeof(struct pvrdma_sge)) * > + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; > } > > static int pvrdma_check_ram_shared(Object *obj, void *opaque) > diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c > index 5b9786efbe4b..bd6db858de32 100644 > --- a/hw/rdma/vmw/pvrdma_qp_ops.c > +++ b/hw/rdma/vmw/pvrdma_qp_ops.c > @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, > > memset(cqe1, 0, sizeof(*cqe1)); > cqe1->wr_id = cqe->wr_id; > - cqe1->qp = cqe->qp; > + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; > cqe1->opcode = cqe->opcode; > cqe1->status = wc->status; > cqe1->byte_len = wc->byte_len; > @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) > } > } > > +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) > +{ > + RdmaRmSRQ *srq; > + PvrdmaRqWqe *wqe; > + PvrdmaRing *ring; > + > + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); > + if (unlikely(!srq)) { > + return; > + } > + > + ring = (PvrdmaRing *)srq->opaque; > + > + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); > + while (wqe) { > + CompHandlerCtx *comp_ctx; > + > + /* Prepare CQE */ > + comp_ctx = g_malloc(sizeof(CompHandlerCtx)); > + comp_ctx->dev = dev; > + comp_ctx->cq_handle = srq->recv_cq_handle; > + comp_ctx->cqe.wr_id = wqe->hdr.wr_id; > + comp_ctx->cqe.qp = 0; > + comp_ctx->cqe.opcode = IBV_WC_RECV; > + > + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { > + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, > + dev->dev_attr.max_sge); > + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); > + continue; > + } > + > + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, > + (struct ibv_sge *)&wqe->sge[0], > + wqe->hdr.num_sge, > + comp_ctx); > + > + pvrdma_ring_read_inc(ring); > + > + wqe = pvrdma_ring_next_elem_read(ring); > + } > + > +} > + > void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) > { > RdmaRmCQ *cq; > diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h > index 31cb48ba29f1..82e720a76fb4 100644 > --- a/hw/rdma/vmw/pvrdma_qp_ops.h > +++ b/hw/rdma/vmw/pvrdma_qp_ops.h > @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void); > void pvrdma_qp_ops_fini(void); > void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); > void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); > +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle); > void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); > > #endif > -- > 2.20.1 > >
On 3/27/19 6:16 PM, Yuval Shaia wrote: > On Tue, Mar 26, 2019 at 02:54:33PM +0200, Kamal Heib wrote: >> Implement the pvrdma device commands for supporting SRQ >> >> Signed-off-by: Kamal Heib <kamalheib1@gmail.com> >> --- >> hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++ >> hw/rdma/vmw/pvrdma_main.c | 16 ++++ >> hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++- >> hw/rdma/vmw/pvrdma_qp_ops.h | 1 + >> 4 files changed, 209 insertions(+), 1 deletion(-) >> >> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c >> index 510062f05476..fb075048c90c 100644 >> --- a/hw/rdma/vmw/pvrdma_cmd.c >> +++ b/hw/rdma/vmw/pvrdma_cmd.c >> @@ -615,6 +615,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, >> return 0; >> } >> >> +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring, >> + uint64_t pdir_dma, uint32_t max_wr, >> + uint32_t max_sge, uint32_t nchunks) >> +{ >> + uint64_t *dir = NULL, *tbl = NULL; >> + PvrdmaRing *r; >> + int rc = -EINVAL; >> + char ring_name[MAX_RING_NAME_SZ]; >> + uint32_t wqe_sz; >> + >> + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { >> + rdma_error_report("Got invalid page count for SRQ ring: %d", >> + nchunks); >> + return rc; >> + } >> + >> + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); >> + if (!dir) { >> + rdma_error_report("Failed to map to SRQ page directory"); >> + goto out; >> + } >> + >> + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); >> + if (!tbl) { >> + rdma_error_report("Failed to map to SRQ page table"); >> + goto out; >> + } >> + >> + r = g_malloc(sizeof(*r)); >> + *ring = r; >> + >> + r->ring_state = (struct pvrdma_ring *) >> + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); >> + if (!r->ring_state) { >> + rdma_error_report("Failed to map tp SRQ ring state"); >> + goto out_free_ring_mem; >> + } >> + >> + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + >> + sizeof(struct pvrdma_sge) * max_sge - 1); >> + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma); >> + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr, >> + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1); >> + if (rc) { >> + goto out_unmap_ring_state; >> + } >> + >> + goto out; >> + >> +out_unmap_ring_state: >> + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE); >> + >> +out_free_ring_mem: >> + g_free(r); >> + >> +out: >> + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); >> + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); >> + >> + return rc; >> +} >> + >> +static void destroy_srq_ring(PvrdmaRing *ring) >> +{ >> + pvrdma_ring_free(ring); >> + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); >> + g_free(ring); >> +} >> + >> +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, >> + union pvrdma_cmd_resp *rsp) >> +{ >> + struct pvrdma_cmd_create_srq *cmd = &req->create_srq; >> + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp; >> + PvrdmaRing *ring = NULL; >> + int rc; >> + >> + memset(resp, 0, sizeof(*resp)); >> + >> + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, >> + cmd->attrs.max_wr, cmd->attrs.max_sge, >> + cmd->nchunks); >> + if (rc) { >> + return rc; >> + } >> + >> + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle, >> + cmd->attrs.max_wr, cmd->attrs.max_sge, >> + cmd->attrs.srq_limit, &resp->srqn, ring); >> + if (rc) { >> + destroy_srq_ring(ring); >> + return rc; >> + } >> + >> + return 0; >> +} >> + >> +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, >> + union pvrdma_cmd_resp *rsp) >> +{ >> + struct pvrdma_cmd_query_srq *cmd = &req->query_srq; >> + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp; >> + >> + memset(resp, 0, sizeof(*resp)); >> + >> + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle, >> + (struct ibv_srq_attr *)&resp->attrs); >> +} >> + >> +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, >> + union pvrdma_cmd_resp *rsp) >> +{ >> + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq; >> + >> + /* Only support SRQ limit */ >> + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) || >> + (cmd->attr_mask & IBV_SRQ_MAX_WR)) >> + return -EINVAL; >> + >> + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle, >> + (struct ibv_srq_attr *)&cmd->attrs, >> + cmd->attr_mask); >> +} >> + >> +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, >> + union pvrdma_cmd_resp *rsp) >> +{ >> + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq; >> + RdmaRmSRQ *srq; >> + PvrdmaRing *ring; >> + >> + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle); >> + if (!srq) { >> + return -EINVAL; >> + } >> + >> + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle); >> + ring = (PvrdmaRing *)srq->opaque; > > Use after free. > (i know that destroy_qp suffers from the same problem, appreciate if you > could take care of it too). > Correct, I'll fix in v3. >> + destroy_srq_ring(ring); >> + >> + return 0; >> +} >> + >> struct cmd_handler { >> uint32_t cmd; >> uint32_t ack; >> @@ -640,6 +783,10 @@ static struct cmd_handler cmd_handlers[] = { >> {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, >> {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, >> {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, >> + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq}, >> + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq}, >> + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq}, >> + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq}, > > Thanks for adding more power to this device!! Sure :-) > >> }; >> >> int pvrdma_exec_cmd(PVRDMADev *dev) >> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c >> index 0b46561bad11..e0d01a3199c1 100644 >> --- a/hw/rdma/vmw/pvrdma_main.c >> +++ b/hw/rdma/vmw/pvrdma_main.c >> @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = { >> DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, >> dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), >> DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), >> + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), >> DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), >> DEFINE_PROP_END_OF_LIST(), >> }; >> @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev) >> dsr->caps.max_mr = dev->dev_attr.max_mr; >> dsr->caps.max_pd = dev->dev_attr.max_pd; >> dsr->caps.max_ah = dev->dev_attr.max_ah; >> + dsr->caps.max_srq = dev->dev_attr.max_srq; >> + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; >> + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; >> dsr->caps.gid_tbl_len = MAX_GIDS; >> dsr->caps.sys_image_guid = 0; >> dsr->caps.node_guid = dev->node_guid; >> @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, >> pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); >> } >> break; >> + case PVRDMA_UAR_SRQ_OFFSET: >> + if (val & PVRDMA_UAR_SRQ_RECV) { >> + trace_pvrdma_uar_write(addr, val, "QP", "SRQ", >> + val & PVRDMA_UAR_HANDLE_MASK, 0); >> + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); > > Indent is needed here. I'll fix it in v3. > >> + } >> + break; >> default: >> rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, >> addr, val); >> @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev) >> >> dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - >> TARGET_PAGE_SIZE; /* First page is ring state */ >> + >> + dev->dev_attr.max_srq_wr = pg_tbl_bytes / >> + ((sizeof(struct pvrdma_rq_wqe_hdr) + >> + sizeof(struct pvrdma_sge)) * >> + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; >> } >> >> static int pvrdma_check_ram_shared(Object *obj, void *opaque) >> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c >> index 5b9786efbe4b..bd6db858de32 100644 >> --- a/hw/rdma/vmw/pvrdma_qp_ops.c >> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c >> @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, >> >> memset(cqe1, 0, sizeof(*cqe1)); >> cqe1->wr_id = cqe->wr_id; >> - cqe1->qp = cqe->qp; >> + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; >> cqe1->opcode = cqe->opcode; >> cqe1->status = wc->status; >> cqe1->byte_len = wc->byte_len; >> @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) >> } >> } >> >> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) >> +{ >> + RdmaRmSRQ *srq; >> + PvrdmaRqWqe *wqe; >> + PvrdmaRing *ring; >> + >> + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); >> + if (unlikely(!srq)) { >> + return; >> + } >> + >> + ring = (PvrdmaRing *)srq->opaque; >> + >> + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); >> + while (wqe) { >> + CompHandlerCtx *comp_ctx; >> + >> + /* Prepare CQE */ >> + comp_ctx = g_malloc(sizeof(CompHandlerCtx)); >> + comp_ctx->dev = dev; >> + comp_ctx->cq_handle = srq->recv_cq_handle; >> + comp_ctx->cqe.wr_id = wqe->hdr.wr_id; >> + comp_ctx->cqe.qp = 0; >> + comp_ctx->cqe.opcode = IBV_WC_RECV; >> + >> + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { >> + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, >> + dev->dev_attr.max_sge); >> + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); >> + continue; >> + } >> + >> + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, >> + (struct ibv_sge *)&wqe->sge[0], >> + wqe->hdr.num_sge, >> + comp_ctx); >> + >> + pvrdma_ring_read_inc(ring); >> + >> + wqe = pvrdma_ring_next_elem_read(ring); >> + } >> + >> +} >> + >> void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) >> { >> RdmaRmCQ *cq; >> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h >> index 31cb48ba29f1..82e720a76fb4 100644 >> --- a/hw/rdma/vmw/pvrdma_qp_ops.h >> +++ b/hw/rdma/vmw/pvrdma_qp_ops.h >> @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void); >> void pvrdma_qp_ops_fini(void); >> void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); >> void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); >> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle); >> void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); >> >> #endif >> -- >> 2.20.1 >> >>
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c index 510062f05476..fb075048c90c 100644 --- a/hw/rdma/vmw/pvrdma_cmd.c +++ b/hw/rdma/vmw/pvrdma_cmd.c @@ -615,6 +615,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req, return 0; } +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring, + uint64_t pdir_dma, uint32_t max_wr, + uint32_t max_sge, uint32_t nchunks) +{ + uint64_t *dir = NULL, *tbl = NULL; + PvrdmaRing *r; + int rc = -EINVAL; + char ring_name[MAX_RING_NAME_SZ]; + uint32_t wqe_sz; + + if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) { + rdma_error_report("Got invalid page count for SRQ ring: %d", + nchunks); + return rc; + } + + dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE); + if (!dir) { + rdma_error_report("Failed to map to SRQ page directory"); + goto out; + } + + tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); + if (!tbl) { + rdma_error_report("Failed to map to SRQ page table"); + goto out; + } + + r = g_malloc(sizeof(*r)); + *ring = r; + + r->ring_state = (struct pvrdma_ring *) + rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); + if (!r->ring_state) { + rdma_error_report("Failed to map tp SRQ ring state"); + goto out_free_ring_mem; + } + + wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge) * max_sge - 1); + sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma); + rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr, + wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1); + if (rc) { + goto out_unmap_ring_state; + } + + goto out; + +out_unmap_ring_state: + rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE); + +out_free_ring_mem: + g_free(r); + +out: + rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); + rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); + + return rc; +} + +static void destroy_srq_ring(PvrdmaRing *ring) +{ + pvrdma_ring_free(ring); + rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE); + g_free(ring); +} + +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_create_srq *cmd = &req->create_srq; + struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp; + PvrdmaRing *ring = NULL; + int rc; + + memset(resp, 0, sizeof(*resp)); + + rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->nchunks); + if (rc) { + return rc; + } + + rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle, + cmd->attrs.max_wr, cmd->attrs.max_sge, + cmd->attrs.srq_limit, &resp->srqn, ring); + if (rc) { + destroy_srq_ring(ring); + return rc; + } + + return 0; +} + +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_query_srq *cmd = &req->query_srq; + struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp; + + memset(resp, 0, sizeof(*resp)); + + return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&resp->attrs); +} + +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq; + + /* Only support SRQ limit */ + if (!(cmd->attr_mask & IBV_SRQ_LIMIT) || + (cmd->attr_mask & IBV_SRQ_MAX_WR)) + return -EINVAL; + + return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle, + (struct ibv_srq_attr *)&cmd->attrs, + cmd->attr_mask); +} + +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req, + union pvrdma_cmd_resp *rsp) +{ + struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq; + RdmaRmSRQ *srq; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle); + if (!srq) { + return -EINVAL; + } + + rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle); + ring = (PvrdmaRing *)srq->opaque; + destroy_srq_ring(ring); + + return 0; +} + struct cmd_handler { uint32_t cmd; uint32_t ack; @@ -640,6 +783,10 @@ static struct cmd_handler cmd_handlers[] = { {PVRDMA_CMD_DESTROY_UC, PVRDMA_CMD_DESTROY_UC_RESP_NOOP, destroy_uc}, {PVRDMA_CMD_CREATE_BIND, PVRDMA_CMD_CREATE_BIND_RESP_NOOP, create_bind}, {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind}, + {PVRDMA_CMD_CREATE_SRQ, PVRDMA_CMD_CREATE_SRQ_RESP, create_srq}, + {PVRDMA_CMD_QUERY_SRQ, PVRDMA_CMD_QUERY_SRQ_RESP, query_srq}, + {PVRDMA_CMD_MODIFY_SRQ, PVRDMA_CMD_MODIFY_SRQ_RESP, modify_srq}, + {PVRDMA_CMD_DESTROY_SRQ, PVRDMA_CMD_DESTROY_SRQ_RESP, destroy_srq}, }; int pvrdma_exec_cmd(PVRDMADev *dev) diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 0b46561bad11..e0d01a3199c1 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = { DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), + DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), DEFINE_PROP_END_OF_LIST(), }; @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev) dsr->caps.max_mr = dev->dev_attr.max_mr; dsr->caps.max_pd = dev->dev_attr.max_pd; dsr->caps.max_ah = dev->dev_attr.max_ah; + dsr->caps.max_srq = dev->dev_attr.max_srq; + dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; + dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; dsr->caps.gid_tbl_len = MAX_GIDS; dsr->caps.sys_image_guid = 0; dsr->caps.node_guid = dev->node_guid; @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); } break; + case PVRDMA_UAR_SRQ_OFFSET: + if (val & PVRDMA_UAR_SRQ_RECV) { + trace_pvrdma_uar_write(addr, val, "QP", "SRQ", + val & PVRDMA_UAR_HANDLE_MASK, 0); + pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); + } + break; default: rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, addr, val); @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev) dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - TARGET_PAGE_SIZE; /* First page is ring state */ + + dev->dev_attr.max_srq_wr = pg_tbl_bytes / + ((sizeof(struct pvrdma_rq_wqe_hdr) + + sizeof(struct pvrdma_sge)) * + dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; } static int pvrdma_check_ram_shared(Object *obj, void *opaque) diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c index 5b9786efbe4b..bd6db858de32 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.c +++ b/hw/rdma/vmw/pvrdma_qp_ops.c @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, memset(cqe1, 0, sizeof(*cqe1)); cqe1->wr_id = cqe->wr_id; - cqe1->qp = cqe->qp; + cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num; cqe1->opcode = cqe->opcode; cqe1->status = wc->status; cqe1->byte_len = wc->byte_len; @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) } } +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle) +{ + RdmaRmSRQ *srq; + PvrdmaRqWqe *wqe; + PvrdmaRing *ring; + + srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle); + if (unlikely(!srq)) { + return; + } + + ring = (PvrdmaRing *)srq->opaque; + + wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); + while (wqe) { + CompHandlerCtx *comp_ctx; + + /* Prepare CQE */ + comp_ctx = g_malloc(sizeof(CompHandlerCtx)); + comp_ctx->dev = dev; + comp_ctx->cq_handle = srq->recv_cq_handle; + comp_ctx->cqe.wr_id = wqe->hdr.wr_id; + comp_ctx->cqe.qp = 0; + comp_ctx->cqe.opcode = IBV_WC_RECV; + + if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { + rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge, + dev->dev_attr.max_sge); + complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); + continue; + } + + rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq, + (struct ibv_sge *)&wqe->sge[0], + wqe->hdr.num_sge, + comp_ctx); + + pvrdma_ring_read_inc(ring); + + wqe = pvrdma_ring_next_elem_read(ring); + } + +} + void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle) { RdmaRmCQ *cq; diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h index 31cb48ba29f1..82e720a76fb4 100644 --- a/hw/rdma/vmw/pvrdma_qp_ops.h +++ b/hw/rdma/vmw/pvrdma_qp_ops.h @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void); void pvrdma_qp_ops_fini(void); void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle); void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle); +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle); void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle); #endif
Implement the pvrdma device commands for supporting SRQ Signed-off-by: Kamal Heib <kamalheib1@gmail.com> --- hw/rdma/vmw/pvrdma_cmd.c | 147 ++++++++++++++++++++++++++++++++++++ hw/rdma/vmw/pvrdma_main.c | 16 ++++ hw/rdma/vmw/pvrdma_qp_ops.c | 46 ++++++++++- hw/rdma/vmw/pvrdma_qp_ops.h | 1 + 4 files changed, 209 insertions(+), 1 deletion(-)