Message ID | 20190818132107.18181-3-yuval.shaia@oracle.com
---|---
State | New, archived
Series | rdma: Utilize ibv_reg_mr_iova for memory registration
Hi Yuval,

On 8/18/19 4:21 PM, Yuval Shaia wrote:
> The virtual address that is provided by the guest in post_send and
> post_recv operations is related to the guest address space. This address
> space is unknown to the HCA residing on the host, so an extra step is
> needed in these operations to adjust the address to a host virtual address.
>
> This step, which is done in the data path, affects performance.
>
> An enhanced version of MR registration, introduced here
> https://patchwork.kernel.org/patch/11044467/, can be used so that the
> guest virtual address space for this MR is known to the HCA on the host.
>
> This will save the data-path adjustment.
>
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> ---
>  hw/rdma/rdma_backend.c    | 13 +++++++++++++
>  hw/rdma/rdma_backend.h    |  5 +++++
>  hw/rdma/rdma_rm.c         |  5 +++++
>  hw/rdma/vmw/pvrdma_main.c |  6 ++++++
>  4 files changed, 29 insertions(+)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index c39051068d..c346407cd3 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -391,7 +391,11 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
>              return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
>          }
>
> +#ifdef LEGACY_RDMA_REG_MR
>          dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;

This is the performance hit you are addressing? The address computation?

Thanks,
Marcel

> +#else
> +        dsge->addr = ssge[ssge_idx].addr;
> +#endif
[...]
On Sat, Aug 31, 2019 at 10:31:57PM +0300, Marcel Apfelbaum wrote:
> Hi Yuval,
>
> On 8/18/19 4:21 PM, Yuval Shaia wrote:
> > The virtual address that is provided by the guest in post_send and
> > post_recv operations is related to the guest address space. This address
> > space is unknown to the HCA residing on the host, so an extra step is
> > needed in these operations to adjust the address to a host virtual address.
> >
> > This step, which is done in the data path, affects performance.
[...]
> > +#ifdef LEGACY_RDMA_REG_MR
> >          dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;
>
> This is the performance hit you are addressing? The address computation?

This is the support for the legacy library; see the enhancement below.

> Thanks,
> Marcel
>
> > +#else
> > +        dsge->addr = ssge[ssge_idx].addr;

Here it is: no need to adjust to the host virtual address.

Please note also that this is a huge step toward virtio-rdma support, where
the emulation will be bypassed in the data path, so there is no chance for
an address adjustment there.

> > +#endif
[...]
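To illustrate the point made in the exchange above, here is a simplified, self-contained sketch of the per-SGE address handling in build_host_sge_array(). It is not the actual QEMU code; the struct and function names are made up. With a legacy ibv_reg_mr() registration, every posted address must be translated from the guest address space into the host mapping of the MR; with ibv_reg_mr_iova(), the guest address can be handed to the HCA unchanged.

/* Illustrative sketch only; names do not match the QEMU structures. */
#include <stdint.h>

struct sketch_mr {
    void     *virt;   /* host mapping of the guest memory */
    uint64_t  start;  /* guest virtual address the MR was registered for */
};

/* Legacy path: every work request pays for this translation. */
static uint64_t legacy_sge_addr(const struct sketch_mr *mr, uint64_t guest_addr)
{
    return (uintptr_t)mr->virt + guest_addr - mr->start;
}

/* iova path: the MR was registered with
 * ibv_reg_mr_iova(pd, mr->virt, length, mr->start, access),
 * so the HCA already interprets addresses in the guest space and the
 * SGE can carry the guest address unmodified. */
static uint64_t iova_sge_addr(const struct sketch_mr *mr, uint64_t guest_addr)
{
    (void)mr;
    return guest_addr;
}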
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index c39051068d..c346407cd3 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -391,7 +391,11 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
             return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
         }
 
+#ifdef LEGACY_RDMA_REG_MR
         dsge->addr = (uintptr_t)mr->virt + ssge[ssge_idx].addr - mr->start;
+#else
+        dsge->addr = ssge[ssge_idx].addr;
+#endif
         dsge->length = ssge[ssge_idx].length;
         dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
 
@@ -735,10 +739,19 @@ void rdma_backend_destroy_pd(RdmaBackendPD *pd)
     }
 }
 
+#ifdef LEGACY_RDMA_REG_MR
 int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
                            size_t length, int access)
+#else
+int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
+                           size_t length, uint64_t guest_start, int access)
+#endif
 {
+#ifdef LEGACY_RDMA_REG_MR
     mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access);
+#else
+    mr->ibmr = ibv_reg_mr_iova(pd->ibpd, addr, length, guest_start, access);
+#endif
     if (!mr->ibmr) {
         rdma_error_report("ibv_reg_mr fail, errno=%d", errno);
         return -EIO;
diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 7c1a19a2b5..127f96e2d5 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -78,8 +78,13 @@ int rdma_backend_query_port(RdmaBackendDev *backend_dev,
 int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd);
 void rdma_backend_destroy_pd(RdmaBackendPD *pd);
 
+#ifdef LEGACY_RDMA_REG_MR
 int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
                            size_t length, int access);
+#else
+int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
+                           size_t length, uint64_t guest_start, int access);
+#endif
 void rdma_backend_destroy_mr(RdmaBackendMR *mr);
 
 int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 1927f85472..1524dfaeaa 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -227,8 +227,13 @@ int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
         mr->length = guest_length;
         mr->virt += (mr->start & (TARGET_PAGE_SIZE - 1));
 
+#ifdef LEGACY_RDMA_REG_MR
         ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
                                      mr->length, access_flags);
+#else
+        ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
+                                     mr->length, guest_start, access_flags);
+#endif
         if (ret) {
             ret = -EIO;
             goto out_dealloc_mr;
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 3e36e13013..18075285f6 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -664,6 +664,12 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
     dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;
     qemu_register_shutdown_notifier(&dev->shutdown_notifier);
 
+#ifdef LEGACY_RDMA_REG_MR
+    rdma_info_report("Using legacy reg_mr");
+#else
+    rdma_info_report("Using iova reg_mr");
+#endif
+
 out:
     if (rc) {
         pvrdma_fini(pdev);
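For reference, here is a minimal standalone sketch of the ibv_reg_mr_iova() verb that the non-legacy branch of the patch relies on (it requires an rdma-core that provides ibv_reg_mr_iova()). The buffer, the 0x10000000 "guest" address, and the access flags are illustrative only, and most error handling is trimmed.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <infiniband/verbs.h>

int main(void)
{
    int num;
    struct ibv_device **devs = ibv_get_device_list(&num);
    if (!devs || num == 0) {
        fprintf(stderr, "no RDMA devices\n");
        return 1;
    }

    struct ibv_context *ctx = ibv_open_device(devs[0]);
    struct ibv_pd *pd = ibv_alloc_pd(ctx);

    size_t len = 4096;
    void *host_buf = aligned_alloc(4096, len);   /* host virtual address */
    uint64_t guest_va = 0x10000000;              /* address space used by the posting side */

    /* The MR covers host_buf, but addresses referring to it in SGEs (and
     * remotely via the rkey) are interpreted in the guest_va space, so the
     * posting code needs no per-WR translation. */
    struct ibv_mr *mr = ibv_reg_mr_iova(pd, host_buf, len, guest_va,
                                        IBV_ACCESS_LOCAL_WRITE |
                                        IBV_ACCESS_REMOTE_READ |
                                        IBV_ACCESS_REMOTE_WRITE);
    if (!mr) {
        perror("ibv_reg_mr_iova");
        return 1;
    }
    printf("lkey=0x%x rkey=0x%x\n", (unsigned)mr->lkey, (unsigned)mr->rkey);

    ibv_dereg_mr(mr);
    ibv_dealloc_pd(pd);
    ibv_close_device(ctx);
    ibv_free_device_list(devs);
    free(host_buf);
    return 0;
}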
The virtual address that is provided by the guest in post_send and
post_recv operations is related to the guest address space. This address
space is unknown to the HCA residing on the host, so an extra step is
needed in these operations to adjust the address to a host virtual address.

This step, which is done in the data path, affects performance.

An enhanced version of MR registration, introduced here
https://patchwork.kernel.org/patch/11044467/, can be used so that the
guest virtual address space for this MR is known to the HCA on the host.

This will save the data-path adjustment.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
---
 hw/rdma/rdma_backend.c    | 13 +++++++++++++
 hw/rdma/rdma_backend.h    |  5 +++++
 hw/rdma/rdma_rm.c         |  5 +++++
 hw/rdma/vmw/pvrdma_main.c |  6 ++++++
 4 files changed, 29 insertions(+)
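One thing this patch does not show is where LEGACY_RDMA_REG_MR comes from; presumably another patch in the series adds a build-time check for ibv_reg_mr_iova(). A purely hypothetical compile-and-link probe along those lines (not taken from the series) could look like:

/* Hypothetical configure-time probe: if this compiles and links against
 * libibverbs, the build can leave LEGACY_RDMA_REG_MR undefined and use
 * ibv_reg_mr_iova(); otherwise it defines LEGACY_RDMA_REG_MR and falls
 * back to plain ibv_reg_mr(). Only the symbol and signature matter here;
 * the probe is never meant to be run. */
#include <infiniband/verbs.h>

int main(void)
{
    return ibv_reg_mr_iova(NULL, NULL, 0, 0, 0) ? 0 : 1;
}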