diff mbox series

[RFC,2/3] RDMA/uverbs: Add uverbs commands for fd-based MR registration

Message ID 1587056973-101760-3-git-send-email-jianxin.xiong@intel.com (mailing list archive)
State RFC
Headers show
Series RDMA: Add dma-buf support | expand

Commit Message

Xiong, Jianxin April 16, 2020, 5:09 p.m. UTC
Add new uverbs commands for registering user memory regions associated
with a file descriptor, such as dma-buf.  Add new function pointers to
'struct ib_device_ops' to support the new uverbs commands.

Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
Reviewed-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/infiniband/core/device.c     |   2 +
 drivers/infiniband/core/uverbs_cmd.c | 179 ++++++++++++++++++++++++++++++++++-
 include/rdma/ib_umem.h               |   3 +
 include/rdma/ib_verbs.h              |   8 ++
 include/uapi/rdma/ib_user_verbs.h    |  28 ++++++
 5 files changed, 219 insertions(+), 1 deletion(-)

Comments

Jason Gunthorpe April 16, 2020, 5:47 p.m. UTC | #1
On Thu, Apr 16, 2020 at 10:09:32AM -0700, Jianxin Xiong wrote:

> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index f6c2552..b3f7261 100644
> --- a/drivers/infiniband/core/device.c
> +++ b/drivers/infiniband/core/device.c
> @@ -2654,9 +2654,11 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
>  	SET_DEVICE_OP(dev_ops, read_counters);
>  	SET_DEVICE_OP(dev_ops, reg_dm_mr);
>  	SET_DEVICE_OP(dev_ops, reg_user_mr);
> +	SET_DEVICE_OP(dev_ops, reg_user_mr_fd);
>  	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
>  	SET_DEVICE_OP(dev_ops, req_notify_cq);
>  	SET_DEVICE_OP(dev_ops, rereg_user_mr);
> +	SET_DEVICE_OP(dev_ops, rereg_user_mr_fd);

I'm not so found of adding such a specific callback.. It seems better
to have a generic reg_user_mr that accepts a ib_umem created by the
core code. Burying the umem_get in the drivers was probably a mistake.

>  static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
>  {
>  	struct ib_uverbs_dereg_mr cmd;
> @@ -3916,7 +4081,19 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
>  			ib_uverbs_rereg_mr,
>  			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
>  						struct ib_uverbs_rereg_mr_resp),
> -			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
> +			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr)),
> +		DECLARE_UVERBS_WRITE(
> +			IB_USER_VERBS_CMD_REG_MR_FD,
> +			ib_uverbs_reg_mr_fd,
> +			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr_fd,
> +						struct ib_uverbs_reg_mr_resp),
> +			UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr_fd)),
> +		DECLARE_UVERBS_WRITE(
> +			IB_USER_VERBS_CMD_REREG_MR_FD,
> +			ib_uverbs_rereg_mr_fd,
> +			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr_fd,
> +						struct ib_uverbs_rereg_mr_resp),
> +			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr_fd))),

New write based methods are not allowed, they have to be done as ioctl
methods.

Jason
Xiong, Jianxin April 16, 2020, 6:32 p.m. UTC | #2
> >  	SET_DEVICE_OP(dev_ops, read_counters);
> >  	SET_DEVICE_OP(dev_ops, reg_dm_mr);
> >  	SET_DEVICE_OP(dev_ops, reg_user_mr);
> > +	SET_DEVICE_OP(dev_ops, reg_user_mr_fd);
> >  	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
> >  	SET_DEVICE_OP(dev_ops, req_notify_cq);
> >  	SET_DEVICE_OP(dev_ops, rereg_user_mr);
> > +	SET_DEVICE_OP(dev_ops, rereg_user_mr_fd);
> 
> I'm not so found of adding such a specific callback.. It seems better to have a generic reg_user_mr that accepts a ib_umem created by the
> core code. Burying the umem_get in the drivers was probably a mistake.

I totally agree. But that would require major changes to the uverbs workflow.

> 
> >  static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)  {
> >  	struct ib_uverbs_dereg_mr cmd;
> > @@ -3916,7 +4081,19 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
> >  			ib_uverbs_rereg_mr,
> >  			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
> >  						struct ib_uverbs_rereg_mr_resp),
> > -			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
> > +			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr)),
> > +		DECLARE_UVERBS_WRITE(
> > +			IB_USER_VERBS_CMD_REG_MR_FD,
> > +			ib_uverbs_reg_mr_fd,
> > +			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr_fd,
> > +						struct ib_uverbs_reg_mr_resp),
> > +			UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr_fd)),
> > +		DECLARE_UVERBS_WRITE(
> > +			IB_USER_VERBS_CMD_REREG_MR_FD,
> > +			ib_uverbs_rereg_mr_fd,
> > +			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr_fd,
> > +						struct ib_uverbs_rereg_mr_resp),
> > +			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr_fd))),
> 
> New write based methods are not allowed, they have to be done as ioctl methods.
> 

Will work on that.

> Jason
Jason Gunthorpe April 17, 2020, 12:35 p.m. UTC | #3
On Thu, Apr 16, 2020 at 06:32:01PM +0000, Xiong, Jianxin wrote:
> > >  	SET_DEVICE_OP(dev_ops, read_counters);
> > >  	SET_DEVICE_OP(dev_ops, reg_dm_mr);
> > >  	SET_DEVICE_OP(dev_ops, reg_user_mr);
> > > +	SET_DEVICE_OP(dev_ops, reg_user_mr_fd);
> > >  	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
> > >  	SET_DEVICE_OP(dev_ops, req_notify_cq);
> > >  	SET_DEVICE_OP(dev_ops, rereg_user_mr);
> > > +	SET_DEVICE_OP(dev_ops, rereg_user_mr_fd);
> > 
> > I'm not so found of adding such a specific callback.. It seems better to have a generic reg_user_mr that accepts a ib_umem created by the
> > core code. Burying the umem_get in the drivers was probably a mistake.
> 
> I totally agree. But that would require major changes to the uverbs workflow.

I don't think it is that bad and would prefer it

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6c2552..b3f7261 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2654,9 +2654,11 @@  void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, read_counters);
 	SET_DEVICE_OP(dev_ops, reg_dm_mr);
 	SET_DEVICE_OP(dev_ops, reg_user_mr);
+	SET_DEVICE_OP(dev_ops, reg_user_mr_fd);
 	SET_DEVICE_OP(dev_ops, req_ncomp_notif);
 	SET_DEVICE_OP(dev_ops, req_notify_cq);
 	SET_DEVICE_OP(dev_ops, rereg_user_mr);
+	SET_DEVICE_OP(dev_ops, rereg_user_mr_fd);
 	SET_DEVICE_OP(dev_ops, resize_cq);
 	SET_DEVICE_OP(dev_ops, set_vf_guid);
 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 060b4eb..b4df5f1 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -879,6 +879,171 @@  static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
 	return ret;
 }
 
+static int ib_uverbs_reg_mr_fd(struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_reg_mr_fd   cmd;
+	struct ib_uverbs_reg_mr_resp resp;
+	struct ib_uobject           *uobj;
+	struct ib_pd                *pd;
+	struct ib_mr                *mr;
+	int                          ret;
+	struct ib_device            *ib_dev;
+
+	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+	if (ret)
+		return ret;
+
+	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+		return -EINVAL;
+
+	ret = ib_check_mr_access(cmd.access_flags);
+	if (ret)
+		return ret;
+
+	uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+	if (!pd) {
+		ret = -EINVAL;
+		goto err_free;
+	}
+
+	if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+		if (!(pd->device->attrs.device_cap_flags &
+		      IB_DEVICE_ON_DEMAND_PAGING)) {
+			pr_debug("ODP support not available\n");
+			ret = -EINVAL;
+			goto err_put;
+		}
+	}
+
+	mr = pd->device->ops.reg_user_mr_fd(pd, cmd.start, cmd.length,
+					    cmd.hca_va, cmd.fd_type, cmd.fd,
+					    cmd.access_flags,
+					    &attrs->driver_udata);
+	if (IS_ERR(mr)) {
+		ret = PTR_ERR(mr);
+		goto err_put;
+	}
+
+	mr->device    = pd->device;
+	mr->pd        = pd;
+	mr->type      = IB_MR_TYPE_USER;
+	mr->dm	      = NULL;
+	mr->sig_attrs = NULL;
+	mr->uobject   = uobj;
+	atomic_inc(&pd->usecnt);
+	mr->res.type  = RDMA_RESTRACK_MR;
+	rdma_restrack_uadd(&mr->res);
+
+	uobj->object  = mr;
+
+	memset(&resp, 0, sizeof(resp));
+	resp.lkey      = mr->lkey;
+	resp.rkey      = mr->rkey;
+	resp.mr_handle = uobj->id;
+
+	ret = uverbs_response(attrs, &resp, sizeof(resp));
+	if (ret)
+		goto err_copy;
+
+	uobj_put_obj_read(pd);
+
+	rdma_alloc_commit_uobject(uobj, attrs);
+	return 0;
+
+err_copy:
+	ib_dereg_mr_user(mr, uverbs_get_cleared_udata(attrs));
+
+err_put:
+	uobj_put_obj_read(pd);
+
+err_free:
+	uobj_alloc_abort(uobj, attrs);
+	return ret;
+}
+
+static int ib_uverbs_rereg_mr_fd(struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uverbs_rereg_mr_fd   cmd;
+	struct ib_uverbs_rereg_mr_resp resp;
+	struct ib_pd                  *pd = NULL;
+	struct ib_mr                  *mr;
+	struct ib_pd                  *old_pd;
+	int                            ret;
+	struct ib_uobject	      *uobj;
+
+	ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+	if (ret)
+		return ret;
+
+	if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+		return -EINVAL;
+
+	if ((cmd.flags & IB_MR_REREG_TRANS) &&
+	    (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+		return -EINVAL;
+
+	uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	mr = uobj->object;
+
+	if (mr->dm) {
+		ret = -EINVAL;
+		goto put_uobjs;
+	}
+
+	if (cmd.flags & IB_MR_REREG_ACCESS) {
+		ret = ib_check_mr_access(cmd.access_flags);
+		if (ret)
+			goto put_uobjs;
+	}
+
+	if (cmd.flags & IB_MR_REREG_PD) {
+		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+				       attrs);
+		if (!pd) {
+			ret = -EINVAL;
+			goto put_uobjs;
+		}
+	}
+
+	old_pd = mr->pd;
+	ret = mr->device->ops.rereg_user_mr_fd(mr, cmd.flags, cmd.start,
+					       cmd.length, cmd.hca_va,
+					       cmd.fd_type, cmd.fd,
+					       cmd.access_flags, pd,
+					       &attrs->driver_udata);
+	if (ret)
+		goto put_uobj_pd;
+
+	if (cmd.flags & IB_MR_REREG_PD) {
+		atomic_inc(&pd->usecnt);
+		mr->pd = pd;
+		atomic_dec(&old_pd->usecnt);
+	}
+
+	memset(&resp, 0, sizeof(resp));
+	resp.lkey = mr->lkey;
+	resp.rkey = mr->rkey;
+
+	ret = uverbs_response(attrs, &resp, sizeof(resp));
+
+put_uobj_pd:
+	if (cmd.flags & IB_MR_REREG_PD)
+		uobj_put_obj_read(pd);
+
+put_uobjs:
+	uobj_put_write(uobj);
+
+	return ret;
+}
+
 static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uverbs_dereg_mr cmd;
@@ -3916,7 +4081,19 @@  static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
 			ib_uverbs_rereg_mr,
 			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
 						struct ib_uverbs_rereg_mr_resp),
-			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr)),
+		DECLARE_UVERBS_WRITE(
+			IB_USER_VERBS_CMD_REG_MR_FD,
+			ib_uverbs_reg_mr_fd,
+			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr_fd,
+						struct ib_uverbs_reg_mr_resp),
+			UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr_fd)),
+		DECLARE_UVERBS_WRITE(
+			IB_USER_VERBS_CMD_REREG_MR_FD,
+			ib_uverbs_rereg_mr_fd,
+			UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr_fd,
+						struct ib_uverbs_rereg_mr_resp),
+			UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr_fd))),
 
 	DECLARE_UVERBS_OBJECT(
 		UVERBS_OBJECT_MW,
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 026a3cf..2347497 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -38,6 +38,9 @@ 
 #include <linux/workqueue.h>
 #include <rdma/ib_verbs.h>
 
+#define IB_UMEM_FD_TYPE_NONE	0
+#define IB_UMEM_FD_TYPE_DMABUF	1
+
 struct ib_ucontext;
 struct ib_umem_odp;
 struct ib_umem_dmabuf;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bbc5cfb..2905aa0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2436,6 +2436,14 @@  struct ib_device_ops {
 	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
 			     u64 virt_addr, int mr_access_flags,
 			     struct ib_pd *pd, struct ib_udata *udata);
+	struct ib_mr *(*reg_user_mr_fd)(struct ib_pd *pd, u64 start, u64 length,
+				     u64 virt_addr, int fd_type, int fd,
+				     int mr_access_flags,
+				     struct ib_udata *udata);
+	int (*rereg_user_mr_fd)(struct ib_mr *mr, int flags, u64 start,
+				u64 length, u64 virt_addr, int fd_type, int fd,
+				int mr_access_flags, struct ib_pd *pd,
+				struct ib_udata *udata);
 	int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
 	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
 				  u32 max_num_sg, struct ib_udata *udata);
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 0474c74..999fa34 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -88,6 +88,8 @@  enum ib_uverbs_write_cmds {
 	IB_USER_VERBS_CMD_CLOSE_XRCD,
 	IB_USER_VERBS_CMD_CREATE_XSRQ,
 	IB_USER_VERBS_CMD_OPEN_QP,
+	IB_USER_VERBS_CMD_REG_MR_FD,
+	IB_USER_VERBS_CMD_REREG_MR_FD,
 };
 
 enum {
@@ -346,6 +348,18 @@  struct ib_uverbs_reg_mr {
 	__aligned_u64 driver_data[0];
 };
 
+struct ib_uverbs_reg_mr_fd {
+	__aligned_u64 response;
+	__aligned_u64 start;
+	__aligned_u64 length;
+	__aligned_u64 hca_va;
+	__u32 pd_handle;
+	__u32 access_flags;
+	__u32 fd_type;
+	__u32 fd;
+	__aligned_u64 driver_data[0];
+};
+
 struct ib_uverbs_reg_mr_resp {
 	__u32 mr_handle;
 	__u32 lkey;
@@ -365,6 +379,20 @@  struct ib_uverbs_rereg_mr {
 	__aligned_u64 driver_data[0];
 };
 
+struct ib_uverbs_rereg_mr_fd {
+	__aligned_u64 response;
+	__u32 mr_handle;
+	__u32 flags;
+	__aligned_u64 start;
+	__aligned_u64 length;
+	__aligned_u64 hca_va;
+	__u32 pd_handle;
+	__u32 access_flags;
+	__u32 fd_type;
+	__u32 fd;
+	__aligned_u64 driver_data[0];
+};
+
 struct ib_uverbs_rereg_mr_resp {
 	__u32 lkey;
 	__u32 rkey;