diff mbox series

[v2,1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.

Message ID 20230606151747.1649305-1-weh@microsoft.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series [v2,1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib driver.

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async

Commit Message

Wei Hu June 6, 2023, 3:17 p.m. UTC
Add EQ interrupt support for the mana ib driver. Allocate EQs per ucontext
to receive interrupts. Attach an EQ when a CQ is created. Call the CQ
interrupt handler when a completion interrupt happens. EQs are destroyed
when the ucontext is deallocated.

The change calls some public APIs in the mana ethernet driver to
allocate EQs and other resources. The EQ processing routine is also shared
by the mana ethernet and mana ib drivers.

Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Wei Hu <weh@microsoft.com>
---

v2: Use ibdev_dbg to print error messages and return -ENOMEM
    when kzalloc fails.
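
For reviewers skimming the diffstat, a rough sketch of the per-CQ wiring this
series introduces, distilled from the cq.c and qp.c hunks below. The helper
name is illustrative only (it is not a function added by the patch), and the
steps that happen before and after mana_create_wq_obj() are collapsed here
for brevity:

static int mana_ib_attach_cq_sketch(struct mana_ib_ucontext *ucontext,
				    struct mana_ib_cq *cq,
				    struct gdma_dev *gd,
				    struct mana_obj_spec *cq_spec)
{
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_queue *gdma_cq;
	struct mana_eq *eq;

	/* Pick one of the per-ucontext EQs based on the requested
	 * completion vector and attach the CQ to it instead of
	 * GDMA_CQ_NO_EQ.
	 */
	eq = &ucontext->eqs[cq->comp_vector % gc->max_num_queues];
	cq_spec->attached_eq = eq->eq->id;

	/* Register the CQ in the shared GDMA CQ table so the EQ
	 * processing routine can dispatch completion interrupts to
	 * mana_ib_cq_handler(), which forwards to ibcq->comp_handler.
	 */
	gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
	if (!gdma_cq)
		return -ENOMEM;

	gdma_cq->cq.context = cq;
	gdma_cq->type = GDMA_CQ;
	gdma_cq->cq.callback = mana_ib_cq_handler;
	gdma_cq->id = cq->id;
	gc->cq_table[cq->id] = gdma_cq;

	return 0;
}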

 drivers/infiniband/hw/mana/cq.c               |  32 ++++-
 drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
 drivers/infiniband/hw/mana/mana_ib.h          |   4 +
 drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
 .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
 drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
 include/net/mana/gdma.h                       |   9 +-
 7 files changed, 290 insertions(+), 64 deletions(-)

Comments

Long Li June 7, 2023, 9:03 p.m. UTC | #1
> Subject: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to mana ib
> driver.
> 
> Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext to receive
> interrupt. Attach EQ when CQ is created. Call CQ interrupt handler when
> completion interrupt happens. EQs are destroyed when ucontext is deallocated.
> 
> The change calls some public APIs in mana ethernet driver to allocate EQs and
> other resources. The EQ process routine is also shared by mana ethernet and
> mana ib drivers.
> 
> Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
> 
> v2: Use ibdev_dbg to print error messages and return -ENOMEM
>     when kzalloc fails.
> 
>  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
>  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
>  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
>  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
>  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
>  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
>  include/net/mana/gdma.h                       |   9 +-
>  7 files changed, 290 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
> index d141cab8a1e6..3cd680e0e753 100644
> --- a/drivers/infiniband/hw/mana/cq.c
> +++ b/drivers/infiniband/hw/mana/cq.c
> @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct
> ib_cq_init_attr *attr,
>  	struct ib_device *ibdev = ibcq->device;
>  	struct mana_ib_create_cq ucmd = {};
>  	struct mana_ib_dev *mdev;
> +	struct gdma_context *gc;
> +	struct gdma_dev *gd;
>  	int err;
> 
>  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> +	gd = mdev->gdma_dev;
> +	gc = gd->gdma_context;
> 
>  	if (udata->inlen < sizeof(ucmd))
>  		return -EINVAL;
> 
> +	cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
> +				0 : attr->comp_vector;
> +
>  	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> >inlen));
>  	if (err) {
>  		ibdev_dbg(ibdev,
> @@ -69,11 +76,32 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct
> ib_udata *udata)
>  	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
>  	struct ib_device *ibdev = ibcq->device;
>  	struct mana_ib_dev *mdev;
> +	struct gdma_context *gc;
> +	struct gdma_dev *gd;
> +
> 
>  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> +	gd = mdev->gdma_dev;
> +	gc = gd->gdma_context;
> 
> -	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> -	ib_umem_release(cq->umem);
> +
> +
> +	if (atomic_read(&ibcq->usecnt) == 0) {
> +		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);

Need to check if this function fails. The following code will call kfree(gc->cq_table[cq->id]), and it's possible that an IRQ is happening at the same time if the CQ is not destroyed.
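
A minimal sketch of the error handling being asked for here, assuming
mana_ib_gd_destroy_dma_region() reports failure through its return value;
the usecnt check from the hunk above is left out for brevity, and this is
not the final code:

int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
	struct mana_ib_dev *mdev =
		container_of(ibcq->device, struct mana_ib_dev, ib_dev);
	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
	int err;

	err = mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
	if (err) {
		/* The CQ may still be live in hardware; do not free
		 * gc->cq_table[cq->id] while an EQ interrupt could still
		 * look it up.
		 */
		return err;
	}

	kfree(gc->cq_table[cq->id]);
	gc->cq_table[cq->id] = NULL;
	ib_umem_release(cq->umem);

	return 0;
}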

> +		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq->id]);
> +		kfree(gc->cq_table[cq->id]);
> +		gc->cq_table[cq->id] = NULL;
> +		ib_umem_release(cq->umem);
> +	}
> 
>  	return 0;
>  }
> +
> +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) {
> +	struct mana_ib_cq *cq = ctx;
> +	struct ib_device *ibdev = cq->ibcq.device;
> +
> +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);

This debug message seems overkill?

> +	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); }
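
With the unconditional print dropped, as suggested here and again later in
the thread, the handler would reduce to roughly the following (a sketch of
the hunk quoted above, not the final code):

void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
{
	struct mana_ib_cq *cq = ctx;

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
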
> diff --git a/drivers/infiniband/hw/mana/main.c
> b/drivers/infiniband/hw/mana/main.c
> index 7be4c3adb4e2..e4efbcaed10e 100644
> --- a/drivers/infiniband/hw/mana/main.c
> +++ b/drivers/infiniband/hw/mana/main.c
> @@ -143,6 +143,81 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> ib_udata *udata)
>  	return err;
>  }
> 
> +static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
> +			       struct mana_ib_dev *mdev)
> +{
> +	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
> +	struct ib_device *ibdev = ucontext->ibucontext.device;
> +	struct gdma_queue *eq;
> +	int i;
> +
> +	if (!ucontext->eqs)
> +		return;
> +
> +	for (i = 0; i < gc->max_num_queues; i++) {
> +		eq = ucontext->eqs[i].eq;
> +		if (!eq)
> +			continue;
> +
> +		mana_gd_destroy_queue(gc, eq);
> +	}
> +
> +	kfree(ucontext->eqs);
> +	ucontext->eqs = NULL;
> +
> +	ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc->max_num_queues); }

Will gc->max_num_queues change after destroying a EQ?

> +
> +static int mana_ib_create_eq(struct mana_ib_ucontext *ucontext,
> +			     struct mana_ib_dev *mdev)
> +{
> +	struct gdma_queue_spec spec = {};
> +	struct gdma_queue *queue;
> +	struct gdma_context *gc;
> +	struct ib_device *ibdev;
> +	struct gdma_dev *gd;
> +	int err;
> +	int i;
> +
> +	if (!ucontext || !mdev)
> +		return -EINVAL;
> +
> +	ibdev = ucontext->ibucontext.device;
> +	gd = mdev->gdma_dev;
> +
> +	gc = gd->gdma_context;
> +
> +	ucontext->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
> +				GFP_KERNEL);
> +	if (!ucontext->eqs)
> +		return -ENOMEM;
> +
> +	spec.type = GDMA_EQ;
> +	spec.monitor_avl_buf = false;
> +	spec.queue_size = EQ_SIZE;
> +	spec.eq.callback = NULL;
> +	spec.eq.context = ucontext->eqs;
> +	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
> +	spec.eq.msix_allocated = true;
> +
> +	for (i = 0; i < gc->max_num_queues; i++) {
> +		spec.eq.msix_index = i;
> +		err = mana_gd_create_mana_eq(gd, &spec, &queue);
> +		if (err)
> +			goto out;
> +
> +		queue->eq.disable_needed = true;
> +		ucontext->eqs[i].eq = queue;
> +	}
> +
> +	return 0;
> +
> +out:
> +	ibdev_dbg(ibdev, "Failed to allocated eq err %d\n", err);
> +	mana_ib_destroy_eq(ucontext, mdev);
> +	return err;
> +}
> +
>  static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
>  					 int doorbell_page)
>  {
> @@ -225,7 +300,17 @@ int mana_ib_alloc_ucontext(struct ib_ucontext
> *ibcontext,
> 
>  	ucontext->doorbell = doorbell_page;
> 
> +	ret = mana_ib_create_eq(ucontext, mdev);
> +	if (ret) {
> +		ibdev_dbg(ibdev, "Failed to create eq's , ret %d\n", ret);
> +		goto err;
> +	}
> +
>  	return 0;
> +
> +err:
> +	mana_gd_destroy_doorbell_page(gc, doorbell_page);
> +	return ret;
>  }
> 
>  void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) @@ -240,6
> +325,8 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
>  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
>  	gc = mdev->gdma_dev->gdma_context;
> 
> +	mana_ib_destroy_eq(mana_ucontext, mdev);
> +
>  	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
>  	if (ret)
>  		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
> diff --git a/drivers/infiniband/hw/mana/mana_ib.h
> b/drivers/infiniband/hw/mana/mana_ib.h
> index 502cc8672eef..9672fa1670a5 100644
> --- a/drivers/infiniband/hw/mana/mana_ib.h
> +++ b/drivers/infiniband/hw/mana/mana_ib.h
> @@ -67,6 +67,7 @@ struct mana_ib_cq {
>  	int cqe;
>  	u64 gdma_region;
>  	u64 id;
> +	u32 comp_vector;
>  };
> 
>  struct mana_ib_qp {
> @@ -86,6 +87,7 @@ struct mana_ib_qp {
>  struct mana_ib_ucontext {
>  	struct ib_ucontext ibucontext;
>  	u32 doorbell;
> +	struct mana_eq *eqs;
>  };
> 
>  struct mana_ib_rwq_ind_table {
> @@ -159,4 +161,6 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port,
> int index,
> 
>  void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
> 
> +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq);
> +
>  #endif
> diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
> index 54b61930a7fd..e133d86c0875 100644
> --- a/drivers/infiniband/hw/mana/qp.c
> +++ b/drivers/infiniband/hw/mana/qp.c
> @@ -96,16 +96,20 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>  	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
>  	struct mana_ib_dev *mdev =
>  		container_of(pd->device, struct mana_ib_dev, ib_dev);
> +	struct ib_ucontext *ib_ucontext = pd->uobject->context;
>  	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
>  	struct mana_ib_create_qp_rss_resp resp = {};
>  	struct mana_ib_create_qp_rss ucmd = {};
> +	struct mana_ib_ucontext *mana_ucontext;
>  	struct gdma_dev *gd = mdev->gdma_dev;
>  	mana_handle_t *mana_ind_table;
>  	struct mana_port_context *mpc;
> +	struct gdma_queue *gdma_cq;
>  	struct mana_context *mc;
>  	struct net_device *ndev;
>  	struct mana_ib_cq *cq;
>  	struct mana_ib_wq *wq;
> +	struct mana_eq *eq;
>  	unsigned int ind_tbl_size;
>  	struct ib_cq *ibcq;
>  	struct ib_wq *ibwq;
> @@ -114,6 +118,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>  	int ret;
> 
>  	mc = gd->driver_data;
> +	mana_ucontext =
> +		container_of(ib_ucontext, struct mana_ib_ucontext, ibucontext);
> 
>  	if (!udata || udata->inlen < sizeof(ucmd))
>  		return -EINVAL;
> @@ -180,6 +186,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>  	for (i = 0; i < ind_tbl_size; i++) {
>  		struct mana_obj_spec wq_spec = {};
>  		struct mana_obj_spec cq_spec = {};
> +		unsigned int max_num_queues = gd->gdma_context-
> >max_num_queues;
> 
>  		ibwq = ind_tbl->ind_tbl[i];
>  		wq = container_of(ibwq, struct mana_ib_wq, ibwq); @@ -193,7
> +200,8 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd
> *pd,
>  		cq_spec.gdma_region = cq->gdma_region;
>  		cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
>  		cq_spec.modr_ctx_id = 0;
> -		cq_spec.attached_eq = GDMA_CQ_NO_EQ;
> +		eq = &mana_ucontext->eqs[cq->comp_vector %
> max_num_queues];
> +		cq_spec.attached_eq = eq->eq->id;
> 
>  		ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
>  					 &wq_spec, &cq_spec, &wq-
> >rx_object); @@ -207,6 +215,9 @@ static int mana_ib_create_qp_rss(struct
> ib_qp *ibqp, struct ib_pd *pd,
>  		wq->id = wq_spec.queue_index;
>  		cq->id = cq_spec.queue_index;
> 
> +		ibdev_dbg(&mdev->ib_dev, "attached eq id %u  cq with
> id %llu\n",
> +			eq->eq->id, cq->id);
> +
>  		ibdev_dbg(&mdev->ib_dev,
>  			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
>  			  ret, wq->rx_object, wq->id, cq->id); @@ -215,6
> +226,27 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd
> *pd,
>  		resp.entries[i].wqid = wq->id;
> 
>  		mana_ind_table[i] = wq->rx_object;
> +
> +		if (gd->gdma_context->cq_table[cq->id] == NULL) {
> +
> +			gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
> +			if (!gdma_cq) {
> +				ibdev_dbg(&mdev->ib_dev,
> +					 "failed to allocate gdma_cq\n");
> +				ret = -ENOMEM;
> +				goto free_cq;
> +			}
> +
> +			ibdev_dbg(&mdev->ib_dev, "gdma cq allocated %p\n",
> +				  gdma_cq);
> +
> +			gdma_cq->cq.context = cq;
> +			gdma_cq->type = GDMA_CQ;
> +			gdma_cq->cq.callback = mana_ib_cq_handler;
> +			gdma_cq->id = cq->id;
> +			gd->gdma_context->cq_table[cq->id] = gdma_cq;
> +		}
> +
>  	}
>  	resp.num_entries = i;
> 
> @@ -224,7 +256,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
>  					 ucmd.rx_hash_key_len,
>  					 ucmd.rx_hash_key);
>  	if (ret)
> -		goto fail;
> +		goto free_cq;
> 
>  	ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
>  	if (ret) {
> @@ -238,6 +270,23 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp,
> struct ib_pd *pd,
> 
>  	return 0;
> 
> +free_cq:
> +	{
> +		int j = i;
> +		u64 cqid;
> +
> +		while (j-- > 0) {
> +			cqid = resp.entries[j].cqid;
> +			gdma_cq = gd->gdma_context->cq_table[cqid];
> +			cq = gdma_cq->cq.context;
> +			if (atomic_read(&cq->ibcq.usecnt) == 0) {
> +				kfree(gd->gdma_context->cq_table[cqid]);
> +				gd->gdma_context->cq_table[cqid] = NULL;
> +			}
> +		}
> +
> +	}
> +
>  fail:
>  	while (i-- > 0) {
>  		ibwq = ind_tbl->ind_tbl[i];
> @@ -269,10 +318,12 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>  	struct mana_obj_spec wq_spec = {};
>  	struct mana_obj_spec cq_spec = {};
>  	struct mana_port_context *mpc;
> +	struct gdma_queue *gdma_cq;
>  	struct mana_context *mc;
>  	struct net_device *ndev;
>  	struct ib_umem *umem;
> -	int err;
> +	struct mana_eq *eq;
> +	int err, eq_vec;
>  	u32 port;
> 
>  	mc = gd->driver_data;
> @@ -350,7 +401,9 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>  	cq_spec.gdma_region = send_cq->gdma_region;
>  	cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
>  	cq_spec.modr_ctx_id = 0;
> -	cq_spec.attached_eq = GDMA_CQ_NO_EQ;
> +	eq_vec = send_cq->comp_vector % gd->gdma_context-
> >max_num_queues;
> +	eq = &mana_ucontext->eqs[eq_vec];
> +	cq_spec.attached_eq = eq->eq->id;
> 
>  	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ,
> &wq_spec,
>  				 &cq_spec, &qp->tx_object);
> @@ -368,6 +421,26 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp,
> struct ib_pd *ibpd,
>  	qp->sq_id = wq_spec.queue_index;
>  	send_cq->id = cq_spec.queue_index;
> 
> +	if (gd->gdma_context->cq_table[send_cq->id] == NULL) {
> +
> +		gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
> +		if (!gdma_cq) {
> +			ibdev_dbg(&mdev->ib_dev,
> +				  "failed to allocate gdma_cq\n");
> +			err = -ENOMEM;
> +			goto err_destroy_wqobj_and_cq;
> +		}
> +
> +		pr_debug("gdma cq allocated %p\n", gdma_cq);
Should use ibdev_dbg

Thanks,
Long
Jakub Kicinski June 8, 2023, 4:39 a.m. UTC | #2
On Tue,  6 Jun 2023 15:17:47 +0000 Wei Hu wrote:
>  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
>  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
>  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
>  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
>  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
>  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
>  include/net/mana/gdma.h                       |   9 +-

IB and netdev are different subsystem, can you put it on a branch 
and send a PR as the cover letter so that both subsystems can pull?

Examples:
https://lore.kernel.org/all/20230607210410.88209-1-saeed@kernel.org/
https://lore.kernel.org/all/20230602171302.745492-1-anthony.l.nguyen@intel.com/
Wei Hu June 8, 2023, 11:17 a.m. UTC | #3
> -----Original Message-----
> Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> mana ib driver.
> 
> > Subject: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to mana
> > ib driver.
> >
> > Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
> > to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
> > handler when completion interrupt happens. EQs are destroyed when
> ucontext is deallocated.
> >
> > The change calls some public APIs in mana ethernet driver to allocate
> > EQs and other resources. The EQ process routine is also shared by mana
> > ethernet and mana ib drivers.
> >
> > Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
> > Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> > Signed-off-by: Wei Hu <weh@microsoft.com>
> > ---
> >
> > v2: Use ibdev_dbg to print error messages and return -ENOMEM
> >     when kzalloc fails.
> >
> >  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
> >  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
> >  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
> >  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
> >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
> >  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
> >  include/net/mana/gdma.h                       |   9 +-
> >  7 files changed, 290 insertions(+), 64 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mana/cq.c
> > b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..3cd680e0e753
> > 100644
> > --- a/drivers/infiniband/hw/mana/cq.c
> > +++ b/drivers/infiniband/hw/mana/cq.c
> > @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> > struct ib_cq_init_attr *attr,
> >  	struct ib_device *ibdev = ibcq->device;
> >  	struct mana_ib_create_cq ucmd = {};
> >  	struct mana_ib_dev *mdev;
> > +	struct gdma_context *gc;
> > +	struct gdma_dev *gd;
> >  	int err;
> >
> >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > +	gd = mdev->gdma_dev;
> > +	gc = gd->gdma_context;
> >
> >  	if (udata->inlen < sizeof(ucmd))
> >  		return -EINVAL;
> >
> > +	cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
> > +				0 : attr->comp_vector;
> > +
> >  	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > >inlen));
> >  	if (err) {
> >  		ibdev_dbg(ibdev,
> > @@ -69,11 +76,32 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct
> > ib_udata *udata)
> >  	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> >  	struct ib_device *ibdev = ibcq->device;
> >  	struct mana_ib_dev *mdev;
> > +	struct gdma_context *gc;
> > +	struct gdma_dev *gd;
> > +
> >
> >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > +	gd = mdev->gdma_dev;
> > +	gc = gd->gdma_context;
> >
> > -	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> > -	ib_umem_release(cq->umem);
> > +
> > +
> > +	if (atomic_read(&ibcq->usecnt) == 0) {
> > +		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> 
> Need to check if this function fails. The following code will call kfree(gc-
> >cq_table[cq->id]), it's possible that IRQ is happening at the same time if CQ
> is not destroyed.
> 

Sure. Will update.

> > +		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq-
> >id]);
> > +		kfree(gc->cq_table[cq->id]);
> > +		gc->cq_table[cq->id] = NULL;
> > +		ib_umem_release(cq->umem);
> > +	}
> >
> >  	return 0;
> >  }
> > +
> > +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) {
> > +	struct mana_ib_cq *cq = ctx;
> > +	struct ib_device *ibdev = cq->ibcq.device;
> > +
> > +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
> 
> This debug message seems overkill?
> 
> > +	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); }
> > diff --git a/drivers/infiniband/hw/mana/main.c
> > b/drivers/infiniband/hw/mana/main.c
> > index 7be4c3adb4e2..e4efbcaed10e 100644
> > --- a/drivers/infiniband/hw/mana/main.c
> > +++ b/drivers/infiniband/hw/mana/main.c
> > @@ -143,6 +143,81 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct
> > ib_udata *udata)
> >  	return err;
> >  }
> >
> > +static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
> > +			       struct mana_ib_dev *mdev)
> > +{
> > +	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
> > +	struct ib_device *ibdev = ucontext->ibucontext.device;
> > +	struct gdma_queue *eq;
> > +	int i;
> > +
> > +	if (!ucontext->eqs)
> > +		return;
> > +
> > +	for (i = 0; i < gc->max_num_queues; i++) {
> > +		eq = ucontext->eqs[i].eq;
> > +		if (!eq)
> > +			continue;
> > +
> > +		mana_gd_destroy_queue(gc, eq);
> > +	}
> > +
> > +	kfree(ucontext->eqs);
> > +	ucontext->eqs = NULL;
> > +
> > +	ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc-
> >max_num_queues); }
> 
> Will gc->max_num_queues change after destroying a EQ?
> 

I think it will not change. Also the compiler might optimize
the code to just read the value once and store it in a register.

Thanks,
Wei
Long Li June 8, 2023, 5:47 p.m. UTC | #4
> Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> mana ib driver.
> 
> 
> 
> > -----Original Message-----
> > Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> > mana ib driver.
> >
> > > Subject: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> > > mana ib driver.
> > >
> > > Add EQ interrupt support for mana ib driver. Allocate EQs per
> > > ucontext to receive interrupt. Attach EQ when CQ is created. Call CQ
> > > interrupt handler when completion interrupt happens. EQs are
> > > destroyed when
> > ucontext is deallocated.
> > >
> > > The change calls some public APIs in mana ethernet driver to
> > > allocate EQs and other resources. The EQ process routine is also
> > > shared by mana ethernet and mana ib drivers.
> > >
> > > Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
> > > Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> > > Signed-off-by: Wei Hu <weh@microsoft.com>
> > > ---
> > >
> > > v2: Use ibdev_dbg to print error messages and return -ENOMEM
> > >     when kzalloc fails.
> > >
> > >  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
> > >  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
> > >  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
> > >  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
> > >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++-------
> -
> > >  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
> > >  include/net/mana/gdma.h                       |   9 +-
> > >  7 files changed, 290 insertions(+), 64 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > b/drivers/infiniband/hw/mana/cq.c index d141cab8a1e6..3cd680e0e753
> > > 100644
> > > --- a/drivers/infiniband/hw/mana/cq.c
> > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const
> > > struct ib_cq_init_attr *attr,
> > >  	struct ib_device *ibdev = ibcq->device;
> > >  	struct mana_ib_create_cq ucmd = {};
> > >  	struct mana_ib_dev *mdev;
> > > +	struct gdma_context *gc;
> > > +	struct gdma_dev *gd;
> > >  	int err;
> > >
> > >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > +	gd = mdev->gdma_dev;
> > > +	gc = gd->gdma_context;
> > >
> > >  	if (udata->inlen < sizeof(ucmd))
> > >  		return -EINVAL;
> > >
> > > +	cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
> > > +				0 : attr->comp_vector;
> > > +
> > >  	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > > >inlen));
> > >  	if (err) {
> > >  		ibdev_dbg(ibdev,
> > > @@ -69,11 +76,32 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq,
> > > struct ib_udata *udata)
> > >  	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > >  	struct ib_device *ibdev = ibcq->device;
> > >  	struct mana_ib_dev *mdev;
> > > +	struct gdma_context *gc;
> > > +	struct gdma_dev *gd;
> > > +
> > >
> > >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > +	gd = mdev->gdma_dev;
> > > +	gc = gd->gdma_context;
> > >
> > > -	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> > > -	ib_umem_release(cq->umem);
> > > +
> > > +
> > > +	if (atomic_read(&ibcq->usecnt) == 0) {
> > > +		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> >
> > Need to check if this function fails. The following code will call
> > kfree(gc-
> > >cq_table[cq->id]), it's possible that IRQ is happening at the same
> > >time if CQ
> > is not destroyed.
> >
> 
> Sure. Will update.
> 
> > > +		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq-
> > >id]);
> > > +		kfree(gc->cq_table[cq->id]);
> > > +		gc->cq_table[cq->id] = NULL;
> > > +		ib_umem_release(cq->umem);
> > > +	}
> > >
> > >  	return 0;
> > >  }
> > > +
> > > +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) {
> > > +	struct mana_ib_cq *cq = ctx;
> > > +	struct ib_device *ibdev = cq->ibcq.device;
> > > +
> > > +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
> >
> > This debug message seems overkill?
> >
> > > +	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); }
> > > diff --git a/drivers/infiniband/hw/mana/main.c
> > > b/drivers/infiniband/hw/mana/main.c
> > > index 7be4c3adb4e2..e4efbcaed10e 100644
> > > --- a/drivers/infiniband/hw/mana/main.c
> > > +++ b/drivers/infiniband/hw/mana/main.c
> > > @@ -143,6 +143,81 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd,
> > > struct ib_udata *udata)
> > >  	return err;
> > >  }
> > >
> > > +static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
> > > +			       struct mana_ib_dev *mdev) {
> > > +	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
> > > +	struct ib_device *ibdev = ucontext->ibucontext.device;
> > > +	struct gdma_queue *eq;
> > > +	int i;
> > > +
> > > +	if (!ucontext->eqs)
> > > +		return;
> > > +
> > > +	for (i = 0; i < gc->max_num_queues; i++) {
> > > +		eq = ucontext->eqs[i].eq;
> > > +		if (!eq)
> > > +			continue;
> > > +
> > > +		mana_gd_destroy_queue(gc, eq);
> > > +	}
> > > +
> > > +	kfree(ucontext->eqs);
> > > +	ucontext->eqs = NULL;
> > > +
> > > +	ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc-
> > >max_num_queues); }
> >
> > Will gc->max_num_queues change after destroying a EQ?
> >
> 
> I think it will not change. Also the compiler might optimize the code to just
> read the value once and store it in a register.
> 
> Thanks,
> Wei

This message is confusing. How about changing it to " destroyed eq. Maximum count %d", or just remove the count as it's not informational.

Long
Wei Hu June 9, 2023, 1:15 a.m. UTC | #5
> -----Original Message-----
> From: Long Li <longli@microsoft.com>
> Sent: Friday, June 9, 2023 1:48 AM
> To: Wei Hu <weh@microsoft.com>; netdev@vger.kernel.org; linux-
> hyperv@vger.kernel.org; linux-rdma@vger.kernel.org; Ajay Sharma
> <sharmaajay@microsoft.com>; jgg@ziepe.ca; leon@kernel.org; KY
> Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>;
> wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>;
> davem@davemloft.net; edumazet@google.com; kuba@kernel.org;
> pabeni@redhat.com; vkuznets@redhat.com; ssengar@linux.microsoft.com;
> shradhagupta@linux.microsoft.com
> Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> mana ib driver.
> 
> > Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> > mana ib driver.
> >
> >
> >
> > > -----Original Message-----
> > > Subject: RE: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support
> > > to mana ib driver.
> > >
> > > > Subject: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> > > > mana ib driver.
> > > >
> > > > Add EQ interrupt support for mana ib driver. Allocate EQs per
> > > > ucontext to receive interrupt. Attach EQ when CQ is created. Call
> > > > CQ interrupt handler when completion interrupt happens. EQs are
> > > > destroyed when
> > > ucontext is deallocated.
> > > >
> > > > The change calls some public APIs in mana ethernet driver to
> > > > allocate EQs and other resources. The EQ process routine is also
> > > > shared by mana ethernet and mana ib drivers.
> > > >
> > > > Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
> > > > Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> > > > Signed-off-by: Wei Hu <weh@microsoft.com>
> > > > ---
> > > >
> > > > v2: Use ibdev_dbg to print error messages and return -ENOMEM
> > > >     when kzalloc fails.
> > > >
> > > >  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
> > > >  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
> > > >  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
> > > >  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
> > > >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++---
> ----
> > -
> > > >  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
> > > >  include/net/mana/gdma.h                       |   9 +-
> > > >  7 files changed, 290 insertions(+), 64 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/hw/mana/cq.c
> > > > b/drivers/infiniband/hw/mana/cq.c index
> d141cab8a1e6..3cd680e0e753
> > > > 100644
> > > > --- a/drivers/infiniband/hw/mana/cq.c
> > > > +++ b/drivers/infiniband/hw/mana/cq.c
> > > > @@ -12,13 +12,20 @@ int mana_ib_create_cq(struct ib_cq *ibcq,
> > > > const struct ib_cq_init_attr *attr,
> > > >  	struct ib_device *ibdev = ibcq->device;
> > > >  	struct mana_ib_create_cq ucmd = {};
> > > >  	struct mana_ib_dev *mdev;
> > > > +	struct gdma_context *gc;
> > > > +	struct gdma_dev *gd;
> > > >  	int err;
> > > >
> > > >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > +	gd = mdev->gdma_dev;
> > > > +	gc = gd->gdma_context;
> > > >
> > > >  	if (udata->inlen < sizeof(ucmd))
> > > >  		return -EINVAL;
> > > >
> > > > +	cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
> > > > +				0 : attr->comp_vector;
> > > > +
> > > >  	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata-
> > > > >inlen));
> > > >  	if (err) {
> > > >  		ibdev_dbg(ibdev,
> > > > @@ -69,11 +76,32 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq,
> > > > struct ib_udata *udata)
> > > >  	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
> > > >  	struct ib_device *ibdev = ibcq->device;
> > > >  	struct mana_ib_dev *mdev;
> > > > +	struct gdma_context *gc;
> > > > +	struct gdma_dev *gd;
> > > > +
> > > >
> > > >  	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
> > > > +	gd = mdev->gdma_dev;
> > > > +	gc = gd->gdma_context;
> > > >
> > > > -	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> > > > -	ib_umem_release(cq->umem);
> > > > +
> > > > +
> > > > +	if (atomic_read(&ibcq->usecnt) == 0) {
> > > > +		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> > >
> > > Need to check if this function fails. The following code will call
> > > kfree(gc-
> > > >cq_table[cq->id]), it's possible that IRQ is happening at the same
> > > >time if CQ
> > > is not destroyed.
> > >
> >
> > Sure. Will update.
> >
> > > > +		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq-
> > > >id]);
> > > > +		kfree(gc->cq_table[cq->id]);
> > > > +		gc->cq_table[cq->id] = NULL;
> > > > +		ib_umem_release(cq->umem);
> > > > +	}
> > > >
> > > >  	return 0;
> > > >  }
> > > > +
> > > > +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) {
> > > > +	struct mana_ib_cq *cq = ctx;
> > > > +	struct ib_device *ibdev = cq->ibcq.device;
> > > > +
> > > > +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
> > >
> > > This debug message seems overkill?
> > >
> > > > +	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); }
> > > > diff --git a/drivers/infiniband/hw/mana/main.c
> > > > b/drivers/infiniband/hw/mana/main.c
> > > > index 7be4c3adb4e2..e4efbcaed10e 100644
> > > > --- a/drivers/infiniband/hw/mana/main.c
> > > > +++ b/drivers/infiniband/hw/mana/main.c
> > > > @@ -143,6 +143,81 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd,
> > > > struct ib_udata *udata)
> > > >  	return err;
> > > >  }
> > > >
> > > > +static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
> > > > +			       struct mana_ib_dev *mdev) {
> > > > +	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
> > > > +	struct ib_device *ibdev = ucontext->ibucontext.device;
> > > > +	struct gdma_queue *eq;
> > > > +	int i;
> > > > +
> > > > +	if (!ucontext->eqs)
> > > > +		return;
> > > > +
> > > > +	for (i = 0; i < gc->max_num_queues; i++) {
> > > > +		eq = ucontext->eqs[i].eq;
> > > > +		if (!eq)
> > > > +			continue;
> > > > +
> > > > +		mana_gd_destroy_queue(gc, eq);
> > > > +	}
> > > > +
> > > > +	kfree(ucontext->eqs);
> > > > +	ucontext->eqs = NULL;
> > > > +
> > > > +	ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc-
> > > >max_num_queues); }
> > >
> > > Will gc->max_num_queues change after destroying a EQ?
> > >
> >
> > I think it will not change. Also the compiler might optimize the code
> > to just read the value once and store it in a register.
> >
> > Thanks,
> > Wei
> 
> This message is confusing. How about changing it to " destroyed eq.
> Maximum count %d", or just remove the count as it's not informational.
> 
> Long

Sure. I will remove the count from the message.

Thanks,
Wei
Leon Romanovsky June 11, 2023, 6:18 p.m. UTC | #6
On Tue, Jun 06, 2023 at 03:17:47PM +0000, Wei Hu wrote:
> Add EQ interrupt support for mana ib driver. Allocate EQs per ucontext
> to receive interrupt. Attach EQ when CQ is created. Call CQ interrupt
> handler when completion interrupt happens. EQs are destroyed when
> ucontext is deallocated.
> 
> The change calls some public APIs in mana ethernet driver to
> allocate EQs and other resources. The EQ process routine is also shared
> by mana ethernet and mana ib drivers.
> 
> Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
> Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
> Signed-off-by: Wei Hu <weh@microsoft.com>
> ---
> 
> v2: Use ibdev_dbg to print error messages and return -ENOMEM
>     when kzalloc fails.

<...>

> +	if (atomic_read(&ibcq->usecnt) == 0) {

What exactly are you checking here? And in all places where you access ibcq->usecnt?

> +		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
> +		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq->id]);
> +		kfree(gc->cq_table[cq->id]);
> +		gc->cq_table[cq->id] = NULL;
> +		ib_umem_release(cq->umem);
> +	}
>  
>  	return 0;
>  }
> +
> +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
> +{
> +	struct mana_ib_cq *cq = ctx;
> +	struct ib_device *ibdev = cq->ibcq.device;
> +
> +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);

This patch has too many debug prints, most if not all should go.

Thanks
Wei Hu June 12, 2023, 4:44 a.m. UTC | #7
Hi Jakub,

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Thursday, June 8, 2023 12:39 PM
> To: Wei Hu <weh@microsoft.com>
> Cc: netdev@vger.kernel.org; linux-hyperv@vger.kernel.org; linux-
> rdma@vger.kernel.org; Long Li <longli@microsoft.com>; Ajay Sharma
> <sharmaajay@microsoft.com>; jgg@ziepe.ca; leon@kernel.org; KY
> Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>;
> wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>;
> davem@davemloft.net; edumazet@google.com; pabeni@redhat.com;
> vkuznets@redhat.com; ssengar@linux.microsoft.com;
> shradhagupta@linux.microsoft.com
> Subject: Re: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> mana ib driver.
> 
> On Tue,  6 Jun 2023 15:17:47 +0000 Wei Hu wrote:
> >  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
> >  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
> >  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
> >  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
> >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
> >  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
> >  include/net/mana/gdma.h                       |   9 +-
> 
> IB and netdev are different subsystem, can you put it on a branch and send a
> PR as the cover letter so that both subsystems can pull?
> 
> Examples:
> https://lore.kernel.org/all/20230607210410.88209-1-saeed@kernel.org/
> https://lore.kernel.org/all/20230602171302.745492-1-anthony.l.nguyen@intel.com/

Thanks for your comment. I am new to the process. I have a few questions regarding this and hope you can help. First of all, the patch is mostly for IB. Is it possible for the patch to just go through the RDMA branch, since most of the changes are in RDMA?

If the patch also needs to go through the NETDEV branch, does it mean each subsystem will pull its own part? A few follow-up questions about generating a PR, since I have never done this before.

1. Which repo should I clone and create the branch from?

2. From the example you provided, I see these people have their own branches on kernel.org, for example something like:
git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2023-06-06. 
I am not a Linux maintainer. I just have a repo on GitHub. How do I create or fork on kernel.org? Do I need an account to do so? Or can I use my own repo on GitHub?

3. How do I create the PR in this case? Should I follow this link: https://docs.kernel.org/maintainer/pull-requests.html?

Thanks,
Wei
Leon Romanovsky June 12, 2023, 6:13 a.m. UTC | #8
On Mon, Jun 12, 2023 at 04:44:44AM +0000, Wei Hu wrote:
> Hi Jakub,
> 
> > -----Original Message-----
> > From: Jakub Kicinski <kuba@kernel.org>
> > Sent: Thursday, June 8, 2023 12:39 PM
> > To: Wei Hu <weh@microsoft.com>
> > Cc: netdev@vger.kernel.org; linux-hyperv@vger.kernel.org; linux-
> > rdma@vger.kernel.org; Long Li <longli@microsoft.com>; Ajay Sharma
> > <sharmaajay@microsoft.com>; jgg@ziepe.ca; leon@kernel.org; KY
> > Srinivasan <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>;
> > wei.liu@kernel.org; Dexuan Cui <decui@microsoft.com>;
> > davem@davemloft.net; edumazet@google.com; pabeni@redhat.com;
> > vkuznets@redhat.com; ssengar@linux.microsoft.com;
> > shradhagupta@linux.microsoft.com
> > Subject: Re: [PATCH v2 1/1] RDMA/mana_ib: Add EQ interrupt support to
> > mana ib driver.
> > 
> > On Tue,  6 Jun 2023 15:17:47 +0000 Wei Hu wrote:
> > >  drivers/infiniband/hw/mana/cq.c               |  32 ++++-
> > >  drivers/infiniband/hw/mana/main.c             |  87 ++++++++++++
> > >  drivers/infiniband/hw/mana/mana_ib.h          |   4 +
> > >  drivers/infiniband/hw/mana/qp.c               |  90 +++++++++++-
> > >  .../net/ethernet/microsoft/mana/gdma_main.c   | 131 ++++++++++--------
> > >  drivers/net/ethernet/microsoft/mana/mana_en.c |   1 +
> > >  include/net/mana/gdma.h                       |   9 +-
> > 
> > IB and netdev are different subsystem, can you put it on a branch and send a
> > PR as the cover letter so that both subsystems can pull?
> > 
> > Examples:
> > https://lore.kernel.org/all/20230607210410.88209-1-saeed@kernel.org/
> > https://lore.kernel.org/all/20230602171302.745492-1-anthony.l.nguyen@intel.com/
> 
> Thanks for you comment. I am  new to the process. I have a few questions regarding to this and hope you can help. First of all, the patch is mostly for IB. Is it possible for the patch to just go through the RDMA branch, since most of the changes are in RDMA? 

Yes, it can, we (RDMA) will handle it.

Thanks
Wei Hu June 12, 2023, 2:44 p.m. UTC | #9
> -----Original Message-----
> From: Leon Romanovsky <leon@kernel.org>
> Sent: Monday, June 12, 2023 2:19 AM
> 
> > +
> > +void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq) {
> > +	struct mana_ib_cq *cq = ctx;
> > +	struct ib_device *ibdev = cq->ibcq.device;
> > +
> > +	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
> 
> This patch has too many debug prints, most if not all should go.
> 
Thanks. I will remove the debug prints in the normal path. 

Wei
Jakub Kicinski June 12, 2023, 5:21 p.m. UTC | #10
On Mon, 12 Jun 2023 04:44:44 +0000 Wei Hu wrote:
> If the patch also needs to go through the NETDEV branch, does it mean two subsystems will pull its own part? A few follow-up questions about generating a PR, since I have never done such before.
> 
> 1. Which repo should I clone and create the branch from?

The main tree of Linus Torvalds. Check which tags are present in both
netdev and rdma trees and use the newest common tag between the trees
as a base.

> 2. From the example you provided, I see these people has their own branches on kernel.org, for example something like:
> git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git tags/mlx5-updates-2023-06-06. 
> I am not Linux maintainer. I just have repo on Github. How do I
> create or fork on kernel.org? Do I need an account to do so? Or I can
> use my own repo on Github?

GitHub is fine.

> 3.  How to create PR in this case? Should I follow this link:
> https://docs.kernel.org/maintainer/pull-requests.html?

Sort of. But still post the patches, so you'd want to use these
commands somewhere along the way:

git format-patch [...] -o $path --cover-letter
git request-pull [...] >> $path/0000-cover-letter.patch
Jakub Kicinski June 12, 2023, 5:22 p.m. UTC | #11
On Mon, 12 Jun 2023 09:13:49 +0300 Leon Romanovsky wrote:
> > Thanks for you comment. I am  new to the process. I have a few
> > questions regarding to this and hope you can help. First of all,
> > the patch is mostly for IB. Is it possible for the patch to just go
> > through the RDMA branch, since most of the changes are in RDMA?   
> 
> Yes, it can, we (RDMA) will handle it.

Probably, although it's better to teach them some process sooner
rather than later?
Jason Gunthorpe June 12, 2023, 6:16 p.m. UTC | #12
On Mon, Jun 12, 2023 at 10:22:21AM -0700, Jakub Kicinski wrote:
> On Mon, 12 Jun 2023 09:13:49 +0300 Leon Romanovsky wrote:
> > > Thanks for you comment. I am  new to the process. I have a few
> > > questions regarding to this and hope you can help. First of all,
> > > the patch is mostly for IB. Is it possible for the patch to just go
> > > through the RDMA branch, since most of the changes are in RDMA?   
> > 
> > Yes, it can, we (RDMA) will handle it.
> 
> Probably, although it's better to teach them some process sooner
> rather than later?

I've been of the opinion the shared branch process is difficult - we
took a long time to fine tune the process. If you don't fully
understand how to do this with git you can make a real mess of it.

So I would say MS is welcome to use it if they can do it right, but I
wouldn't push them to do so or expect that they must do so to be
successful. Really only Mellanox and Intel seem to have enough churn
to justify it right now.

If they don't use shared branches then they must be responsible to
avoid conflicts, even if that means they have to delay sending patches
for a cycle.

Jason
Leon Romanovsky June 13, 2023, 7:29 a.m. UTC | #13
On Mon, Jun 12, 2023 at 10:22:21AM -0700, Jakub Kicinski wrote:
> On Mon, 12 Jun 2023 09:13:49 +0300 Leon Romanovsky wrote:
> > > Thanks for you comment. I am  new to the process. I have a few
> > > questions regarding to this and hope you can help. First of all,
> > > the patch is mostly for IB. Is it possible for the patch to just go
> > > through the RDMA branch, since most of the changes are in RDMA?   
> > 
> > Yes, it can, we (RDMA) will handle it.
> 
> Probably, although it's better to teach them some process sooner
> rather than later?

Like Jason wrote above, a shared branch is valuable once the submitter has
enough volume to justify it. In previous cycles, when we needed it, I simply
created a shared branch [1] for them.

Even we (Nvidia), who have enough patch volume, sometimes prefer to
delay submission to the next cycle rather than bother ourselves with a
shared branch.

[1] https://lore.kernel.org/all/Y2v2CGEWC70g+Ot+@unreal/

Thanks
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index d141cab8a1e6..3cd680e0e753 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -12,13 +12,20 @@  int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct ib_device *ibdev = ibcq->device;
 	struct mana_ib_create_cq ucmd = {};
 	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	struct gdma_dev *gd;
 	int err;
 
 	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gd = mdev->gdma_dev;
+	gc = gd->gdma_context;
 
 	if (udata->inlen < sizeof(ucmd))
 		return -EINVAL;
 
+	cq->comp_vector = attr->comp_vector > gc->max_num_queues ?
+				0 : attr->comp_vector;
+
 	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
 	if (err) {
 		ibdev_dbg(ibdev,
@@ -69,11 +76,32 @@  int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
 	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
 	struct ib_device *ibdev = ibcq->device;
 	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	struct gdma_dev *gd;
+
 
 	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gd = mdev->gdma_dev;
+	gc = gd->gdma_context;
 
-	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
-	ib_umem_release(cq->umem);
+
+
+	if (atomic_read(&ibcq->usecnt) == 0) {
+		mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+		ibdev_dbg(ibdev, "freeing gdma cq %p\n", gc->cq_table[cq->id]);
+		kfree(gc->cq_table[cq->id]);
+		gc->cq_table[cq->id] = NULL;
+		ib_umem_release(cq->umem);
+	}
 
 	return 0;
 }
+
+void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+{
+	struct mana_ib_cq *cq = ctx;
+	struct ib_device *ibdev = cq->ibcq.device;
+
+	ibdev_dbg(ibdev, "Enter %s %d\n", __func__, __LINE__);
+	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 7be4c3adb4e2..e4efbcaed10e 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -143,6 +143,81 @@  int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	return err;
 }
 
+static void mana_ib_destroy_eq(struct mana_ib_ucontext *ucontext,
+			       struct mana_ib_dev *mdev)
+{
+	struct gdma_context *gc = mdev->gdma_dev->gdma_context;
+	struct ib_device *ibdev = ucontext->ibucontext.device;
+	struct gdma_queue *eq;
+	int i;
+
+	if (!ucontext->eqs)
+		return;
+
+	for (i = 0; i < gc->max_num_queues; i++) {
+		eq = ucontext->eqs[i].eq;
+		if (!eq)
+			continue;
+
+		mana_gd_destroy_queue(gc, eq);
+	}
+
+	kfree(ucontext->eqs);
+	ucontext->eqs = NULL;
+
+	ibdev_dbg(ibdev, "destroyed eq's count %d\n", gc->max_num_queues);
+}
+
+static int mana_ib_create_eq(struct mana_ib_ucontext *ucontext,
+			     struct mana_ib_dev *mdev)
+{
+	struct gdma_queue_spec spec = {};
+	struct gdma_queue *queue;
+	struct gdma_context *gc;
+	struct ib_device *ibdev;
+	struct gdma_dev *gd;
+	int err;
+	int i;
+
+	if (!ucontext || !mdev)
+		return -EINVAL;
+
+	ibdev = ucontext->ibucontext.device;
+	gd = mdev->gdma_dev;
+
+	gc = gd->gdma_context;
+
+	ucontext->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
+				GFP_KERNEL);
+	if (!ucontext->eqs)
+		return -ENOMEM;
+
+	spec.type = GDMA_EQ;
+	spec.monitor_avl_buf = false;
+	spec.queue_size = EQ_SIZE;
+	spec.eq.callback = NULL;
+	spec.eq.context = ucontext->eqs;
+	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.msix_allocated = true;
+
+	for (i = 0; i < gc->max_num_queues; i++) {
+		spec.eq.msix_index = i;
+		err = mana_gd_create_mana_eq(gd, &spec, &queue);
+		if (err)
+			goto out;
+
+		queue->eq.disable_needed = true;
+		ucontext->eqs[i].eq = queue;
+	}
+
+	return 0;
+
+out:
+	ibdev_dbg(ibdev, "Failed to allocated eq err %d\n", err);
+	mana_ib_destroy_eq(ucontext, mdev);
+	return err;
+}
+
 static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
 					 int doorbell_page)
 {
@@ -225,7 +300,17 @@  int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
 
 	ucontext->doorbell = doorbell_page;
 
+	ret = mana_ib_create_eq(ucontext, mdev);
+	if (ret) {
+		ibdev_dbg(ibdev, "Failed to create eq's , ret %d\n", ret);
+		goto err;
+	}
+
 	return 0;
+
+err:
+	mana_gd_destroy_doorbell_page(gc, doorbell_page);
+	return ret;
 }
 
 void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
@@ -240,6 +325,8 @@  void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
 	gc = mdev->gdma_dev->gdma_context;
 
+	mana_ib_destroy_eq(mana_ucontext, mdev);
+
 	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
 	if (ret)
 		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 502cc8672eef..9672fa1670a5 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -67,6 +67,7 @@  struct mana_ib_cq {
 	int cqe;
 	u64 gdma_region;
 	u64 id;
+	u32 comp_vector;
 };
 
 struct mana_ib_qp {
@@ -86,6 +87,7 @@  struct mana_ib_qp {
 struct mana_ib_ucontext {
 	struct ib_ucontext ibucontext;
 	u32 doorbell;
+	struct mana_eq *eqs;
 };
 
 struct mana_ib_rwq_ind_table {
@@ -159,4 +161,6 @@  int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
 
 void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
 
+void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq);
+
 #endif
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 54b61930a7fd..e133d86c0875 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -96,16 +96,20 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
 	struct mana_ib_dev *mdev =
 		container_of(pd->device, struct mana_ib_dev, ib_dev);
+	struct ib_ucontext *ib_ucontext = pd->uobject->context;
 	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
 	struct mana_ib_create_qp_rss_resp resp = {};
 	struct mana_ib_create_qp_rss ucmd = {};
+	struct mana_ib_ucontext *mana_ucontext;
 	struct gdma_dev *gd = mdev->gdma_dev;
 	mana_handle_t *mana_ind_table;
 	struct mana_port_context *mpc;
+	struct gdma_queue *gdma_cq;
 	struct mana_context *mc;
 	struct net_device *ndev;
 	struct mana_ib_cq *cq;
 	struct mana_ib_wq *wq;
+	struct mana_eq *eq;
 	unsigned int ind_tbl_size;
 	struct ib_cq *ibcq;
 	struct ib_wq *ibwq;
@@ -114,6 +118,8 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	int ret;
 
 	mc = gd->driver_data;
+	mana_ucontext =
+		container_of(ib_ucontext, struct mana_ib_ucontext, ibucontext);
 
 	if (!udata || udata->inlen < sizeof(ucmd))
 		return -EINVAL;
@@ -180,6 +186,7 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 	for (i = 0; i < ind_tbl_size; i++) {
 		struct mana_obj_spec wq_spec = {};
 		struct mana_obj_spec cq_spec = {};
+		unsigned int max_num_queues = gd->gdma_context->max_num_queues;
 
 		ibwq = ind_tbl->ind_tbl[i];
 		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
@@ -193,7 +200,8 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 		cq_spec.gdma_region = cq->gdma_region;
 		cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
 		cq_spec.modr_ctx_id = 0;
-		cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+		eq = &mana_ucontext->eqs[cq->comp_vector % max_num_queues];
+		cq_spec.attached_eq = eq->eq->id;
 
 		ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
 					 &wq_spec, &cq_spec, &wq->rx_object);
@@ -207,6 +215,9 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 		wq->id = wq_spec.queue_index;
 		cq->id = cq_spec.queue_index;
 
+		ibdev_dbg(&mdev->ib_dev, "attached eq id %u  cq with id %llu\n",
+			eq->eq->id, cq->id);
+
 		ibdev_dbg(&mdev->ib_dev,
 			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
 			  ret, wq->rx_object, wq->id, cq->id);
@@ -215,6 +226,27 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 		resp.entries[i].wqid = wq->id;
 
 		mana_ind_table[i] = wq->rx_object;
+
+		if (gd->gdma_context->cq_table[cq->id] == NULL) {
+
+			gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+			if (!gdma_cq) {
+				ibdev_dbg(&mdev->ib_dev,
+					 "failed to allocate gdma_cq\n");
+				ret = -ENOMEM;
+				goto free_cq;
+			}
+
+			ibdev_dbg(&mdev->ib_dev, "gdma cq allocated %p\n",
+				  gdma_cq);
+
+			gdma_cq->cq.context = cq;
+			gdma_cq->type = GDMA_CQ;
+			gdma_cq->cq.callback = mana_ib_cq_handler;
+			gdma_cq->id = cq->id;
+			gd->gdma_context->cq_table[cq->id] = gdma_cq;
+		}
+
 	}
 	resp.num_entries = i;
 
@@ -224,7 +256,7 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 					 ucmd.rx_hash_key_len,
 					 ucmd.rx_hash_key);
 	if (ret)
-		goto fail;
+		goto free_cq;
 
 	ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (ret) {
@@ -238,6 +270,23 @@  static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
 
 	return 0;
 
+free_cq:
+	{
+		int j = i;
+		u64 cqid;
+
+		while (j-- > 0) {
+			cqid = resp.entries[j].cqid;
+			gdma_cq = gd->gdma_context->cq_table[cqid];
+			cq = gdma_cq->cq.context;
+			if (atomic_read(&cq->ibcq.usecnt) == 0) {
+				kfree(gd->gdma_context->cq_table[cqid]);
+				gd->gdma_context->cq_table[cqid] = NULL;
+			}
+		}
+
+	}
+
 fail:
 	while (i-- > 0) {
 		ibwq = ind_tbl->ind_tbl[i];
@@ -269,10 +318,12 @@  static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	struct mana_obj_spec wq_spec = {};
 	struct mana_obj_spec cq_spec = {};
 	struct mana_port_context *mpc;
+	struct gdma_queue *gdma_cq;
 	struct mana_context *mc;
 	struct net_device *ndev;
 	struct ib_umem *umem;
-	int err;
+	struct mana_eq *eq;
+	int err, eq_vec;
 	u32 port;
 
 	mc = gd->driver_data;
@@ -350,7 +401,9 @@  static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	cq_spec.gdma_region = send_cq->gdma_region;
 	cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
 	cq_spec.modr_ctx_id = 0;
-	cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+	eq_vec = send_cq->comp_vector % gd->gdma_context->max_num_queues;
+	eq = &mana_ucontext->eqs[eq_vec];
+	cq_spec.attached_eq = eq->eq->id;
 
 	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
 				 &cq_spec, &qp->tx_object);
@@ -368,6 +421,26 @@  static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	qp->sq_id = wq_spec.queue_index;
 	send_cq->id = cq_spec.queue_index;
 
+	if (gd->gdma_context->cq_table[send_cq->id] == NULL) {
+
+		gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+		if (!gdma_cq) {
+			ibdev_dbg(&mdev->ib_dev,
+				  "failed to allocate gdma_cq\n");
+			err = -ENOMEM;
+			goto err_destroy_wqobj_and_cq;
+		}
+
+		pr_debug("gdma cq allocated %p\n", gdma_cq);
+		gdma_cq->cq.context = send_cq;
+		gdma_cq->type = GDMA_CQ;
+		gdma_cq->cq.callback = mana_ib_cq_handler;
+		gdma_cq->id = send_cq->id;
+		gd->gdma_context->cq_table[send_cq->id] = gdma_cq;
+	} else {
+		gdma_cq = gd->gdma_context->cq_table[send_cq->id];
+	}
+
 	ibdev_dbg(&mdev->ib_dev,
 		  "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
 		  qp->tx_object, qp->sq_id, send_cq->id);
@@ -381,12 +454,17 @@  static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
 		ibdev_dbg(&mdev->ib_dev,
 			  "Failed copy udata for create qp-raw, %d\n",
 			  err);
-		goto err_destroy_wq_obj;
+		goto err_destroy_wqobj_and_cq;
 	}
 
 	return 0;
 
-err_destroy_wq_obj:
+err_destroy_wqobj_and_cq:
+	if (atomic_read(&send_cq->ibcq.usecnt) == 0) {
+		kfree(gdma_cq);
+		gd->gdma_context->cq_table[send_cq->id] = NULL;
+	}
+
 	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
 
 err_destroy_dma_region:
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8f3f78b68592..8231d77628d9 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -368,53 +368,57 @@  static void mana_gd_process_eqe(struct gdma_queue *eq)
 	}
 }
 
-static void mana_gd_process_eq_events(void *arg)
+static void mana_gd_process_eq_events(struct list_head *eq_list)
 {
 	u32 owner_bits, new_bits, old_bits;
 	union gdma_eqe_info eqe_info;
 	struct gdma_eqe *eq_eqe_ptr;
-	struct gdma_queue *eq = arg;
 	struct gdma_context *gc;
+	struct gdma_queue *eq;
 	struct gdma_eqe *eqe;
 	u32 head, num_eqe;
 	int i;
 
-	gc = eq->gdma_dev->gdma_context;
+	list_for_each_entry_rcu(eq, eq_list, entry) {
+		gc = eq->gdma_dev->gdma_context;
 
-	num_eqe = eq->queue_size / GDMA_EQE_SIZE;
-	eq_eqe_ptr = eq->queue_mem_ptr;
+		num_eqe = eq->queue_size / GDMA_EQE_SIZE;
+		eq_eqe_ptr = eq->queue_mem_ptr;
 
-	/* Process up to 5 EQEs at a time, and update the HW head. */
-	for (i = 0; i < 5; i++) {
-		eqe = &eq_eqe_ptr[eq->head % num_eqe];
-		eqe_info.as_uint32 = eqe->eqe_info;
-		owner_bits = eqe_info.owner_bits;
+		/* Process up to 5 EQEs at a time, and update the HW head. */
+		for (i = 0; i < 5; i++) {
+			eqe = &eq_eqe_ptr[eq->head % num_eqe];
+			eqe_info.as_uint32 = eqe->eqe_info;
+			owner_bits = eqe_info.owner_bits;
 
-		old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
-		/* No more entries */
-		if (owner_bits == old_bits)
-			break;
+			old_bits =
+				(eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK;
+			/* No more entries */
+			if (owner_bits == old_bits)
+				break;
 
-		new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
-		if (owner_bits != new_bits) {
-			dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id);
-			break;
-		}
+			new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK;
+			if (owner_bits != new_bits) {
+				dev_err(gc->dev, "EQ %d: overflow detected\n",
+					eq->id);
+				break;
+			}
 
-		/* Per GDMA spec, rmb is necessary after checking owner_bits, before
-		 * reading eqe.
-		 */
-		rmb();
+			/* Per GDMA spec, rmb is necessary after checking
+			 * owner_bits, before reading eqe.
+			 */
+			rmb();
 
-		mana_gd_process_eqe(eq);
+			mana_gd_process_eqe(eq);
 
-		eq->head++;
-	}
+			eq->head++;
+		}
 
-	head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
+		head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS);
 
-	mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id,
-			      head, SET_ARM_BIT);
+		mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type,
+				      eq->id, head, SET_ARM_BIT);
+	}
 }
 
 static int mana_gd_register_irq(struct gdma_queue *queue,
@@ -432,44 +436,49 @@  static int mana_gd_register_irq(struct gdma_queue *queue,
 	gc = gd->gdma_context;
 	r = &gc->msix_resource;
 	dev = gc->dev;
+	msi_index = spec->eq.msix_index;
 
 	spin_lock_irqsave(&r->lock, flags);
 
-	msi_index = find_first_zero_bit(r->map, r->size);
-	if (msi_index >= r->size || msi_index >= gc->num_msix_usable) {
-		err = -ENOSPC;
-	} else {
-		bitmap_set(r->map, msi_index, 1);
-		queue->eq.msix_index = msi_index;
-	}
-
-	spin_unlock_irqrestore(&r->lock, flags);
+	if (!spec->eq.msix_allocated) {
+		msi_index = find_first_zero_bit(r->map, r->size);
+		if (msi_index >= r->size || msi_index >= gc->num_msix_usable)
+			err = -ENOSPC;
+		else
+			bitmap_set(r->map, msi_index, 1);
 
-	if (err) {
-		dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
-			err, msi_index, r->size, gc->num_msix_usable);
+		if (err) {
+			dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u",
+				err, msi_index, r->size, gc->num_msix_usable);
 
-		return err;
+			goto out;
+		}
 	}
 
+	queue->eq.msix_index = msi_index;
 	gic = &gc->irq_contexts[msi_index];
 
-	WARN_ON(gic->handler || gic->arg);
+	list_add_rcu(&queue->entry, &gic->eq_list);
 
-	gic->arg = queue;
+	WARN_ON(gic->handler);
 
 	gic->handler = mana_gd_process_eq_events;
 
-	return 0;
+out:
+	spin_unlock_irqrestore(&r->lock, flags);
+
+	return err;
 }
 
-static void mana_gd_deregiser_irq(struct gdma_queue *queue)
+static void mana_gd_deregister_irq(struct gdma_queue *queue)
 {
 	struct gdma_dev *gd = queue->gdma_dev;
 	struct gdma_irq_context *gic;
 	struct gdma_context *gc;
 	struct gdma_resource *r;
 	unsigned int msix_index;
+	struct list_head *p, *n;
+	struct gdma_queue *eq;
 	unsigned long flags;
 
 	gc = gd->gdma_context;
@@ -480,13 +489,25 @@  static void mana_gd_deregiser_irq(struct gdma_queue *queue)
 	if (WARN_ON(msix_index >= gc->num_msix_usable))
 		return;
 
+	spin_lock_irqsave(&r->lock, flags);
+
 	gic = &gc->irq_contexts[msix_index];
-	gic->handler = NULL;
-	gic->arg = NULL;
 
-	spin_lock_irqsave(&r->lock, flags);
-	bitmap_clear(r->map, msix_index, 1);
+	list_for_each_safe(p, n, &gic->eq_list) {
+		eq = list_entry(p, struct gdma_queue, entry);
+		if (queue == eq) {
+			list_del_rcu(&eq->entry);
+			break;
+		}
+	}
+
+	if (list_empty(&gic->eq_list)) {
+		gic->handler = NULL;
+		bitmap_clear(r->map, msix_index, 1);
+	}
+
 	spin_unlock_irqrestore(&r->lock, flags);
+	synchronize_rcu();
 
 	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
 }
@@ -550,7 +571,7 @@  static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets,
 			dev_warn(gc->dev, "Failed to flush EQ: %d\n", err);
 	}
 
-	mana_gd_deregiser_irq(queue);
+	mana_gd_deregister_irq(queue);
 
 	if (queue->eq.disable_needed)
 		mana_gd_disable_queue(queue);
@@ -565,7 +586,7 @@  static int mana_gd_create_eq(struct gdma_dev *gd,
 	u32 log2_num_entries;
 	int err;
 
-	queue->eq.msix_index = INVALID_PCI_MSIX_INDEX;
+	queue->eq.msix_index = spec->eq.msix_index;
 
 	log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE);
 
@@ -602,6 +623,7 @@  static int mana_gd_create_eq(struct gdma_dev *gd,
 	mana_gd_destroy_eq(gc, false, queue);
 	return err;
 }
+EXPORT_SYMBOL(mana_gd_create_mana_eq);
 
 static void mana_gd_create_cq(const struct gdma_queue_spec *spec,
 			      struct gdma_queue *queue)
@@ -873,6 +895,7 @@  void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue)
 	mana_gd_free_memory(gmi);
 	kfree(queue);
 }
+EXPORT_SYMBOL(mana_gd_destroy_queue);
 
 int mana_gd_verify_vf_version(struct pci_dev *pdev)
 {
@@ -1188,7 +1211,7 @@  static irqreturn_t mana_gd_intr(int irq, void *arg)
 	struct gdma_irq_context *gic = arg;
 
 	if (gic->handler)
-		gic->handler(gic->arg);
+		gic->handler(&gic->eq_list);
 
 	return IRQ_HANDLED;
 }
@@ -1241,7 +1264,7 @@  static int mana_gd_setup_irqs(struct pci_dev *pdev)
 	for (i = 0; i < nvec; i++) {
 		gic = &gc->irq_contexts[i];
 		gic->handler = NULL;
-		gic->arg = NULL;
+		INIT_LIST_HEAD(&gic->eq_list);
 
 		if (!i)
 			snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s",
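
With these gdma_main.c changes a single MSI-X vector can service more than one EQ: mana_gd_register_irq() links the EQ into gic->eq_list instead of storing a single gic->arg, and mana_gd_process_eq_events() walks that list under RCU from hard-IRQ context. A minimal sketch of the reader/writer pairing this relies on follows; the example_ functions are illustrations only, not symbols added by the patch.

static void example_eq_attach(struct gdma_resource *r,
			      struct gdma_irq_context *gic,
			      struct gdma_queue *eq)
{
	unsigned long flags;

	/* Writers serialize on the MSI-X resource lock. */
	spin_lock_irqsave(&r->lock, flags);
	list_add_rcu(&eq->entry, &gic->eq_list);
	spin_unlock_irqrestore(&r->lock, flags);
}

static void example_eq_detach(struct gdma_resource *r,
			      struct gdma_irq_context *gic,
			      struct gdma_queue *eq)
{
	unsigned long flags;

	spin_lock_irqsave(&r->lock, flags);
	list_del_rcu(&eq->entry);
	if (list_empty(&gic->eq_list))
		gic->handler = NULL;		/* last EQ on this vector */
	spin_unlock_irqrestore(&r->lock, flags);

	/*
	 * The IRQ handler iterates the list with list_for_each_entry_rcu(),
	 * so wait for in-flight walkers before the caller frees or reuses
	 * the EQ.
	 */
	synchronize_rcu();
}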
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 06d6292e09b3..85345225813f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1156,6 +1156,7 @@  static int mana_create_eq(struct mana_context *ac)
 	spec.eq.callback = NULL;
 	spec.eq.context = ac->eqs;
 	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+	spec.eq.msix_allocated = false;
 
 	for (i = 0; i < gc->max_num_queues; i++) {
 		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 96c120160f15..cc728fc42043 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -6,6 +6,7 @@ 
 
 #include <linux/dma-mapping.h>
 #include <linux/netdevice.h>
+#include <linux/list.h>
 
 #include "shm_channel.h"
 
@@ -291,6 +292,8 @@  struct gdma_queue {
 	u32 head;
 	u32 tail;
 
+	struct list_head entry;
+
 	/* Extra fields specific to EQ/CQ. */
 	union {
 		struct {
@@ -325,6 +328,8 @@  struct gdma_queue_spec {
 			void *context;
 
 			unsigned long log2_throttle_limit;
+			bool msix_allocated;
+			unsigned int msix_index;
 		} eq;
 
 		struct {
@@ -340,8 +345,8 @@  struct gdma_queue_spec {
 #define MANA_IRQ_NAME_SZ 32
 
 struct gdma_irq_context {
-	void (*handler)(void *arg);
-	void *arg;
+	void (*handler)(struct list_head *arg);
+	struct list_head eq_list;
 	char name[MANA_IRQ_NAME_SZ];
 };
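
The two new gdma_queue_spec fields let a second client share a vector that was already claimed: mana_en.c above passes msix_allocated = false and keeps the existing allocate-a-free-vector behaviour, while a caller that already owns a vector can pass an explicit index. A hedged sketch of such a caller follows; the function name and the choice of vector are assumptions for illustration, and EQ_SIZE / LOG2_EQ_THROTTLE come from mana.h as in mana_create_eq() above.

static int example_create_shared_eq(struct gdma_dev *gd, unsigned int vec,
				    struct mana_eq *eq)
{
	struct gdma_queue_spec spec = {};

	spec.type = GDMA_EQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = EQ_SIZE;
	spec.eq.callback = NULL;
	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
	spec.eq.msix_allocated = true;	/* vector already set in r->map */
	spec.eq.msix_index = vec;	/* join this vector's eq_list */

	return mana_gd_create_mana_eq(gd, &spec, &eq->eq);
}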