| Message ID | 6cc24816cca049bd8541317f5e41d3ac659445d3.1652588303.git.leonro@nvidia.com |
|---|---|
| State | Accepted |
| Series | [rdma-next] RDMA/mlx5: Add a umr recovery flow |
On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:

> @@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
>         mlx5r_umr_init_context(&umr_context);
>
>         down(&umrc->sem);
> +       while (true) {
> +               mutex_lock(&umrc->lock);

You need to test this with lockdep; nesting a mutex under a semaphore is
not allowed, AFAIK.

> +               err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> +                                         with_data);
> +               mutex_unlock(&umrc->lock);
> +               if (err) {
> +                       mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> +                                    err);
> +                       break;
>                 }
> +
> +               wait_for_completion(&umr_context.done);

Nor is sleeping under a semaphore.

And, I'm pretty sure, this entire function is called under a spinlock
in some cases.

Jason
On Thu, May 26, 2022 at 11:32:12AM -0300, Jason Gunthorpe wrote:
> On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:
> > @@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
> >         mlx5r_umr_init_context(&umr_context);
> >
> >         down(&umrc->sem);
> > +       while (true) {
> > +               mutex_lock(&umrc->lock);
>
> You need to test this with lockdep; nesting a mutex under a semaphore is
> not allowed, AFAIK.

We run all our tests with lockdep.

> > +               err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > +                                         with_data);
> > +               mutex_unlock(&umrc->lock);
> > +               if (err) {
> > +                       mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > +                                    err);
> > +                       break;
> >                 }
> > +
> > +               wait_for_completion(&umr_context.done);
>
> Nor is sleeping under a semaphore.

Not according to kernel/locking/semaphore.c. Semaphores can sleep, and the
code protected by a semaphore can sleep too.

 53 void down(struct semaphore *sem)
 54 {
 55         unsigned long flags;
 56
 57         might_sleep();
    ....
 64 }
 65 EXPORT_SYMBOL(down);

> And, I'm pretty sure, this entire function is called under a spinlock
> in some cases.

Can you point to such a flow?

Thanks

> Jason
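For readers following the locking argument: a kernel semaphore and a mutex are both sleeping locks, so taking a mutex, and later sleeping on a completion, while holding a semaphore is legal and lockdep-clean. The sketch below is illustrative only; the `example_*` names are invented and do not appear in the mlx5 driver (assume the semaphore was initialized elsewhere with sema_init()), but the shape matches what mlx5r_umr_post_send_wait() does in the patch.

```c
#include <linux/completion.h>
#include <linux/mutex.h>
#include <linux/semaphore.h>

/* Illustrative names only; assume sema_init(&example_sem, n) ran at init time. */
static struct semaphore example_sem;   /* bounds the number of in-flight posts */
static DEFINE_MUTEX(example_lock);     /* serializes posting to a shared resource */

static int example_post_and_wait(struct completion *done)
{
        /* down() may sleep: it calls might_sleep() in kernel/locking/semaphore.c. */
        down(&example_sem);

        /* Nesting a mutex (another sleeping lock) under the semaphore is fine. */
        mutex_lock(&example_lock);
        /* ... post the work request while serialized ... */
        mutex_unlock(&example_lock);

        /* Sleeping while still holding the semaphore is also allowed. */
        wait_for_completion(done);

        up(&example_sem);
        return 0;
}
```

What lockdep does forbid is taking any of these sleeping primitives from atomic context, which is why the remaining question in the thread is whether the function can be reached under a spinlock.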
On Thu, May 26, 2022 at 07:29:20PM +0300, Leon Romanovsky wrote:
> > > +               err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > > +                                         with_data);
> > > +               mutex_unlock(&umrc->lock);
> > > +               if (err) {
> > > +                       mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > > +                                    err);
> > > +                       break;
> > >                 }
> > > +
> > > +               wait_for_completion(&umr_context.done);
> >
> > Nor is sleeping under a semaphore.
>
> Not according to kernel/locking/semaphore.c. Semaphores can sleep, and the
> code protected by a semaphore can sleep too.
>
>  53 void down(struct semaphore *sem)
>  54 {
>  55         unsigned long flags;
>  56
>  57         might_sleep();
>     ....
>  64 }
>  65 EXPORT_SYMBOL(down);

Hum, OK, I am confused.

> > And, I'm pretty sure, this entire function is called under a spinlock
> > in some cases.
>
> Can you point to such a flow?

It seems like not anymore, or at least I couldn't find a case.

Jason
On Thu, May 26, 2022 at 02:21:32PM -0300, Jason Gunthorpe wrote:
> On Thu, May 26, 2022 at 07:29:20PM +0300, Leon Romanovsky wrote:
> > > > +               err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
> > > > +                                         with_data);
> > > > +               mutex_unlock(&umrc->lock);
> > > > +               if (err) {
> > > > +                       mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
> > > > +                                    err);
> > > > +                       break;
> > > >                 }
> > > > +
> > > > +               wait_for_completion(&umr_context.done);
> > >
> > > Nor is sleeping under a semaphore.
> >
> > Not according to kernel/locking/semaphore.c. Semaphores can sleep, and the
> > code protected by a semaphore can sleep too.
> >
> >  53 void down(struct semaphore *sem)
> >  54 {
> >  55         unsigned long flags;
> >  56
> >  57         might_sleep();
> >     ....
> >  64 }
> >  65 EXPORT_SYMBOL(down);
>
> Hum, OK, I am confused.
>
> > > And, I'm pretty sure, this entire function is called under a spinlock
> > > in some cases.
> >
> > Can you point to such a flow?
>
> It seems like not anymore, or at least I couldn't find a case.

So are we fine with this patch? Can it go in as is after the merge window?

Thanks

> Jason
On Sun, May 15, 2022 at 07:19:53AM +0300, Leon Romanovsky wrote:
> From: Aharon Landau <aharonl@nvidia.com>
>
> When a UMR fails, the UMR QP state changes to an error state. Therefore,
> all further UMR operations will fail too.
>
> Add a recovery flow to the UMR QP, and repost the flushed WQEs.
>
> Signed-off-by: Aharon Landau <aharonl@nvidia.com>
> Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  drivers/infiniband/hw/mlx5/cq.c      |  4 ++
>  drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 ++++-
>  drivers/infiniband/hw/mlx5/umr.c     | 78 ++++++++++++++++++++++++----
>  3 files changed, 83 insertions(+), 11 deletions(-)
>

Thanks, applied to rdma-next.
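Before the full diff, here is a condensed outline of the retry/recovery loop the patch adds to mlx5r_umr_post_send_wait(). It is a reading aid, not the literal code: the `umr_post_send_wait_outline()` wrapper name is invented, the umr context is passed in rather than built on the stack, and the umrc->sem / umrc->lock handling and the warning messages are omitted. See the diff below for the authoritative version.

```c
/*
 * Condensed outline of the loop added by this patch (locking and logging
 * omitted); the real code lives in drivers/infiniband/hw/mlx5/umr.c.
 */
static int umr_post_send_wait_outline(struct mlx5_ib_dev *dev, u32 mkey,
                                      struct mlx5_ib_umr_context *umr_context,
                                      struct mlx5r_umr_wqe *wqe, bool with_data)
{
        struct umr_common *umrc = &dev->umrc;
        int err;

        while (true) {
                if (umrc->state == MLX5_UMR_STATE_ERR)          /* earlier recovery failed */
                        return -EFAULT;
                if (umrc->state == MLX5_UMR_STATE_RECOVER) {    /* QP reset in progress */
                        usleep_range(3000, 5000);               /* back off, then retry */
                        continue;
                }

                err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe,
                                          wqe, with_data);
                if (err)
                        return err;                             /* posting the WQE failed */

                wait_for_completion(&umr_context->done);
                if (umr_context->status == IB_WC_SUCCESS)
                        return 0;
                if (umr_context->status == IB_WC_WR_FLUSH_ERR)
                        continue;                               /* flushed WQE: repost it */

                /*
                 * Fatal completion error: mlx5r_umr_recover() resets the UMR QP
                 * and moves umrc->state to ACTIVE on success or ERR on failure;
                 * either way, this particular request is failed.
                 */
                mlx5r_umr_recover(dev);
                return -EFAULT;
        }
}
```

The MLX5_UMR_STATE_RECOVER state itself is set from the CQ error path in cq.c when a non-flush error completion is seen on the UMR QP, so concurrent submitters spin in the usleep_range() branch until the QP is back in RTS.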
```diff
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 716ab467ac9f..457f57b088c6 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -523,6 +523,10 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
                             "Requestor" : "Responder", cq->mcq.cqn);
                 mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
                             err_cqe->syndrome, err_cqe->vendor_err_synd);
+                if (wc->status != IB_WC_WR_FLUSH_ERR &&
+                    (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+                        dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
                 if (opcode == MLX5_CQE_REQ_ERR) {
                         wq = &(*cur_qp)->sq;
                         wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index df2b566ad73d..80a1c12ca1c2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -717,13 +717,23 @@ struct mlx5_ib_umr_context {
         struct completion       done;
 };
 
+enum {
+        MLX5_UMR_STATE_ACTIVE,
+        MLX5_UMR_STATE_RECOVER,
+        MLX5_UMR_STATE_ERR,
+};
+
 struct umr_common {
         struct ib_pd    *pd;
         struct ib_cq    *cq;
         struct ib_qp    *qp;
-        /* control access to UMR QP
+        /* Protects from UMR QP overflow
          */
         struct semaphore        sem;
+        /* Protects from using UMR while the UMR is not active
+         */
+        struct mutex lock;
+        unsigned int state;
 };
 
 struct mlx5_cache_ent {
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 3a48364c0918..e00b94d1b1ea 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -176,6 +176,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
         dev->umrc.pd = pd;
 
         sema_init(&dev->umrc.sem, MAX_UMR_WR);
+        mutex_init(&dev->umrc.lock);
 
         return 0;
 
@@ -195,6 +196,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
         ib_dealloc_pd(dev->umrc.pd);
 }
 
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
+{
+        struct umr_common *umrc = &dev->umrc;
+        struct ib_qp_attr attr;
+        int err;
+
+        attr.qp_state = IB_QPS_RESET;
+        err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+        if (err) {
+                mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+                goto err;
+        }
+
+        err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+        if (err)
+                goto err;
+
+        umrc->state = MLX5_UMR_STATE_ACTIVE;
+        return 0;
+
+err:
+        umrc->state = MLX5_UMR_STATE_ERR;
+        return err;
+}
+
 static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
                                struct mlx5r_umr_wqe *wqe, bool with_data)
 {
@@ -231,7 +257,7 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
 
         id.ib_cqe = cqe;
         mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
-                         MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
+                         MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
 
         mlx5r_ring_db(qp, 1, ctrl);
 
@@ -270,17 +296,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
         mlx5r_umr_init_context(&umr_context);
 
         down(&umrc->sem);
-        err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
-                                  with_data);
-        if (err)
-                mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
-        else {
-                wait_for_completion(&umr_context.done);
-                if (umr_context.status != IB_WC_SUCCESS) {
-                        mlx5_ib_warn(dev, "reg umr failed (%u)\n",
-                                     umr_context.status);
+        while (true) {
+                mutex_lock(&umrc->lock);
+                if (umrc->state == MLX5_UMR_STATE_ERR) {
+                        mutex_unlock(&umrc->lock);
                         err = -EFAULT;
+                        break;
+                }
+
+                if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+                        mutex_unlock(&umrc->lock);
+                        usleep_range(3000, 5000);
+                        continue;
+                }
+
+                err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+                                          with_data);
+                mutex_unlock(&umrc->lock);
+                if (err) {
+                        mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+                                     err);
+                        break;
                 }
+
+                wait_for_completion(&umr_context.done);
+
+                if (umr_context.status == IB_WC_SUCCESS)
+                        break;
+
+                if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+                        continue;
+
+                WARN_ON_ONCE(1);
+                mlx5_ib_warn(dev,
+                        "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
+                        umr_context.status);
+                mutex_lock(&umrc->lock);
+                err = mlx5r_umr_recover(dev);
+                mutex_unlock(&umrc->lock);
+                if (err)
+                        mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+                                     err);
+                err = -EFAULT;
+                break;
         }
         up(&umrc->sem);
         return err;
```