Message ID | 032d54e1ed92d0f288b385d6343a5b6e109daabe.1649139915.git.leonro@nvidia.com (mailing list archive) |
---|---|
State | Changes Requested |
Series | Handle FW failures to destroy QP/RQ objects |
On 05 Apr 11:12, Leon Romanovsky wrote:
>From: Patrisious Haddad <phaddad@nvidia.com>
>
>Prior to this patch in the case that destroy_unmap_eq()
>failed and was called again, it triggered an additional call of

Where is it failing and being called again? This shouldn't even be an
option; we try to keep mlx5 symmetrical: constructors and destructors
are supposed to be called only once, in their respective positions.
The callers must be fixed to avoid re-entry, or the destructors changed
to clean up all resources even on failure. No matter what, do not
invent a re-entry protocol for mlx5 destructors.

>mlx5_debug_eq_remove() which causes a kernel crash, since
>eq->dbg was not nullified in previous call.
>

[...]

> int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
>index 229728c80233..3c61f355cdac 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
>@@ -386,16 +386,20 @@ void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
> }
> EXPORT_SYMBOL(mlx5_eq_disable);
>
>-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
>+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
>+			    bool reentry)
> {
> 	int err;
>
> 	mlx5_debug_eq_remove(dev, eq);
>
> 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
>-	if (err)
>+	if (err) {
> 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
> 			       eq->eqn);
>+		if (reentry)
>+			return err;
>+	}
>
> 	mlx5_frag_buf_free(dev, &eq->frag_buf);
> 	return err;
>@@ -481,7 +485,7 @@ static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
> 	int err;
>
> 	mutex_lock(&eq_table->lock);
>-	err = destroy_unmap_eq(dev, eq);
>+	err = destroy_unmap_eq(dev, eq, false);
> 	mutex_unlock(&eq_table->lock);
> 	return err;
> }
>@@ -748,12 +752,15 @@ EXPORT_SYMBOL(mlx5_eq_create_generic);
>
> int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
> {
>+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
> 	int err;
>
> 	if (IS_ERR(eq))
> 		return -EINVAL;
>
>-	err = destroy_async_eq(dev, eq);
>+	mutex_lock(&eq_table->lock);

Here you are inventing the re-entry. Please drop this and fix it
properly. And avoid boolean parameters to mlx5 core functions as much
as possible; let's keep mlx5_core simple.

>+	err = destroy_unmap_eq(dev, eq, true);
>+	mutex_unlock(&eq_table->lock);
> 	if (err)
> 		goto out;
>
>@@ -851,7 +858,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
> 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
> 		list_del(&eq->list);
> 		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
>-		if (destroy_unmap_eq(dev, &eq->core))
>+		if (destroy_unmap_eq(dev, &eq->core, false))
> 			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
> 				       eq->core.eqn);
> 		tasklet_disable(&eq->tasklet_ctx.task);
>@@ -915,7 +922,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
> 		goto clean_eq;
> 	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
> 	if (err) {
>-		destroy_unmap_eq(dev, &eq->core);
>+		destroy_unmap_eq(dev, &eq->core, false);
> 		goto clean_eq;
> 	}
>
>--
>2.35.1
>
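The crash described in the quoted commit message reduces to a stale
eq->dbg pointer surviving the first destroy attempt. Below is a minimal,
standalone sketch of the null-and-guard pattern that the debugfs.c hunk
of this patch applies; the struct and function names are illustrative
stand-ins, not actual mlx5 code:

```c
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for struct mlx5_eq; only the dbg pointer matters here. */
struct fake_eq {
	void *dbg;
};

/*
 * The null-and-guard pattern from the debugfs.c hunk: free once, NULL
 * the pointer, and any later call becomes a harmless no-op instead of
 * a use-after-free.
 */
static void debug_eq_remove(struct fake_eq *eq)
{
	if (!eq->dbg)
		return;
	free(eq->dbg);
	eq->dbg = NULL;
}

int main(void)
{
	struct fake_eq eq = { .dbg = malloc(16) };

	debug_eq_remove(&eq);	/* first call frees the debug state */
	debug_eq_remove(&eq);	/* second call is now a no-op, no crash */
	printf("double remove is safe\n");
	return 0;
}
```

Once the pointer is cleared, a repeated remove degrades to a no-op
regardless of whether the caller was ever supposed to re-enter.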
On Tue, Apr 05, 2022 at 12:48:45PM -0700, Saeed Mahameed wrote:
> On 05 Apr 11:12, Leon Romanovsky wrote:
> > From: Patrisious Haddad <phaddad@nvidia.com>
> >
> > Prior to this patch in the case that destroy_unmap_eq()
> > failed and was called again, it triggered an additional call of
>
> Where is it failing and being called again? This shouldn't even be an
> option; we try to keep mlx5 symmetrical: constructors and destructors
> are supposed to be called only once, in their respective positions.
> The callers must be fixed to avoid re-entry, or the destructors changed
> to clean up all resources even on failure. No matter what, do not
> invent a re-entry protocol for mlx5 destructors.

It can happen when a QP is exposed through the DEVX interface. In that
flow, only FW knows about it and reference-counts all users. This means
that an attempt to destroy such a QP will fail, but mlx5_core is
structured in such a way that all cleanup is done before calling FW to
get the success/fail response.

For more detailed information, see this cover letter:
https://lore.kernel.org/all/20200907120921.476363-1-leon@kernel.org/

<...>

> > int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
> > {
> > +	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
> > 	int err;
> >
> > 	if (IS_ERR(eq))
> > 		return -EINVAL;
> >
> > -	err = destroy_async_eq(dev, eq);
> > +	mutex_lock(&eq_table->lock);
>
> Here you are inventing the re-entry. Please drop this and fix it
> properly. And avoid boolean parameters to mlx5 core functions as much
> as possible; let's keep mlx5_core simple.

If, after reading the link above, you are still not convinced, let's
take it offline.

Thanks
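The structural issue Leon points to, software teardown running before
the FW verb that can still fail, can be condensed into a few lines. A
sketch under assumed, illustrative names (not mlx5 code):

```c
#include <stdio.h>

/*
 * Illustrative sketch only, not mlx5 code. It mirrors the ordering
 * Leon describes: software state is torn down before the FW command,
 * so when FW refuses the destroy (e.g. a DEVX user still holds a
 * reference), the object is already half-destroyed and cannot safely
 * be destroyed again.
 */
static int sw_torn_down;

static void teardown_sw_state(void)
{
	sw_torn_down = 1;	/* debugfs removed, buffers freed, ... */
}

static int fw_destroy_cmd(void)
{
	return -16;		/* FW answers -EBUSY: references remain */
}

static int destroy_resource(void)
{
	teardown_sw_state();	 /* unconditional, runs first */
	return fw_destroy_cmd(); /* failure arrives too late to undo */
}

int main(void)
{
	if (destroy_resource() && sw_torn_down)
		printf("FW destroy failed, but SW state is already gone\n");
	return 0;
}
```

This is why a plain retry is unsafe here: by the time FW reports the
failure, there is no software state left to retry against.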
On 06 Apr 10:55, Leon Romanovsky wrote:
>On Tue, Apr 05, 2022 at 12:48:45PM -0700, Saeed Mahameed wrote:
>> On 05 Apr 11:12, Leon Romanovsky wrote:
>> > From: Patrisious Haddad <phaddad@nvidia.com>
>> >
>> > Prior to this patch in the case that destroy_unmap_eq()
>> > failed and was called again, it triggered an additional call of
>>
>> Where is it failing and being called again? This shouldn't even be an
>> option; we try to keep mlx5 symmetrical: constructors and destructors
>> are supposed to be called only once, in their respective positions.
>> The callers must be fixed to avoid re-entry, or the destructors changed
>> to clean up all resources even on failure. No matter what, do not
>> invent a re-entry protocol for mlx5 destructors.
>
>It can happen when a QP is exposed through the DEVX interface. In that
>flow, only FW knows about it and reference-counts all users. This means
>that an attempt to destroy such a QP will fail, but mlx5_core is
>structured in such a way that all cleanup is done before calling FW to
>get the success/fail response.

I wasn't talking about destroy_qp; actually, destroy_qp is implemented
the way I am asking you to implement destroy_eq(): remove debugfs on the
first call to destroy the EQ, and drop the re-entry logic from
mlx5_eq_destroy_generic and destroy_async_eq.

The EQ is a core/mlx5_ib resource; it's not exposed to the user nor to
DEVX, so it shouldn't be subject to DEVX limitations.

Also, looking at the destroy_qp implementation, it removes the debugfs
entry unconditionally even if the QP has a refcount and removal will
fail in FW. Just FYI.

For EQ I don't even understand why DEVX can cause ODP EQ removal to
fail; you must fix this at the mlx5_ib layer. But for this patch, please
drop the re-entry and remove debugfs in destroy_eq unconditionally.

>
>For more detailed information, see this cover letter:
>https://lore.kernel.org/all/20200907120921.476363-1-leon@kernel.org/
>
><...>
>
>> > int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
>> > {
>> > +	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
>> > 	int err;
>> >
>> > 	if (IS_ERR(eq))
>> > 		return -EINVAL;
>> >
>> > -	err = destroy_async_eq(dev, eq);
>> > +	mutex_lock(&eq_table->lock);
>>
>> Here you are inventing the re-entry. Please drop this and fix it
>> properly. And avoid boolean parameters to mlx5 core functions as much
>> as possible; let's keep mlx5_core simple.
>
>If, after reading the link above, you are still not convinced, let's
>take it offline.
>

I am not convinced, see above.

>Thanks
On Fri, Apr 08, 2022 at 12:30:35PM -0700, Saeed Mahameed wrote:
> On 06 Apr 10:55, Leon Romanovsky wrote:
> > On Tue, Apr 05, 2022 at 12:48:45PM -0700, Saeed Mahameed wrote:
> > > On 05 Apr 11:12, Leon Romanovsky wrote:
> > > > From: Patrisious Haddad <phaddad@nvidia.com>
> > > >
> > > > Prior to this patch in the case that destroy_unmap_eq()
> > > > failed and was called again, it triggered an additional call of
> > >
> > > Where is it failing and being called again? This shouldn't even be an
> > > option; we try to keep mlx5 symmetrical: constructors and destructors
> > > are supposed to be called only once, in their respective positions.
> > > The callers must be fixed to avoid re-entry, or the destructors changed
> > > to clean up all resources even on failure. No matter what, do not
> > > invent a re-entry protocol for mlx5 destructors.
> >
> > It can happen when a QP is exposed through the DEVX interface. In that
> > flow, only FW knows about it and reference-counts all users. This means
> > that an attempt to destroy such a QP will fail, but mlx5_core is
> > structured in such a way that all cleanup is done before calling FW to
> > get the success/fail response.
>
> I wasn't talking about destroy_qp; actually, destroy_qp is implemented
> the way I am asking you to implement destroy_eq(): remove debugfs on the
> first call to destroy the EQ, and drop the re-entry logic from
> mlx5_eq_destroy_generic and destroy_async_eq.
>
> The EQ is a core/mlx5_ib resource; it's not exposed to the user nor to
> DEVX, so it shouldn't be subject to DEVX limitations.

I tend to agree with you. I'll take another look at it and resubmit.

>
> Also, looking at the destroy_qp implementation, it removes the debugfs
> entry unconditionally even if the QP has a refcount and removal will
> fail in FW. Just FYI.

Right, we don't care about debugfs.

>
> For EQ I don't even understand why DEVX can cause ODP EQ removal to
> fail; you must fix this at the mlx5_ib layer. But for this patch, please
> drop the re-entry and remove debugfs in destroy_eq unconditionally.

The reason for the complexity is not debugfs, but the existence of the
"mlx5_frag_buf_free(dev, &eq->frag_buf);" line after the FW command is
executed. We need to separate two flows: the one that can tolerate FW
command failures and the one that can't. Without the "reentry" flag, you
can (theoretically) find yourself leaking ->frag_buf in the flows that
don't know how to re-enter.

I'll resubmit.

Thanks
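Leon's ->frag_buf argument can likewise be shown in miniature. In the
sketch below (illustrative names only, not mlx5 code), a retry-capable
caller must keep the buffer alive across a failed FW destroy, while a
caller that will never retry must free it immediately or leak it; this
is the branch the patch's "reentry" flag selects:

```c
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* All names are illustrative stand-ins, not mlx5 code. */
struct fake_eq {
	void *frag_buf;	/* stands in for eq->frag_buf */
	bool fw_busy;	/* FW still holds a reference, e.g. a DEVX user */
};

static int fw_destroy(struct fake_eq *eq)
{
	return eq->fw_busy ? -16 /* -EBUSY */ : 0;
}

/*
 * On FW failure, a retry-capable ("reentry") caller must keep frag_buf
 * alive for the next attempt; a caller that will never retry must free
 * it now, or it leaks.
 */
static int destroy_eq(struct fake_eq *eq, bool reentry)
{
	int err = fw_destroy(eq);

	if (err && reentry)
		return err;	/* keep frag_buf; the caller will retry */

	free(eq->frag_buf);	/* no retry is coming: free it now */
	eq->frag_buf = NULL;
	return err;
}

int main(void)
{
	struct fake_eq eq = { .frag_buf = malloc(64), .fw_busy = true };

	if (destroy_eq(&eq, true))	/* fails, buffer intentionally kept */
		eq.fw_busy = false;	/* the last FW reference went away */
	if (!destroy_eq(&eq, true))	/* retry succeeds, buffer freed */
		printf("retry succeeded, no leak and no double free\n");
	return 0;
}
```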
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 3d3e55a5cb11..9b96a1ca0779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -486,11 +486,11 @@ EXPORT_SYMBOL(mlx5_debug_qp_add);
 
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
 {
-	if (!mlx5_debugfs_root)
+	if (!mlx5_debugfs_root || !qp->dbg)
 		return;
 
-	if (qp->dbg)
-		rem_res_tree(qp->dbg);
+	rem_res_tree(qp->dbg);
+	qp->dbg = NULL;
 }
 EXPORT_SYMBOL(mlx5_debug_qp_remove);
 
@@ -512,11 +512,11 @@ int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
-	if (!mlx5_debugfs_root)
+	if (!mlx5_debugfs_root || !eq->dbg)
 		return;
 
-	if (eq->dbg)
-		rem_res_tree(eq->dbg);
+	rem_res_tree(eq->dbg);
+	eq->dbg = NULL;
 }
 
 int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 229728c80233..3c61f355cdac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -386,16 +386,20 @@ void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 }
 EXPORT_SYMBOL(mlx5_eq_disable);
 
-static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
+static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
+			    bool reentry)
 {
 	int err;
 
 	mlx5_debug_eq_remove(dev, eq);
 
 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
-	if (err)
+	if (err) {
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
 			       eq->eqn);
+		if (reentry)
+			return err;
+	}
 
 	mlx5_frag_buf_free(dev, &eq->frag_buf);
 	return err;
@@ -481,7 +485,7 @@ static int destroy_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 	int err;
 
 	mutex_lock(&eq_table->lock);
-	err = destroy_unmap_eq(dev, eq);
+	err = destroy_unmap_eq(dev, eq, false);
 	mutex_unlock(&eq_table->lock);
 	return err;
 }
@@ -748,12 +752,15 @@ EXPORT_SYMBOL(mlx5_eq_create_generic);
 
 int mlx5_eq_destroy_generic(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
+	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;
 
 	if (IS_ERR(eq))
 		return -EINVAL;
 
-	err = destroy_async_eq(dev, eq);
+	mutex_lock(&eq_table->lock);
+	err = destroy_unmap_eq(dev, eq, true);
+	mutex_unlock(&eq_table->lock);
 	if (err)
 		goto out;
 
@@ -851,7 +858,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
 		list_del(&eq->list);
 		mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
-		if (destroy_unmap_eq(dev, &eq->core))
+		if (destroy_unmap_eq(dev, &eq->core, false))
 			mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n",
 				       eq->core.eqn);
 		tasklet_disable(&eq->tasklet_ctx.task);
@@ -915,7 +922,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 		goto clean_eq;
 	err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
 	if (err) {
-		destroy_unmap_eq(dev, &eq->core);
+		destroy_unmap_eq(dev, &eq->core, false);
 		goto clean_eq;
 	}