Message ID | 20230615115630.164098-2-christian.koenig@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/3] drm/scheduler: implement hw time accounting | expand |
On 2023-06-15 07:56, Christian König wrote: > This reverts commit 8ee3a52e3f35e064a3bf82f21dc74ddaf9843648. > > The new amdgpu_ctx_mgr_entity_fini() was never called, so it was pure > coincident that this patch didn't cause a crash. Since the workaround > shouldn't be needed any more just mostly revert the changes to amdgpu. > > Signed-off-by: Christian König <christian.koenig@amd.com> Add a fixes-tag, Fixes: 8ee3a52e3f35e0 ("drm/gpu-sched: fix force APP kill hang(v4)") Acked-by: Luben Tuikov <luben.tuikov@amd.com> Regards, Luben > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 59 ++----------------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- > 3 files changed, 5 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > index d2139ac12159..1445e030d788 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > @@ -267,7 +267,7 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) > res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i])); > dma_fence_put(entity->fences[i]); > } > - > + drm_sched_entity_destroy(&entity->entity); > kfree(entity); > return res; > } > @@ -476,24 +476,6 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, > return r; > } > > -static void amdgpu_ctx_do_release(struct kref *ref) > -{ > - struct amdgpu_ctx *ctx; > - u32 i, j; > - > - ctx = container_of(ref, struct amdgpu_ctx, refcount); > - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { > - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { > - if (!ctx->entities[i][j]) > - continue; > - > - drm_sched_entity_destroy(&ctx->entities[i][j]->entity); > - } > - } > - > - amdgpu_ctx_fini(ref); > -} > - > static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) > { > struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; > @@ -502,7 +484,7 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv 
*fpriv, uint32_t id) > mutex_lock(&mgr->lock); > ctx = idr_remove(&mgr->ctx_handles, id); > if (ctx) > - kref_put(&ctx->refcount, amdgpu_ctx_do_release); > + kref_put(&ctx->refcount, amdgpu_ctx_fini); > mutex_unlock(&mgr->lock); > return ctx ? 0 : -EINVAL; > } > @@ -712,7 +694,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) > if (ctx == NULL) > return -EINVAL; > > - kref_put(&ctx->refcount, amdgpu_ctx_do_release); > + kref_put(&ctx->refcount, amdgpu_ctx_fini); > return 0; > } > > @@ -881,45 +863,12 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) > return timeout; > } > > -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) > -{ > - struct amdgpu_ctx *ctx; > - struct idr *idp; > - uint32_t id, i, j; > - > - idp = &mgr->ctx_handles; > - > - idr_for_each_entry(idp, ctx, id) { > - if (kref_read(&ctx->refcount) != 1) { > - DRM_ERROR("ctx %p is still alive\n", ctx); > - continue; > - } > - > - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { > - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { > - struct drm_sched_entity *entity; > - > - if (!ctx->entities[i][j]) > - continue; > - > - entity = &ctx->entities[i][j]->entity; > - drm_sched_entity_fini(entity); > - } > - } > - } > -} > - > void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) > { > struct amdgpu_ctx *ctx; > - struct idr *idp; > uint32_t id; > > - amdgpu_ctx_mgr_entity_fini(mgr); > - > - idp = &mgr->ctx_handles; > - > - idr_for_each_entry(idp, ctx, id) { > + idr_for_each_entry(&mgr->ctx_handles, ctx, id) { > if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) > DRM_ERROR("ctx %p is still alive\n", ctx); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h > index 0fa0e56daf67..729cf479d71d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h > @@ -91,7 +91,6 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, > > void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, > struct 
amdgpu_device *adev); > -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); > long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); > void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); > void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > index 0efb38539d70..50c36c95556d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c > @@ -1278,6 +1278,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, > return; > > pm_runtime_get_sync(dev->dev); > + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); > > if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL) > amdgpu_uvd_free_handles(adev, file_priv); > @@ -1299,7 +1300,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, > amdgpu_bo_unreserve(pd); > } > > - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); > amdgpu_vm_fini(adev, &fpriv->vm); > > if (pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2139ac12159..1445e030d788 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -267,7 +267,7 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) res = ktime_add(res, amdgpu_ctx_fence_time(entity->fences[i])); dma_fence_put(entity->fences[i]); } - + drm_sched_entity_destroy(&entity->entity); kfree(entity); return res; } @@ -476,24 +476,6 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return r; } -static void amdgpu_ctx_do_release(struct kref *ref) -{ - struct amdgpu_ctx *ctx; - u32 i, j; - - ctx = container_of(ref, struct amdgpu_ctx, refcount); - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { - if (!ctx->entities[i][j]) - continue; - - drm_sched_entity_destroy(&ctx->entities[i][j]->entity); - } - } - - amdgpu_ctx_fini(ref); -} - static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -502,7 +484,7 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id) mutex_lock(&mgr->lock); ctx = idr_remove(&mgr->ctx_handles, id); if (ctx) - kref_put(&ctx->refcount, amdgpu_ctx_do_release); + kref_put(&ctx->refcount, amdgpu_ctx_fini); mutex_unlock(&mgr->lock); return ctx ? 
0 : -EINVAL; } @@ -712,7 +694,7 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) if (ctx == NULL) return -EINVAL; - kref_put(&ctx->refcount, amdgpu_ctx_do_release); + kref_put(&ctx->refcount, amdgpu_ctx_fini); return 0; } @@ -881,45 +863,12 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) return timeout; } -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) -{ - struct amdgpu_ctx *ctx; - struct idr *idp; - uint32_t id, i, j; - - idp = &mgr->ctx_handles; - - idr_for_each_entry(idp, ctx, id) { - if (kref_read(&ctx->refcount) != 1) { - DRM_ERROR("ctx %p is still alive\n", ctx); - continue; - } - - for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { - for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) { - struct drm_sched_entity *entity; - - if (!ctx->entities[i][j]) - continue; - - entity = &ctx->entities[i][j]->entity; - drm_sched_entity_fini(entity); - } - } - } -} - void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr) { struct amdgpu_ctx *ctx; - struct idr *idp; uint32_t id; - amdgpu_ctx_mgr_entity_fini(mgr); - - idp = &mgr->ctx_handles; - - idr_for_each_entry(idp, ctx, id) { + idr_for_each_entry(&mgr->ctx_handles, ctx, id) { if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1) DRM_ERROR("ctx %p is still alive\n", ctx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 0fa0e56daf67..729cf479d71d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -91,7 +91,6 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr, struct amdgpu_device *adev); -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 
0efb38539d70..50c36c95556d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1278,6 +1278,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; pm_runtime_get_sync(dev->dev); + amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL) amdgpu_uvd_free_handles(adev, file_priv); @@ -1299,7 +1300,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } - amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); if (pasid)
This reverts commit 8ee3a52e3f35e064a3bf82f21dc74ddaf9843648. The new amdgpu_ctx_mgr_entity_fini() was never called, so it was pure coincidence that this patch didn't cause a crash. Since the workaround shouldn't be needed any more, just mostly revert the changes to amdgpu. Signed-off-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 59 ++----------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- 3 files changed, 5 insertions(+), 57 deletions(-)