Message ID | 1344029208-10069-1-git-send-email-j.glisse@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 03.08.2012 23:26, j.glisse@gmail.com wrote: > From: Jerome Glisse <jglisse@redhat.com> > > Virtual address need to be fenced to know when we can safely remove it. > This patch also properly clear the pagetable. Previously it was > serouisly broken. > > Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking. > > v2: For to update pagetable when unbinding bo (don't bailout if > bo_va->valid is true). > v3: Add kernel 3.5/3.4 comment. > > Signed-off-by: Jerome Glisse <jglisse@redhat.com> It should fix the problem, going to test it as soon as I find some 5min spare in my vacation. Till then it is: Reviewed-by: Christian König <christian.koenig@amd.com> In the future I would really like to make the updating of the PTE also async, that should save us allot of problems here. Christian. > --- > drivers/gpu/drm/radeon/radeon.h | 1 + > drivers/gpu/drm/radeon/radeon_cs.c | 32 +++++++++++++++++++++++++++++--- > drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- > drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- > drivers/gpu/drm/radeon/radeon_object.c | 6 +----- > 5 files changed, 55 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 5431af2..8d75c65 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -300,6 +300,7 @@ struct radeon_bo_va { > uint64_t soffset; > uint64_t eoffset; > uint32_t flags; > + struct radeon_fence *fence; > bool valid; > }; > > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 8a4c49e..995f3ab 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) > return 0; > } > > +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, > + struct radeon_fence *fence) > +{ > + struct radeon_fpriv *fpriv = parser->filp->driver_priv; 
> + struct radeon_vm *vm = &fpriv->vm; > + struct radeon_bo_list *lobj; > + int r; > + > + if (parser->chunk_ib_idx == -1) > + return; > + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) > + return; > + > + list_for_each_entry(lobj, &parser->validated, tv.head) { > + struct radeon_bo_va *bo_va; > + struct radeon_bo *rbo = lobj->bo; > + > + bo_va = radeon_bo_va(rbo, vm); > + radeon_fence_unref(&bo_va->fence); > + bo_va->fence = radeon_fence_ref(fence); > + } > + return 0; > +} > + > /** > * cs_parser_fini() - clean parser states > * @parser: parser structure holding parsing context. > @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) > { > unsigned i; > > - if (!error) > + if (!error) { > + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */ > + radeon_bo_vm_fence_va(parser, parser->ib.fence); > ttm_eu_fence_buffer_objects(&parser->validated, > parser->ib.fence); > - else > + } else { > ttm_eu_backoff_reservation(&parser->validated); > + } > > if (parser->relocs != NULL) { > for (i = 0; i < parser->nrelocs; i++) { > @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, > > if (parser->chunk_ib_idx == -1) > return 0; > - > if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) > return 0; > > diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c > index b372005..9912182 100644 > --- a/drivers/gpu/drm/radeon/radeon_gart.c > +++ b/drivers/gpu/drm/radeon/radeon_gart.c > @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, > return -EINVAL; > } > > - if (bo_va->valid) > + if (bo_va->valid && mem) > return 0; > > ngpu_pages = radeon_bo_ngpu_pages(bo); > @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, > struct radeon_bo *bo) > { > struct radeon_bo_va *bo_va; > + int r; > > bo_va = radeon_bo_va(bo, vm); > if (bo_va == NULL) > return 0; > > + /* wait for va use to end */ > + while (bo_va->fence) { 
> + r = radeon_fence_wait(bo_va->fence, false); > + if (r) { > + DRM_ERROR("error while waiting for fence: %d\n", r); > + } > + if (r == -EDEADLK) { > + r = radeon_gpu_reset(rdev); > + if (!r) > + continue; > + } > + break; > + } > + radeon_fence_unref(&bo_va->fence); > + > mutex_lock(&rdev->vm_manager.lock); > mutex_lock(&vm->mutex); > radeon_vm_bo_update_pte(rdev, vm, bo, NULL); > @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) > radeon_vm_unbind_locked(rdev, vm); > mutex_unlock(&rdev->vm_manager.lock); > > - /* remove all bo */ > + /* remove all bo at this point non are busy any more because unbind > + * waited for the last vm fence to signal > + */ > r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); > if (!r) { > bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); > list_del_init(&bo_va->bo_list); > list_del_init(&bo_va->vm_list); > + radeon_fence_unref(&bo_va->fence); > radeon_bo_unreserve(rdev->ring_tmp_bo.bo); > kfree(bo_va); > } > @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) > r = radeon_bo_reserve(bo_va->bo, false); > if (!r) { > list_del_init(&bo_va->bo_list); > + radeon_fence_unref(&bo_va->fence); > radeon_bo_unreserve(bo_va->bo); > kfree(bo_va); > } > diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c > index 84d0452..1b57b00 100644 > --- a/drivers/gpu/drm/radeon/radeon_gem.c > +++ b/drivers/gpu/drm/radeon/radeon_gem.c > @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj, > struct radeon_device *rdev = rbo->rdev; > struct radeon_fpriv *fpriv = file_priv->driver_priv; > struct radeon_vm *vm = &fpriv->vm; > - struct radeon_bo_va *bo_va, *tmp; > > if (rdev->family < CHIP_CAYMAN) { > return; > } > > if (radeon_bo_reserve(rbo, false)) { > + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); > return; > } > - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { > - if (bo_va->vm == vm) { > 
- /* remove from this vm address space */ > - mutex_lock(&vm->mutex); > - list_del(&bo_va->vm_list); > - mutex_unlock(&vm->mutex); > - list_del(&bo_va->bo_list); > - kfree(bo_va); > - } > - } > + radeon_vm_bo_rmv(rdev, vm, rbo); > radeon_bo_unreserve(rbo); > } > > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c > index 1f1a4c8..1cb014b 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) > > list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { > /* remove from all vm address space */ > - mutex_lock(&bo_va->vm->mutex); > - list_del(&bo_va->vm_list); > - mutex_unlock(&bo_va->vm->mutex); > - list_del(&bo_va->bo_list); > - kfree(bo_va); > + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); > } > } >
On Fri, Aug 3, 2012 at 5:26 PM, <j.glisse@gmail.com> wrote: > From: Jerome Glisse <jglisse@redhat.com> > > Virtual address need to be fenced to know when we can safely remove it. > This patch also properly clear the pagetable. Previously it was > serouisly broken. > > Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex locking. > > v2: For to update pagetable when unbinding bo (don't bailout if > bo_va->valid is true). > v3: Add kernel 3.5/3.4 comment. > > Signed-off-by: Jerome Glisse <jglisse@redhat.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> > --- > drivers/gpu/drm/radeon/radeon.h | 1 + > drivers/gpu/drm/radeon/radeon_cs.c | 32 +++++++++++++++++++++++++++++--- > drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- > drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- > drivers/gpu/drm/radeon/radeon_object.c | 6 +----- > 5 files changed, 55 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 5431af2..8d75c65 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -300,6 +300,7 @@ struct radeon_bo_va { > uint64_t soffset; > uint64_t eoffset; > uint32_t flags; > + struct radeon_fence *fence; > bool valid; > }; > > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 8a4c49e..995f3ab 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) > return 0; > } > > +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, > + struct radeon_fence *fence) > +{ > + struct radeon_fpriv *fpriv = parser->filp->driver_priv; > + struct radeon_vm *vm = &fpriv->vm; > + struct radeon_bo_list *lobj; > + int r; > + > + if (parser->chunk_ib_idx == -1) > + return; > + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) > + return; > + > + list_for_each_entry(lobj, 
&parser->validated, tv.head) { > + struct radeon_bo_va *bo_va; > + struct radeon_bo *rbo = lobj->bo; > + > + bo_va = radeon_bo_va(rbo, vm); > + radeon_fence_unref(&bo_va->fence); > + bo_va->fence = radeon_fence_ref(fence); > + } > + return 0; > +} > + > /** > * cs_parser_fini() - clean parser states > * @parser: parser structure holding parsing context. > @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) > { > unsigned i; > > - if (!error) > + if (!error) { > + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */ > + radeon_bo_vm_fence_va(parser, parser->ib.fence); > ttm_eu_fence_buffer_objects(&parser->validated, > parser->ib.fence); > - else > + } else { > ttm_eu_backoff_reservation(&parser->validated); > + } > > if (parser->relocs != NULL) { > for (i = 0; i < parser->nrelocs; i++) { > @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, > > if (parser->chunk_ib_idx == -1) > return 0; > - > if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) > return 0; > > diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c > index b372005..9912182 100644 > --- a/drivers/gpu/drm/radeon/radeon_gart.c > +++ b/drivers/gpu/drm/radeon/radeon_gart.c > @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, > return -EINVAL; > } > > - if (bo_va->valid) > + if (bo_va->valid && mem) > return 0; > > ngpu_pages = radeon_bo_ngpu_pages(bo); > @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, > struct radeon_bo *bo) > { > struct radeon_bo_va *bo_va; > + int r; > > bo_va = radeon_bo_va(bo, vm); > if (bo_va == NULL) > return 0; > > + /* wait for va use to end */ > + while (bo_va->fence) { > + r = radeon_fence_wait(bo_va->fence, false); > + if (r) { > + DRM_ERROR("error while waiting for fence: %d\n", r); > + } > + if (r == -EDEADLK) { > + r = radeon_gpu_reset(rdev); > + if (!r) > + continue; > + } > + break; > + } > 
+ radeon_fence_unref(&bo_va->fence); > + > mutex_lock(&rdev->vm_manager.lock); > mutex_lock(&vm->mutex); > radeon_vm_bo_update_pte(rdev, vm, bo, NULL); > @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) > radeon_vm_unbind_locked(rdev, vm); > mutex_unlock(&rdev->vm_manager.lock); > > - /* remove all bo */ > + /* remove all bo at this point non are busy any more because unbind > + * waited for the last vm fence to signal > + */ > r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); > if (!r) { > bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); > list_del_init(&bo_va->bo_list); > list_del_init(&bo_va->vm_list); > + radeon_fence_unref(&bo_va->fence); > radeon_bo_unreserve(rdev->ring_tmp_bo.bo); > kfree(bo_va); > } > @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) > r = radeon_bo_reserve(bo_va->bo, false); > if (!r) { > list_del_init(&bo_va->bo_list); > + radeon_fence_unref(&bo_va->fence); > radeon_bo_unreserve(bo_va->bo); > kfree(bo_va); > } > diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c > index 84d0452..1b57b00 100644 > --- a/drivers/gpu/drm/radeon/radeon_gem.c > +++ b/drivers/gpu/drm/radeon/radeon_gem.c > @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj, > struct radeon_device *rdev = rbo->rdev; > struct radeon_fpriv *fpriv = file_priv->driver_priv; > struct radeon_vm *vm = &fpriv->vm; > - struct radeon_bo_va *bo_va, *tmp; > > if (rdev->family < CHIP_CAYMAN) { > return; > } > > if (radeon_bo_reserve(rbo, false)) { > + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); > return; > } > - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { > - if (bo_va->vm == vm) { > - /* remove from this vm address space */ > - mutex_lock(&vm->mutex); > - list_del(&bo_va->vm_list); > - mutex_unlock(&vm->mutex); > - list_del(&bo_va->bo_list); > - kfree(bo_va); > - } > - } > + radeon_vm_bo_rmv(rdev, vm, rbo); > 
radeon_bo_unreserve(rbo); > } > > diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c > index 1f1a4c8..1cb014b 100644 > --- a/drivers/gpu/drm/radeon/radeon_object.c > +++ b/drivers/gpu/drm/radeon/radeon_object.c > @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) > > list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { > /* remove from all vm address space */ > - mutex_lock(&bo_va->vm->mutex); > - list_del(&bo_va->vm_list); > - mutex_unlock(&bo_va->vm->mutex); > - list_del(&bo_va->bo_list); > - kfree(bo_va); > + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); > } > } > > -- > 1.7.10.4 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel
On 04.08.2012 11:07, Christian König wrote: > On 03.08.2012 23:26, j.glisse@gmail.com wrote: >> From: Jerome Glisse <jglisse@redhat.com> >> >> Virtual address need to be fenced to know when we can safely remove it. >> This patch also properly clear the pagetable. Previously it was >> serouisly broken. >> >> Kernel 3.5/3.4 need a similar patch but adapted for difference in >> mutex locking. >> >> v2: For to update pagetable when unbinding bo (don't bailout if >> bo_va->valid is true). >> v3: Add kernel 3.5/3.4 comment. >> >> Signed-off-by: Jerome Glisse <jglisse@redhat.com> > It should fix the problem, going to test it as soon as I find some > 5min spare in my vacation. Till then it is: > > Reviewed-by: Christian König <christian.koenig@amd.com> > > In the future I would really like to make the updating of the PTE also > async, that should save us allot of problems here. Had time today to test that a bit more. Found two minor notes in the patch, see below. > > >> --- >> drivers/gpu/drm/radeon/radeon.h | 1 + >> drivers/gpu/drm/radeon/radeon_cs.c | 32 >> +++++++++++++++++++++++++++++--- >> drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- >> drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- >> drivers/gpu/drm/radeon/radeon_object.c | 6 +----- >> 5 files changed, 55 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/gpu/drm/radeon/radeon.h >> b/drivers/gpu/drm/radeon/radeon.h >> index 5431af2..8d75c65 100644 >> --- a/drivers/gpu/drm/radeon/radeon.h >> +++ b/drivers/gpu/drm/radeon/radeon.h >> @@ -300,6 +300,7 @@ struct radeon_bo_va { >> uint64_t soffset; >> uint64_t eoffset; >> uint32_t flags; >> + struct radeon_fence *fence; >> bool valid; >> }; >> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c >> b/drivers/gpu/drm/radeon/radeon_cs.c >> index 8a4c49e..995f3ab 100644 >> --- a/drivers/gpu/drm/radeon/radeon_cs.c >> +++ b/drivers/gpu/drm/radeon/radeon_cs.c >> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct >> radeon_cs_parser *p, void 
*data) >> return 0; >> } >> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, >> + struct radeon_fence *fence) >> +{ >> + struct radeon_fpriv *fpriv = parser->filp->driver_priv; >> + struct radeon_vm *vm = &fpriv->vm; >> + struct radeon_bo_list *lobj; >> + int r; "r" is unused here, please remove. >> + >> + if (parser->chunk_ib_idx == -1) >> + return; >> + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >> + return; >> + >> + list_for_each_entry(lobj, &parser->validated, tv.head) { >> + struct radeon_bo_va *bo_va; >> + struct radeon_bo *rbo = lobj->bo; >> + >> + bo_va = radeon_bo_va(rbo, vm); >> + radeon_fence_unref(&bo_va->fence); >> + bo_va->fence = radeon_fence_ref(fence); >> + } >> + return 0; The function doesn't return an error value, so this should just be a "return" without value. >> +} >> + >> /** >> * cs_parser_fini() - clean parser states >> * @parser: parser structure holding parsing context. >> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct >> radeon_cs_parser *parser, int error) >> { >> unsigned i; >> - if (!error) >> + if (!error) { >> + /* fence all bo va before ttm_eu_fence_buffer_objects so bo >> are still reserved */ >> + radeon_bo_vm_fence_va(parser, parser->ib.fence); >> ttm_eu_fence_buffer_objects(&parser->validated, >> parser->ib.fence); >> - else >> + } else { >> ttm_eu_backoff_reservation(&parser->validated); >> + } >> if (parser->relocs != NULL) { >> for (i = 0; i < parser->nrelocs; i++) { >> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct >> radeon_device *rdev, >> if (parser->chunk_ib_idx == -1) >> return 0; >> - >> if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >> return 0; >> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c >> b/drivers/gpu/drm/radeon/radeon_gart.c >> index b372005..9912182 100644 >> --- a/drivers/gpu/drm/radeon/radeon_gart.c >> +++ b/drivers/gpu/drm/radeon/radeon_gart.c >> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device >> *rdev, >> return -EINVAL; 
>> } >> - if (bo_va->valid) >> + if (bo_va->valid && mem) >> return 0; >> ngpu_pages = radeon_bo_ngpu_pages(bo); >> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, >> struct radeon_bo *bo) >> { >> struct radeon_bo_va *bo_va; >> + int r; >> bo_va = radeon_bo_va(bo, vm); >> if (bo_va == NULL) >> return 0; >> + /* wait for va use to end */ >> + while (bo_va->fence) { >> + r = radeon_fence_wait(bo_va->fence, false); >> + if (r) { >> + DRM_ERROR("error while waiting for fence: %d\n", r); >> + } >> + if (r == -EDEADLK) { >> + r = radeon_gpu_reset(rdev); >> + if (!r) >> + continue; >> + } >> + break; >> + } >> + radeon_fence_unref(&bo_va->fence); >> + >> mutex_lock(&rdev->vm_manager.lock); >> mutex_lock(&vm->mutex); >> radeon_vm_bo_update_pte(rdev, vm, bo, NULL); >> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, >> struct radeon_vm *vm) >> radeon_vm_unbind_locked(rdev, vm); >> mutex_unlock(&rdev->vm_manager.lock); >> - /* remove all bo */ >> + /* remove all bo at this point non are busy any more because unbind >> + * waited for the last vm fence to signal >> + */ >> r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); >> if (!r) { >> bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); >> list_del_init(&bo_va->bo_list); >> list_del_init(&bo_va->vm_list); >> + radeon_fence_unref(&bo_va->fence); >> radeon_bo_unreserve(rdev->ring_tmp_bo.bo); >> kfree(bo_va); >> } >> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, >> struct radeon_vm *vm) >> r = radeon_bo_reserve(bo_va->bo, false); >> if (!r) { >> list_del_init(&bo_va->bo_list); >> + radeon_fence_unref(&bo_va->fence); >> radeon_bo_unreserve(bo_va->bo); >> kfree(bo_va); >> } >> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c >> b/drivers/gpu/drm/radeon/radeon_gem.c >> index 84d0452..1b57b00 100644 >> --- a/drivers/gpu/drm/radeon/radeon_gem.c >> +++ b/drivers/gpu/drm/radeon/radeon_gem.c >> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct >> drm_gem_object 
*obj, >> struct radeon_device *rdev = rbo->rdev; >> struct radeon_fpriv *fpriv = file_priv->driver_priv; >> struct radeon_vm *vm = &fpriv->vm; >> - struct radeon_bo_va *bo_va, *tmp; >> if (rdev->family < CHIP_CAYMAN) { >> return; >> } >> if (radeon_bo_reserve(rbo, false)) { >> + dev_err(rdev->dev, "leaking bo va because we fail to reserve >> bo\n"); >> return; >> } >> - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { >> - if (bo_va->vm == vm) { >> - /* remove from this vm address space */ >> - mutex_lock(&vm->mutex); >> - list_del(&bo_va->vm_list); >> - mutex_unlock(&vm->mutex); >> - list_del(&bo_va->bo_list); >> - kfree(bo_va); >> - } >> - } >> + radeon_vm_bo_rmv(rdev, vm, rbo); >> radeon_bo_unreserve(rbo); >> } >> diff --git a/drivers/gpu/drm/radeon/radeon_object.c >> b/drivers/gpu/drm/radeon/radeon_object.c >> index 1f1a4c8..1cb014b 100644 >> --- a/drivers/gpu/drm/radeon/radeon_object.c >> +++ b/drivers/gpu/drm/radeon/radeon_object.c >> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) >> list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { >> /* remove from all vm address space */ >> - mutex_lock(&bo_va->vm->mutex); >> - list_del(&bo_va->vm_list); >> - mutex_unlock(&bo_va->vm->mutex); >> - list_del(&bo_va->bo_list); >> - kfree(bo_va); >> + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); >> } >> } > Additional to that patch we still need a minor fix to mesa (just move freeing the VM range after closing the handle). Going to send that in the next minute to the mesa-dev list. Otherwise the patch indeed fixes the problem, but also isn't the best idea for performance. Cheers, Christian.
On Mon, Aug 6, 2012 at 12:06 PM, Christian König <deathsimple@vodafone.de> wrote: > On 04.08.2012 11:07, Christian König wrote: >> >> On 03.08.2012 23:26, j.glisse@gmail.com wrote: >>> >>> From: Jerome Glisse <jglisse@redhat.com> >>> >>> Virtual address need to be fenced to know when we can safely remove it. >>> This patch also properly clear the pagetable. Previously it was >>> serouisly broken. >>> >>> Kernel 3.5/3.4 need a similar patch but adapted for difference in mutex >>> locking. >>> >>> v2: For to update pagetable when unbinding bo (don't bailout if >>> bo_va->valid is true). >>> v3: Add kernel 3.5/3.4 comment. >>> >>> Signed-off-by: Jerome Glisse <jglisse@redhat.com> >> >> It should fix the problem, going to test it as soon as I find some 5min >> spare in my vacation. Till then it is: >> >> Reviewed-by: Christian König <christian.koenig@amd.com> >> >> In the future I would really like to make the updating of the PTE also >> async, that should save us allot of problems here. > > > Had time today to test that a bit more. Found two minor notes in the patch, > see below. 
> > >> >> >>> --- >>> drivers/gpu/drm/radeon/radeon.h | 1 + >>> drivers/gpu/drm/radeon/radeon_cs.c | 32 >>> +++++++++++++++++++++++++++++--- >>> drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++-- >>> drivers/gpu/drm/radeon/radeon_gem.c | 13 ++----------- >>> drivers/gpu/drm/radeon/radeon_object.c | 6 +----- >>> 5 files changed, 55 insertions(+), 21 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>> b/drivers/gpu/drm/radeon/radeon.h >>> index 5431af2..8d75c65 100644 >>> --- a/drivers/gpu/drm/radeon/radeon.h >>> +++ b/drivers/gpu/drm/radeon/radeon.h >>> @@ -300,6 +300,7 @@ struct radeon_bo_va { >>> uint64_t soffset; >>> uint64_t eoffset; >>> uint32_t flags; >>> + struct radeon_fence *fence; >>> bool valid; >>> }; >>> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c >>> b/drivers/gpu/drm/radeon/radeon_cs.c >>> index 8a4c49e..995f3ab 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_cs.c >>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c >>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser >>> *p, void *data) >>> return 0; >>> } >>> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, >>> + struct radeon_fence *fence) >>> +{ >>> + struct radeon_fpriv *fpriv = parser->filp->driver_priv; >>> + struct radeon_vm *vm = &fpriv->vm; >>> + struct radeon_bo_list *lobj; > > >>> + int r; > > "r" is unused here, please remove. > > >>> + >>> + if (parser->chunk_ib_idx == -1) >>> + return; >>> + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >>> + return; >>> + >>> + list_for_each_entry(lobj, &parser->validated, tv.head) { >>> + struct radeon_bo_va *bo_va; >>> + struct radeon_bo *rbo = lobj->bo; >>> + >>> + bo_va = radeon_bo_va(rbo, vm); >>> + radeon_fence_unref(&bo_va->fence); >>> + bo_va->fence = radeon_fence_ref(fence); >>> + } > > >>> + return 0; > > The function doesn't return an error value, so this should just be a > "return" without value. 
> > >>> +} >>> + >>> /** >>> * cs_parser_fini() - clean parser states >>> * @parser: parser structure holding parsing context. >>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct >>> radeon_cs_parser *parser, int error) >>> { >>> unsigned i; >>> - if (!error) >>> + if (!error) { >>> + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are >>> still reserved */ >>> + radeon_bo_vm_fence_va(parser, parser->ib.fence); >>> ttm_eu_fence_buffer_objects(&parser->validated, >>> parser->ib.fence); >>> - else >>> + } else { >>> ttm_eu_backoff_reservation(&parser->validated); >>> + } >>> if (parser->relocs != NULL) { >>> for (i = 0; i < parser->nrelocs; i++) { >>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device >>> *rdev, >>> if (parser->chunk_ib_idx == -1) >>> return 0; >>> - >>> if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) >>> return 0; >>> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c >>> b/drivers/gpu/drm/radeon/radeon_gart.c >>> index b372005..9912182 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_gart.c >>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c >>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device >>> *rdev, >>> return -EINVAL; >>> } >>> - if (bo_va->valid) >>> + if (bo_va->valid && mem) >>> return 0; >>> ngpu_pages = radeon_bo_ngpu_pages(bo); >>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, >>> struct radeon_bo *bo) >>> { >>> struct radeon_bo_va *bo_va; >>> + int r; >>> bo_va = radeon_bo_va(bo, vm); >>> if (bo_va == NULL) >>> return 0; >>> + /* wait for va use to end */ >>> + while (bo_va->fence) { >>> + r = radeon_fence_wait(bo_va->fence, false); >>> + if (r) { >>> + DRM_ERROR("error while waiting for fence: %d\n", r); >>> + } >>> + if (r == -EDEADLK) { >>> + r = radeon_gpu_reset(rdev); >>> + if (!r) >>> + continue; >>> + } >>> + break; >>> + } >>> + radeon_fence_unref(&bo_va->fence); >>> + >>> mutex_lock(&rdev->vm_manager.lock); >>> mutex_lock(&vm->mutex); >>> 
radeon_vm_bo_update_pte(rdev, vm, bo, NULL); >>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, >>> struct radeon_vm *vm) >>> radeon_vm_unbind_locked(rdev, vm); >>> mutex_unlock(&rdev->vm_manager.lock); >>> - /* remove all bo */ >>> + /* remove all bo at this point non are busy any more because unbind >>> + * waited for the last vm fence to signal >>> + */ >>> r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); >>> if (!r) { >>> bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); >>> list_del_init(&bo_va->bo_list); >>> list_del_init(&bo_va->vm_list); >>> + radeon_fence_unref(&bo_va->fence); >>> radeon_bo_unreserve(rdev->ring_tmp_bo.bo); >>> kfree(bo_va); >>> } >>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, >>> struct radeon_vm *vm) >>> r = radeon_bo_reserve(bo_va->bo, false); >>> if (!r) { >>> list_del_init(&bo_va->bo_list); >>> + radeon_fence_unref(&bo_va->fence); >>> radeon_bo_unreserve(bo_va->bo); >>> kfree(bo_va); >>> } >>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c >>> b/drivers/gpu/drm/radeon/radeon_gem.c >>> index 84d0452..1b57b00 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_gem.c >>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c >>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object >>> *obj, >>> struct radeon_device *rdev = rbo->rdev; >>> struct radeon_fpriv *fpriv = file_priv->driver_priv; >>> struct radeon_vm *vm = &fpriv->vm; >>> - struct radeon_bo_va *bo_va, *tmp; >>> if (rdev->family < CHIP_CAYMAN) { >>> return; >>> } >>> if (radeon_bo_reserve(rbo, false)) { >>> + dev_err(rdev->dev, "leaking bo va because we fail to reserve >>> bo\n"); >>> return; >>> } >>> - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { >>> - if (bo_va->vm == vm) { >>> - /* remove from this vm address space */ >>> - mutex_lock(&vm->mutex); >>> - list_del(&bo_va->vm_list); >>> - mutex_unlock(&vm->mutex); >>> - list_del(&bo_va->bo_list); >>> - kfree(bo_va); >>> - } >>> - } >>> + radeon_vm_bo_rmv(rdev, vm, 
rbo); >>> radeon_bo_unreserve(rbo); >>> } >>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c >>> b/drivers/gpu/drm/radeon/radeon_object.c >>> index 1f1a4c8..1cb014b 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_object.c >>> +++ b/drivers/gpu/drm/radeon/radeon_object.c >>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) >>> list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { >>> /* remove from all vm address space */ >>> - mutex_lock(&bo_va->vm->mutex); >>> - list_del(&bo_va->vm_list); >>> - mutex_unlock(&bo_va->vm->mutex); >>> - list_del(&bo_va->bo_list); >>> - kfree(bo_va); >>> + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); >>> } >>> } >> >> > > Additional to that patch we still need a minor fix to mesa (just move > freeing the VM range after closing the handle). Going to send that in the > next minute to the mesa-dev list. > > Otherwise the patch indeed fixes the problem, but also isn't the best idea > for performance. > > Cheers, > Christian. > This does not impact performance, it's all about destroying bo/va so it's not a performance path. Or am i missing something here ? Cheers, Jerome
On 06.08.2012 18:30, Jerome Glisse wrote: > On Mon, Aug 6, 2012 at 12:06 PM, Christian König > <deathsimple@vodafone.de> wrote: >> [SNIP] >> Additional to that patch we still need a minor fix to mesa (just move >> freeing the VM range after closing the handle). Going to send that in the >> next minute to the mesa-dev list. >> >> Otherwise the patch indeed fixes the problem, but also isn't the best idea >> for performance. >> >> Cheers, >> Christian. >> > This does not impact performance, it's all about destroying bo/va so > it's not a performance path. Or am I missing something here? radeonsi currently allocates very small BOs (8-32 bytes) for T# descriptors, and throws them away immediately after drawing. That alone of course is insane under a performance aspect, but waiting for the last job to finish makes things completely worse. It just moves my priorities of hacking on radeonsi a bit around. Christian.
On Mon, 2012-08-06 at 18:55 +0200, Christian König wrote: > On 06.08.2012 18:30, Jerome Glisse wrote: > > On Mon, Aug 6, 2012 at 12:06 PM, Christian König > > <deathsimple@vodafone.de> wrote: > >> [SNIP] > >> Additional to that patch we still need a minor fix to mesa (just move > >> freeing the VM range after closing the handle). Going to send that in the > >> next minute to the mesa-dev list. > >> > >> Otherwise the patch indeed fixes the problem, but also isn't the best idea > >> for performance. > >> > > This does not impact performance, it's all about destroying bo/va so > > it's not a performance path. Or am i missing something here ? > > radeonsi currently allocates very small BOs (8-32 bytes) for T# > descriptors, and throws them away immediately after drawing. > > That alone of course is insane under a performance aspect, but waiting > for the last job to finish makes things completely worse. Absolutely, but I think blocking on BO destruction really is the fundamental problem. To fix this, an alternative to your patch might be the fenced buffer manager in src/gallium/auxiliary/pipebuffer/.
On Mon, Aug 6, 2012 at 12:55 PM, Christian König <deathsimple@vodafone.de> wrote: > On 06.08.2012 18:30, Jerome Glisse wrote: >> >> On Mon, Aug 6, 2012 at 12:06 PM, Christian König >> <deathsimple@vodafone.de> wrote: >>> >>> [SNIP] >>> >>> Additional to that patch we still need a minor fix to mesa (just move >>> freeing the VM range after closing the handle). Going to send that in the >>> next minute to the mesa-dev list. >>> >>> Otherwise the patch indeed fixes the problem, but also isn't the best >>> idea >>> for performance. >>> >>> Cheers, >>> Christian. >>> >> This does not impact performance, it's all about destroying bo/va so >> it's not a performance path. Or am I missing something here? > > > radeonsi currently allocates very small BOs (8-32 bytes) for T# descriptors, > and throws them away immediately after drawing. > > That alone of course is insane under a performance aspect, but waiting for > the last job to finish makes things completely worse. > > It just moves my priorities of hacking on radeonsi a bit around. > > Christian. Well we could simply just not register any close callback for gem and let the ttm bo delayed queue handle things, that way userspace won't stall. Cheers, Jerome
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5431af2..8d75c65 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -300,6 +300,7 @@ struct radeon_bo_va { uint64_t soffset; uint64_t eoffset; uint32_t flags; + struct radeon_fence *fence; bool valid; }; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 8a4c49e..995f3ab 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, + struct radeon_fence *fence) +{ + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + struct radeon_bo_list *lobj; + int r; + + if (parser->chunk_ib_idx == -1) + return; + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) + return; + + list_for_each_entry(lobj, &parser->validated, tv.head) { + struct radeon_bo_va *bo_va; + struct radeon_bo *rbo = lobj->bo; + + bo_va = radeon_bo_va(rbo, vm); + radeon_fence_unref(&bo_va->fence); + bo_va->fence = radeon_fence_ref(fence); + } + return 0; +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. 
@@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error) + if (!error) { + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */ + radeon_bo_vm_fence_va(parser, parser->ib.fence); ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); - else + } else { ttm_eu_backoff_reservation(&parser->validated); + } if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->chunk_ib_idx == -1) return 0; - if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0; diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index b372005..9912182 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; } - if (bo_va->valid) + if (bo_va->valid && mem) return 0; ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r; bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) return 0; + /* wait for va use to end */ + while (bo_va->fence) { + r = radeon_fence_wait(bo_va->fence, false); + if (r) { + DRM_ERROR("error while waiting for fence: %d\n", r); + } + if (r == -EDEADLK) { + r = radeon_gpu_reset(rdev); + if (!r) + continue; + } + break; + } + radeon_fence_unref(&bo_va->fence); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) radeon_vm_unbind_locked(rdev, vm); mutex_unlock(&rdev->vm_manager.lock); - /* remove all bo */ + /* remove all bo at this point non are busy any more because unbind + * waited for the last vm fence to signal + */ r = 
radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (!r) { bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); list_del_init(&bo_va->bo_list); list_del_init(&bo_va->vm_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); kfree(bo_va); } @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(bo_va->bo); kfree(bo_va); } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 84d0452..1b57b00 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj, struct radeon_device *rdev = rbo->rdev; struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_vm *vm = &fpriv->vm; - struct radeon_bo_va *bo_va, *tmp; if (rdev->family < CHIP_CAYMAN) { return; } if (radeon_bo_reserve(rbo, false)) { + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); return; } - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { - if (bo_va->vm == vm) { - /* remove from this vm address space */ - mutex_lock(&vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); - } - } + radeon_vm_bo_rmv(rdev, vm, rbo); radeon_bo_unreserve(rbo); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 1f1a4c8..1cb014b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { /* remove from all vm address space */ - mutex_lock(&bo_va->vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); + 
radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); } }