Message ID | 20231017211337.1593869-4-Felix.Kuehling@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable integration of KFD with DRM GEM_VA ioctl | expand |
On 2023-10-17 17:13, Felix Kuehling wrote: > Let amdgpu_vm_handle_moved update all BO VA mappings of BOs reserved by > the caller. This will be useful for handling extra BO VA mappings in > KFD VMs that are managed through the render node API. > > Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> > Reviewed-by: Christian König <christian.koenig@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 22 +-------------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 19 +++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++- > 4 files changed, 18 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 74769afaa33d..c8f2907ebd6f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -1113,7 +1113,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > struct amdgpu_vm *vm = &fpriv->vm; > struct amdgpu_bo_list_entry *e; > struct amdgpu_bo_va *bo_va; > - struct amdgpu_bo *bo; > unsigned int i; > int r; > > @@ -1141,26 +1140,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) > return r; > } > > - amdgpu_bo_list_for_each_entry(e, p->bo_list) { > - /* ignore duplicates */ > - bo = ttm_to_amdgpu_bo(e->tv.bo); > - if (!bo) > - continue; > - > - bo_va = e->bo_va; > - if (bo_va == NULL) > - continue; > - > - r = amdgpu_vm_bo_update(adev, bo_va, false); > - if (r) > - return r; > - > - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); > - if (r) > - return r; > - } FYI, removing this loop seemed to cause PSDB failures, mostly in RADV tests. It may have been a glitch in the infrastructure, but the failures were consistent on the three subsequent patches, too. Restoring this loop seems to make the tests pass, so I'll do that for now. I don't have time to debug CS with RADV, and this change is not needed for my ROCm work. Regards, Felix > - > - r = amdgpu_vm_handle_moved(adev, vm); > + r = amdgpu_vm_handle_moved(adev, vm, &p->ticket); > if (r) > return r; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > index b5e28fa3f414..e7e87a3b2601 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c > @@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) > if (!r) > r = amdgpu_vm_clear_freed(adev, vm, NULL); > if (!r) > - r = amdgpu_vm_handle_moved(adev, vm); > + r = amdgpu_vm_handle_moved(adev, vm, ticket); > > if (r && r != -EBUSY) > DRM_ERROR("Failed to invalidate VM page tables (%d))\n", > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index d72daf15662f..c586d0e93d75 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1285,6 +1285,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, > * > * @adev: amdgpu_device pointer > * @vm: requested vm > + * @ticket: optional reservation ticket used to reserve the VM > * > * Make sure all BOs which are moved are updated in the PTs. > * > @@ -1294,11 +1295,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, > * PTs have to be reserved! > */ > int amdgpu_vm_handle_moved(struct amdgpu_device *adev, > - struct amdgpu_vm *vm) > + struct amdgpu_vm *vm, > + struct ww_acquire_ctx *ticket) > { > struct amdgpu_bo_va *bo_va; > struct dma_resv *resv; > - bool clear; > + bool clear, unlock; > int r; > > spin_lock(&vm->status_lock); > @@ -1321,17 +1323,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, > spin_unlock(&vm->status_lock); > > /* Try to reserve the BO to avoid clearing its ptes */ > - if (!adev->debug_vm && dma_resv_trylock(resv)) > + if (!adev->debug_vm && dma_resv_trylock(resv)) { > clear = false; > + unlock = true; > + /* The caller is already holding the reservation lock */ > + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { > + clear = false; > + unlock = false; > /* Somebody else is using the BO right now */ > - else > + } else { > clear = true; > + unlock = false; > + } > > r = amdgpu_vm_bo_update(adev, bo_va, clear); > if (r) > return r; > > - if (!clear) > + if (unlock) > dma_resv_unlock(resv); > spin_lock(&vm->status_lock); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > index 6e71978db13f..ebcc75132b74 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h > @@ -432,7 +432,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, > struct amdgpu_vm *vm, > struct dma_fence **fence); > int amdgpu_vm_handle_moved(struct amdgpu_device *adev, > - struct amdgpu_vm *vm); > + struct amdgpu_vm *vm, > + struct ww_acquire_ctx *ticket); > void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, > struct amdgpu_vm *vm, struct amdgpu_bo *bo); > int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 74769afaa33d..c8f2907ebd6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1113,7 +1113,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct amdgpu_bo_va *bo_va; - struct amdgpu_bo *bo; unsigned int i; int r; @@ -1141,26 +1140,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - /* ignore duplicates */ - bo = ttm_to_amdgpu_bo(e->tv.bo); - if (!bo) - continue; - - bo_va = e->bo_va; - if (bo_va == NULL) - continue; - - r = amdgpu_vm_bo_update(adev, bo_va, false); - if (r) - return r; - - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); - if (r) - return r; - } - - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->ticket); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index b5e28fa3f414..e7e87a3b2601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -409,7 +409,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) if (!r) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d72daf15662f..c586d0e93d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1285,6 +1285,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1294,11 +1295,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1321,17 +1323,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; - if (!clear) + if (unlock) dma_resv_unlock(resv); spin_lock(&vm->status_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6e71978db13f..ebcc75132b74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -432,7 +432,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,