Message ID | 1344853568-3870-11-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Aug 13, 2012 at 6:26 AM, Christian König <deathsimple@vodafone.de> wrote: > Currently doing the update with the CP. > > Signed-off-by: Christian König <deathsimple@vodafone.de> NAK until point below are addressed > --- > drivers/gpu/drm/radeon/ni.c | 20 ++++++---- > drivers/gpu/drm/radeon/nid.h | 1 + > drivers/gpu/drm/radeon/radeon.h | 2 + > drivers/gpu/drm/radeon/radeon_asic.c | 3 ++ > drivers/gpu/drm/radeon/radeon_gart.c | 70 +++++++++++++++++++++++++--------- > 5 files changed, 71 insertions(+), 25 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c > index 1fd2e41..78d9cfb 100644 > --- a/drivers/gpu/drm/radeon/ni.c > +++ b/drivers/gpu/drm/radeon/ni.c > @@ -1513,20 +1513,24 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, > unsigned pfn, struct ttm_mem_reg *mem, > unsigned npages, uint32_t flags) > { > - void __iomem *ptr = (void *)vm->pt; > - uint64_t addr; > + struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; > + uint64_t addr, pt = vm->pt_gpu_addr + pfn * 8; > int i; > > addr = flags = cayman_vm_page_flags(rdev, flags); > > - for (i = 0; i < npages; ++i, ++pfn) { > - if (mem) { > - addr = radeon_vm_get_addr(rdev, mem, i); > + radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + npages * 2)); > + radeon_ring_write(ring, pt & 0xffffffff); > + radeon_ring_write(ring, (pt >> 32) & 0xff); > + for (i = 0; i < npages; ++i) { > + if (mem) { > + addr = radeon_vm_get_addr(rdev, mem, i); > addr = addr & 0xFFFFFFFFFFFFF000ULL; > addr |= flags; > - } > - writeq(addr, ptr + (pfn * 8)); > - } > + } > + radeon_ring_write(ring, addr & 0xffffffff); > + radeon_ring_write(ring, (addr >> 32) & 0xffffffff); > + } > } > > void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) > diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h > index 870db34..2423d1b 100644 > --- a/drivers/gpu/drm/radeon/nid.h > +++ b/drivers/gpu/drm/radeon/nid.h > @@ -585,6 +585,7 @@ > #define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 > #define PACKET3_SET_RESOURCE_INDIRECT 0x74 > #define PACKET3_SET_APPEND_CNT 0x75 > +#define PACKET3_ME_WRITE 0x7A > > #endif > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 7d37cb2..3de0f08 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -1154,6 +1154,8 @@ struct radeon_asic { > struct { > int (*init)(struct radeon_device *rdev); > void (*fini)(struct radeon_device *rdev); > + > + u32 pt_ring_index; > void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, > unsigned pfn, struct ttm_mem_reg *mem, > unsigned npages, uint32_t flags); > diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c > index 4b99a24..d0b4e50 100644 > --- a/drivers/gpu/drm/radeon/radeon_asic.c > +++ b/drivers/gpu/drm/radeon/radeon_asic.c > @@ -1359,6 +1359,7 @@ static struct radeon_asic cayman_asic = { > .vm = { > .init = &cayman_vm_init, > .fini = &cayman_vm_fini, > + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, > .set_page = &cayman_vm_set_page, > }, > .ring = { > @@ -1461,6 +1462,7 @@ static struct radeon_asic trinity_asic = { > .vm = { > .init = &cayman_vm_init, > .fini = &cayman_vm_fini, > + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, > .set_page = &cayman_vm_set_page, > }, > .ring = { > @@ -1563,6 +1565,7 @@ static struct radeon_asic si_asic = { > .vm = { > .init = &si_vm_init, > .fini = &si_vm_fini, > + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, > .set_page = &cayman_vm_set_page, > }, > .ring = { > diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c > index bf5378b..9cfd213 100644 > --- a/drivers/gpu/drm/radeon/radeon_gart.c > +++ b/drivers/gpu/drm/radeon/radeon_gart.c > @@ -464,15 +464,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) > continue; > > list_for_each_entry(bo_va, &vm->va, vm_list) { > - struct ttm_mem_reg *mem = NULL; > - if (bo_va->valid) > - mem = &bo_va->bo->tbo.mem; > - > bo_va->valid = false; > - r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem); > - if (r) { > - DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); > - } > } > } > return 0; For this to work properly you will need patch : http://people.freedesktop.org/~glisse/0001-drm-radeon-make-sure-ib-bo-is-properly-bound-and-up-.patch Otherwise there is a chance that ib bo pagetable is out of sync. > @@ -801,7 +793,6 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, > return addr; > } > > -/* object have to be reserved & global and local mutex must be locked */ > /** > * radeon_vm_bo_update_pte - map a bo into the vm page table > * > @@ -812,15 +803,21 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, > * > * Fill in the page table entries for @bo (cayman+). > * Returns 0 for success, -EINVAL for failure. > + * > + * Object have to be reserved & global and local mutex must be locked! > */ > int radeon_vm_bo_update_pte(struct radeon_device *rdev, > struct radeon_vm *vm, > struct radeon_bo *bo, > struct ttm_mem_reg *mem) > { > + unsigned ridx = rdev->asic->vm.pt_ring_index; > + struct radeon_ring *ring = &rdev->ring[ridx]; > + struct radeon_semaphore *sem = NULL; > struct radeon_bo_va *bo_va; > - unsigned ngpu_pages; > + unsigned ngpu_pages, ndw; > uint64_t pfn; > + int r; > > /* nothing to do if vm isn't bound */ > if (vm->sa_bo == NULL) > @@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, > return -EINVAL; > } > > - if (bo_va->valid && mem) > + if (bo_va->valid == !!mem) > return 0; Logic is wrong, we want to return either if valid is true and mem not null, which means bo is already bound and its pagetable entry are up to date as it did not move since page table was last updated. Or return if valid is false and mem is null, meaning the pagetable already contain invalid page entry and we are unbinding the bo. So the proper test should be : if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) { return 0; } > > ngpu_pages = radeon_bo_ngpu_pages(bo); > @@ -846,12 +843,50 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, > if (mem->mem_type == TTM_PL_TT) { > bo_va->flags |= RADEON_VM_PAGE_SYSTEM; > } > - } > - if (!bo_va->valid) { > - mem = NULL; > + if (!bo_va->valid) { > + mem = NULL; > + } > + } else { > + bo_va->valid = false; > } > pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; > - radeon_asic_vm_set_page(rdev, bo_va->vm, pfn, mem, ngpu_pages, bo_va->flags); > + > + if (vm->fence && radeon_fence_signaled(vm->fence)) { > + radeon_fence_unref(&vm->fence); > + } > + > + if (vm->fence && vm->fence->ring != ridx) { > + r = radeon_semaphore_create(rdev, &sem); > + if (r) { > + return r; > + } > + } > + > + /* estimate number of dw needed */ > + ndw = 32; > + ndw += (ngpu_pages >> 12) * 3; > + ndw += ngpu_pages * 2; > + > + r = radeon_ring_lock(rdev, ring, ndw); > + if (r) { > + return r; > + } > + > + if (sem && radeon_fence_need_sync(vm->fence, ridx)) { > + radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx); > + radeon_fence_note_sync(vm->fence, ridx); > + } > + > + radeon_asic_vm_set_page(rdev, vm, pfn, mem, ngpu_pages, bo_va->flags); > + > + radeon_fence_unref(&vm->fence); > + r = radeon_fence_emit(rdev, &vm->fence, ridx); > + if (r) { > + radeon_ring_unlock_undo(rdev, ring); > + return r; > + } > + radeon_ring_unlock_commit(rdev, ring); > + radeon_semaphore_free(rdev, &sem, vm->fence); > radeon_fence_unref(&vm->last_flush); > return 0; > } > @@ -875,6 +910,7 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, > struct radeon_bo *bo) > { > struct radeon_bo_va *bo_va; > + int r; > > bo_va = radeon_bo_va(bo, vm); > if (bo_va == NULL) > @@ -882,14 +918,14 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, > > mutex_lock(&rdev->vm_manager.lock); > mutex_lock(&vm->mutex); > - radeon_vm_free_pt(rdev, vm); > + r = radeon_vm_bo_update_pte(rdev, vm, bo, NULL); > mutex_unlock(&rdev->vm_manager.lock); > list_del(&bo_va->vm_list); > mutex_unlock(&vm->mutex); > list_del(&bo_va->bo_list); > > kfree(bo_va); > - return 0; > + return r; > } > > /** > -- > 1.7.9.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel
On 13.08.2012 18:19, Jerome Glisse wrote: > On Mon, Aug 13, 2012 at 6:26 AM, Christian König > <deathsimple@vodafone.de> wrote: >> Currently doing the update with the CP. >> >> Signed-off-by: Christian König <deathsimple@vodafone.de> > NAK until point below are addressed [SNIP] > For this to work properly you will need patch : > http://people.freedesktop.org/~glisse/0001-drm-radeon-make-sure-ib-bo-is-properly-bound-and-up-.patch > > Otherwise there is a chance that ib bo pagetable is out of sync. Oh yes indeed. Going to sort in your patch directly before of this one. Also I haven't tested suspend/resume with this set yet, need to change that before sending it upstream. > [SNIP] >> @@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, >> return -EINVAL; >> } >> >> - if (bo_va->valid && mem) >> + if (bo_va->valid == !!mem) >> return 0; > Logic is wrong, we want to return either if valid is true and mem not > null, which means bo is already bound and its pagetable entry are up > to date as it did not move since page table was last updated. Or > return if valid is false and mem is null, meaning the pagetable > already contain invalid page entry and we are unbinding the bo. So the > proper test should be : > > if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) { > return 0; > } Isn't that identical? Ok your version is definitely easier to read, so I'm going to use that one anyway. Thanks, Christian.
On Mon, Aug 13, 2012 at 12:53 PM, Christian König <deathsimple@vodafone.de> wrote: > On 13.08.2012 18:19, Jerome Glisse wrote: >> >> On Mon, Aug 13, 2012 at 6:26 AM, Christian König >> <deathsimple@vodafone.de> wrote: >>> >>> Currently doing the update with the CP. >>> >>> Signed-off-by: Christian König <deathsimple@vodafone.de> >> >> NAK until point below are addressed > > [SNIP] > >> For this to work properly you will need patch : >> >> http://people.freedesktop.org/~glisse/0001-drm-radeon-make-sure-ib-bo-is-properly-bound-and-up-.patch >> >> Otherwise there is a chance that ib bo pagetable is out of sync. > > Oh yes indeed. Going to sort in your patch directly before of this one. > > Also I haven't tested suspend/resume with this set yet, need to change that > before sending it upstream. I haven't tested my patch, but it should work. >> [SNIP] >> >>> @@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device >>> *rdev, >>> return -EINVAL; >>> } >>> >>> - if (bo_va->valid && mem) >>> + if (bo_va->valid == !!mem) >>> return 0; >> >> Logic is wrong, we want to return either if valid is true and mem not >> null, which means bo is already bound and its pagetable entry are up >> to date as it did not move since page table was last updated. Or >> return if valid is false and mem is null, meaning the pagetable >> already contain invalid page entry and we are unbinding the bo. So the >> proper test should be : >> >> if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL)) { >> return 0; >> } > > Isn't that identical? Ok your version is definitely easier to read, so I'm > going to use that one anyway. Yes it's, i was bit confuse by your test, would rather make the logic easier to read for human. Cheers, Jerome
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 1fd2e41..78d9cfb 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1513,20 +1513,24 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm, unsigned pfn, struct ttm_mem_reg *mem, unsigned npages, uint32_t flags) { - void __iomem *ptr = (void *)vm->pt; - uint64_t addr; + struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; + uint64_t addr, pt = vm->pt_gpu_addr + pfn * 8; int i; addr = flags = cayman_vm_page_flags(rdev, flags); - for (i = 0; i < npages; ++i, ++pfn) { - if (mem) { - addr = radeon_vm_get_addr(rdev, mem, i); + radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + npages * 2)); + radeon_ring_write(ring, pt & 0xffffffff); + radeon_ring_write(ring, (pt >> 32) & 0xff); + for (i = 0; i < npages; ++i) { + if (mem) { + addr = radeon_vm_get_addr(rdev, mem, i); addr = addr & 0xFFFFFFFFFFFFF000ULL; addr |= flags; - } - writeq(addr, ptr + (pfn * 8)); - } + } + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, (addr >> 32) & 0xffffffff); + } } void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib) diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 870db34..2423d1b 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -585,6 +585,7 @@ #define PACKET3_SET_CONTEXT_REG_INDIRECT 0x73 #define PACKET3_SET_RESOURCE_INDIRECT 0x74 #define PACKET3_SET_APPEND_CNT 0x75 +#define PACKET3_ME_WRITE 0x7A #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7d37cb2..3de0f08 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1154,6 +1154,8 @@ struct radeon_asic { struct { int (*init)(struct radeon_device *rdev); void (*fini)(struct radeon_device *rdev); + + u32 pt_ring_index; void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm, unsigned pfn, struct ttm_mem_reg *mem, unsigned npages, uint32_t flags); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 4b99a24..d0b4e50 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1359,6 +1359,7 @@ static struct radeon_asic cayman_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1461,6 +1462,7 @@ static struct radeon_asic trinity_asic = { .vm = { .init = &cayman_vm_init, .fini = &cayman_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { @@ -1563,6 +1565,7 @@ static struct radeon_asic si_asic = { .vm = { .init = &si_vm_init, .fini = &si_vm_fini, + .pt_ring_index = RADEON_RING_TYPE_GFX_INDEX, .set_page = &cayman_vm_set_page, }, .ring = { diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index bf5378b..9cfd213 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -464,15 +464,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) continue; list_for_each_entry(bo_va, &vm->va, vm_list) { - struct ttm_mem_reg *mem = NULL; - if (bo_va->valid) - mem = &bo_va->bo->tbo.mem; - bo_va->valid = false; - r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem); - if (r) { - DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); - } } } return 0; @@ -801,7 +793,6 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, return addr; } -/* object have to be reserved & global and local mutex must be locked */ /** * radeon_vm_bo_update_pte - map a bo into the vm page table * @@ -812,15 +803,21 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev, * * Fill in the page table entries for @bo (cayman+). * Returns 0 for success, -EINVAL for failure. + * + * Object have to be reserved & global and local mutex must be locked! */ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, struct ttm_mem_reg *mem) { + unsigned ridx = rdev->asic->vm.pt_ring_index; + struct radeon_ring *ring = &rdev->ring[ridx]; + struct radeon_semaphore *sem = NULL; struct radeon_bo_va *bo_va; - unsigned ngpu_pages; + unsigned ngpu_pages, ndw; uint64_t pfn; + int r; /* nothing to do if vm isn't bound */ if (vm->sa_bo == NULL) @@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; } - if (bo_va->valid && mem) + if (bo_va->valid == !!mem) return 0; ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -846,12 +843,50 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, if (mem->mem_type == TTM_PL_TT) { bo_va->flags |= RADEON_VM_PAGE_SYSTEM; } - } - if (!bo_va->valid) { - mem = NULL; + if (!bo_va->valid) { + mem = NULL; + } + } else { + bo_va->valid = false; } pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE; - radeon_asic_vm_set_page(rdev, bo_va->vm, pfn, mem, ngpu_pages, bo_va->flags); + + if (vm->fence && radeon_fence_signaled(vm->fence)) { + radeon_fence_unref(&vm->fence); + } + + if (vm->fence && vm->fence->ring != ridx) { + r = radeon_semaphore_create(rdev, &sem); + if (r) { + return r; + } + } + + /* estimate number of dw needed */ + ndw = 32; + ndw += (ngpu_pages >> 12) * 3; + ndw += ngpu_pages * 2; + + r = radeon_ring_lock(rdev, ring, ndw); + if (r) { + return r; + } + + if (sem && radeon_fence_need_sync(vm->fence, ridx)) { + radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx); + radeon_fence_note_sync(vm->fence, ridx); + } + + radeon_asic_vm_set_page(rdev, vm, pfn, mem, ngpu_pages, bo_va->flags); + + radeon_fence_unref(&vm->fence); + r = radeon_fence_emit(rdev, &vm->fence, ridx); + if (r) { + radeon_ring_unlock_undo(rdev, ring); + return r; + } + radeon_ring_unlock_commit(rdev, ring); + radeon_semaphore_free(rdev, &sem, vm->fence); radeon_fence_unref(&vm->last_flush); return 0; } @@ -875,6 +910,7 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r; bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) @@ -882,14 +918,14 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); - radeon_vm_free_pt(rdev, vm); + r = radeon_vm_bo_update_pte(rdev, vm, bo, NULL); mutex_unlock(&rdev->vm_manager.lock); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); kfree(bo_va); - return 0; + return r; } /**
Currently doing the update with the CP. Signed-off-by: Christian König <deathsimple@vodafone.de> --- drivers/gpu/drm/radeon/ni.c | 20 ++++++---- drivers/gpu/drm/radeon/nid.h | 1 + drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_asic.c | 3 ++ drivers/gpu/drm/radeon/radeon_gart.c | 70 +++++++++++++++++++++++++--------- 5 files changed, 71 insertions(+), 25 deletions(-)