Message ID | 20190412160338.64994-6-thellstrom@vmware.com (mailing list archive)
---|---
State | New, archived
Series | Emulated coherent graphics memory
Am 12.04.19 um 18:04 schrieb Thomas Hellstrom: > With the vmwgfx dirty tracking, the default TTM fault handler is not > completely sufficient (vmwgfx need to modify the vma->vm_flags member, > and also needs to restrict the number of prefaults). > > We also want to replicate the new ttm_bo_vm_reserve() functionality > > So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved() > and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other > drivers to use. > > Cc: "Christian König" <christian.koenig@amd.com> > Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> Two nit picks below, apart from that looks good to me as well. > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++------------ > include/drm/ttm/ttm_bo_api.h | 10 ++ > 2 files changed, 116 insertions(+), 64 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index bfb25b81fed7..3bd28fb97124 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -42,8 +42,6 @@ > #include <linux/uaccess.h> > #include <linux/mem_encrypt.h> > > -#define TTM_BO_VM_NUM_PREFAULT 16 > - > static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, > struct vm_fault *vmf) > { > @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, > + page_offset; > } > > -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > +/** > + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback > + * @bo: The buffer object > + * @vmf: The fault structure handed to the callback > + * > + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped > + * during long waits, and after the wait the callback will be restarted. This > + * is to allow other threads using the same virtual memory space concurrent > + * access to map(), unmap() completely unrelated buffer objects. TTM buffer > + * object reservations sometimes wait for GPU and should therefore be > + * considered long waits. This function reserves the buffer object interruptibly > + * taking this into account. Starvation is avoided by the vm system not > + * allowing too many repeated restarts. > + * This function is intended to be used in customized fault() and _mkwrite() > + * handlers. > + * > + * Return: > + * 0 on success and the bo was reserved. > + * VM_FAULT_RETRY if blocking wait. > + * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. > + */ > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > + struct vm_fault *vmf) > { > - struct vm_area_struct *vma = vmf->vma; > - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > - vma->vm_private_data; > - struct ttm_bo_device *bdev = bo->bdev; > - unsigned long page_offset; > - unsigned long page_last; > - unsigned long pfn; > - struct ttm_tt *ttm = NULL; > - struct page *page; > - int err; > - int i; > - vm_fault_t ret = VM_FAULT_NOPAGE; > - unsigned long address = vmf->address; > - struct ttm_mem_type_manager *man = > - &bdev->man[bo->mem.mem_type]; > - struct vm_area_struct cvma; > - > - /* > - * Work around locking order reversal in fault / nopfn > - * between mmap_sem and bo_reserve: Perform a trylock operation > - * for reserve, and if it fails, retry the fault after waiting > - * for the buffer to become unreserved. 
> - */ > if (unlikely(!reservation_object_trylock(bo->resv))) { > if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { > if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { > @@ -151,14 +148,56 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > return VM_FAULT_NOPAGE; > } > > + return 0; > +} > +EXPORT_SYMBOL(ttm_bo_vm_reserve); > + > +/** > + * ttm_bo_vm_fault_reserved - TTM fault helper > + * @vmf: The struct vm_fault given as argument to the fault callback > + * @cvma: The struct vmw_area_struct affected. Note that this may be a > + * copy of the real vma object if the caller needs, for example, VM > + * flags to be temporarily altered while determining the page protection. > + * @num_prefault: Maximum number of prefault pages. The caller may want to > + * specify this based on madvice settings and the size of the GPU object > + * backed by the memory. > + * > + * This function inserts one or more page table entries pointing to the > + * memory backing the buffer object, and then returns a return code > + * instructing the caller to retry the page access. > + * > + * Return: > + * VM_FAULT_NOPAGE on success or pending signal > + * VM_FAULT_SIGBUS on unspecified error > + * VM_FAULT_OOM on out-of-memory > + * VM_FAULT_RETRY if retryable wait > + */ > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > + struct vm_area_struct *cvma, > + pgoff_t num_prefault) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > + vma->vm_private_data; > + struct ttm_bo_device *bdev = bo->bdev; > + unsigned long page_offset; > + unsigned long page_last; > + unsigned long pfn; > + struct ttm_tt *ttm = NULL; > + struct page *page; > + int err; > + pgoff_t i; > + vm_fault_t ret = VM_FAULT_NOPAGE; > + unsigned long address = vmf->address; > + struct ttm_mem_type_manager *man = > + &bdev->man[bo->mem.mem_type]; > + > /* > * Refuse to fault imported pages. This should be handled > * (if at all) by redirecting mmap to the exporter. > */ > - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { > - ret = VM_FAULT_SIGBUS; > - goto out_unlock; > - } > + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) > + return VM_FAULT_SIGBUS; > > if (bdev->driver->fault_reserve_notify) { > struct dma_fence *moving = dma_fence_get(bo->moving); > @@ -169,11 +208,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > break; > case -EBUSY: > case -ERESTARTSYS: > - ret = VM_FAULT_NOPAGE; > - goto out_unlock; > + return VM_FAULT_NOPAGE; > default: > - ret = VM_FAULT_SIGBUS; > - goto out_unlock; > + return VM_FAULT_SIGBUS; > } > > if (bo->moving != moving) { > @@ -189,24 +226,15 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > * move. > */ > ret = ttm_bo_vm_fault_idle(bo, vmf); > - if (unlikely(ret != 0)) { > - if (ret == VM_FAULT_RETRY && > - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { > - /* The BO has already been unreserved. */ > - return ret; > - } > - > - goto out_unlock; > - } > + if (unlikely(ret != 0)) > + return ret; > > err = ttm_mem_io_lock(man, true); > - if (unlikely(err != 0)) { > - ret = VM_FAULT_NOPAGE; > - goto out_unlock; > - } > + if (unlikely(err != 0)) > + return VM_FAULT_NOPAGE; > err = ttm_mem_io_reserve_vm(bo); > if (unlikely(err != 0)) { > - ret = VM_FAULT_SIGBUS; > + return VM_FAULT_SIGBUS; > goto out_io_unlock; This goto is now superfluous. 
> } > > @@ -220,17 +248,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > goto out_io_unlock; > } > > - /* > - * Make a local vma copy to modify the page_prot member > - * and vm_flags if necessary. The vma parameter is protected > - * by mmap_sem in write mode. > - */ > - cvma = *vma; > - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags); > + cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags); > > if (bo->mem.bus.is_iomem) { > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, > - cvma.vm_page_prot); > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, > + cvma->vm_page_prot); > } else { > struct ttm_operation_ctx ctx = { > .interruptible = false, > @@ -240,8 +262,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > }; > > ttm = bo->ttm; > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, > - cvma.vm_page_prot); > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, > + cvma->vm_page_prot); > > /* Allocate all page at once, most common usage */ > if (ttm_tt_populate(ttm, &ctx)) { > @@ -254,10 +276,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > * Speculatively prefault a number of pages. Only error on > * first page. > */ > - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { > + for (i = 0; i < num_prefault; ++i) { > if (bo->mem.bus.is_iomem) { > /* Iomem should not be marked encrypted */ > - cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); > + cvma->vm_page_prot = > + pgprot_decrypted(cvma->vm_page_prot); > pfn = ttm_bo_io_mem_pfn(bo, page_offset); > } else { > page = ttm->pages[page_offset]; > @@ -273,10 +296,10 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > } > > if (vma->vm_flags & VM_MIXEDMAP) > - ret = vmf_insert_mixed(&cvma, address, > + ret = vmf_insert_mixed(cvma, address, > __pfn_to_pfn_t(pfn, PFN_DEV)); > else > - ret = vmf_insert_pfn(&cvma, address, pfn); > + ret = vmf_insert_pfn(cvma, address, pfn); > > /* > * Somebody beat us to this PTE or prefaulting to > @@ -295,7 +318,26 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > ret = VM_FAULT_NOPAGE; > out_io_unlock: > ttm_mem_io_unlock(man); > -out_unlock: > + return ret; > +} > +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > + > +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct vm_area_struct cvma = *vma; > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > + vma->vm_private_data; That extra cast can be dropped, the vm_private_data member is a void* anyway. Regards, Christian. 
> + vm_fault_t ret; > + > + ret = ttm_bo_vm_reserve(bo, vmf); > + if (ret) > + return ret; > + > + ret = ttm_bo_vm_fault_reserved(vmf, &cvma, TTM_BO_VM_NUM_PREFAULT); > + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > + return ret; > + > reservation_object_unlock(bo->resv); > return ret; > } > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 49d9cdfc58f2..bebfa16426ca 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, > struct ttm_operation_ctx *ctx); > void ttm_bo_swapout_all(struct ttm_bo_device *bdev); > int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo); > + > +/* Default number of pre-faulted pages in the TTM fault handler */ > +#define TTM_BO_VM_NUM_PREFAULT 16 > + > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > + struct vm_fault *vmf); > + > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > + struct vm_area_struct *cvma, > + pgoff_t num_prefault); > #endif
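The kernel-doc above notes that ttm_bo_vm_reserve() is intended for customized fault() and _mkwrite() handlers. A minimal sketch of the _mkwrite() side, assuming a driver that only needs some dirty bookkeeping before the write goes through; the my_driver_bo_vm_mkwrite() and my_driver_mark_dirty() names are hypothetical and not part of this patch:

static vm_fault_t my_driver_bo_vm_mkwrite(struct vm_fault *vmf)
{
	/* vm_private_data is a void *, so no cast is needed here. */
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	vm_fault_t ret;

	/*
	 * Either takes bo->resv, or returns VM_FAULT_RETRY/VM_FAULT_NOPAGE
	 * so that the core mm restarts the write fault later.
	 */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	my_driver_mark_dirty(bo);	/* hypothetical driver bookkeeping */

	reservation_object_unlock(bo->resv);

	/* Returning 0 lets the core mm go on and make the PTE writable. */
	return 0;
}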
On Mon, 2019-04-15 at 08:34 +0200, Christian König wrote: > Am 12.04.19 um 18:04 schrieb Thomas Hellstrom: > > With the vmwgfx dirty tracking, the default TTM fault handler is > > not > > completely sufficient (vmwgfx need to modify the vma->vm_flags > > member, > > and also needs to restrict the number of prefaults). > > > > We also want to replicate the new ttm_bo_vm_reserve() functionality > > > > So start turning the TTM vm code into helpers: > > ttm_bo_vm_fault_reserved() > > and ttm_bo_vm_reserve(), and provide a default TTM fault handler > > for other > > drivers to use. > > > > Cc: "Christian König" <christian.koenig@amd.com> > > Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> > > Two nit picks below, apart from that looks good to me as well. Thanks Christian, I'll incoporate those. /Thomas > > > --- > > drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++------- > > ----- > > include/drm/ttm/ttm_bo_api.h | 10 ++ > > 2 files changed, 116 insertions(+), 64 deletions(-) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > index bfb25b81fed7..3bd28fb97124 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > @@ -42,8 +42,6 @@ > > #include <linux/uaccess.h> > > #include <linux/mem_encrypt.h> > > > > -#define TTM_BO_VM_NUM_PREFAULT 16 > > - > > static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object > > *bo, > > struct vm_fault *vmf) > > { > > @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct > > ttm_buffer_object *bo, > > + page_offset; > > } > > > > -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > > +/** > > + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm > > callback > > + * @bo: The buffer object > > + * @vmf: The fault structure handed to the callback > > + * > > + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem > > to be dropped > > + * during long waits, and after the wait the callback will be > > restarted. This > > + * is to allow other threads using the same virtual memory space > > concurrent > > + * access to map(), unmap() completely unrelated buffer objects. > > TTM buffer > > + * object reservations sometimes wait for GPU and should therefore > > be > > + * considered long waits. This function reserves the buffer object > > interruptibly > > + * taking this into account. Starvation is avoided by the vm > > system not > > + * allowing too many repeated restarts. > > + * This function is intended to be used in customized fault() and > > _mkwrite() > > + * handlers. > > + * > > + * Return: > > + * 0 on success and the bo was reserved. > > + * VM_FAULT_RETRY if blocking wait. > > + * VM_FAULT_NOPAGE if blocking wait and retrying was not > > allowed. 
> > + */ > > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > > + struct vm_fault *vmf) > > { > > - struct vm_area_struct *vma = vmf->vma; > > - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > > - vma->vm_private_data; > > - struct ttm_bo_device *bdev = bo->bdev; > > - unsigned long page_offset; > > - unsigned long page_last; > > - unsigned long pfn; > > - struct ttm_tt *ttm = NULL; > > - struct page *page; > > - int err; > > - int i; > > - vm_fault_t ret = VM_FAULT_NOPAGE; > > - unsigned long address = vmf->address; > > - struct ttm_mem_type_manager *man = > > - &bdev->man[bo->mem.mem_type]; > > - struct vm_area_struct cvma; > > - > > - /* > > - * Work around locking order reversal in fault / nopfn > > - * between mmap_sem and bo_reserve: Perform a trylock operation > > - * for reserve, and if it fails, retry the fault after waiting > > - * for the buffer to become unreserved. > > - */ > > if (unlikely(!reservation_object_trylock(bo->resv))) { > > if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { > > if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { > > @@ -151,14 +148,56 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > return VM_FAULT_NOPAGE; > > } > > > > + return 0; > > +} > > +EXPORT_SYMBOL(ttm_bo_vm_reserve); > > + > > +/** > > + * ttm_bo_vm_fault_reserved - TTM fault helper > > + * @vmf: The struct vm_fault given as argument to the fault > > callback > > + * @cvma: The struct vmw_area_struct affected. Note that this may > > be a > > + * copy of the real vma object if the caller needs, for example, > > VM > > + * flags to be temporarily altered while determining the page > > protection. > > + * @num_prefault: Maximum number of prefault pages. The caller may > > want to > > + * specify this based on madvice settings and the size of the GPU > > object > > + * backed by the memory. > > + * > > + * This function inserts one or more page table entries pointing > > to the > > + * memory backing the buffer object, and then returns a return > > code > > + * instructing the caller to retry the page access. > > + * > > + * Return: > > + * VM_FAULT_NOPAGE on success or pending signal > > + * VM_FAULT_SIGBUS on unspecified error > > + * VM_FAULT_OOM on out-of-memory > > + * VM_FAULT_RETRY if retryable wait > > + */ > > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > > + struct vm_area_struct *cvma, > > + pgoff_t num_prefault) > > +{ > > + struct vm_area_struct *vma = vmf->vma; > > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > > + vma->vm_private_data; > > + struct ttm_bo_device *bdev = bo->bdev; > > + unsigned long page_offset; > > + unsigned long page_last; > > + unsigned long pfn; > > + struct ttm_tt *ttm = NULL; > > + struct page *page; > > + int err; > > + pgoff_t i; > > + vm_fault_t ret = VM_FAULT_NOPAGE; > > + unsigned long address = vmf->address; > > + struct ttm_mem_type_manager *man = > > + &bdev->man[bo->mem.mem_type]; > > + > > /* > > * Refuse to fault imported pages. This should be handled > > * (if at all) by redirecting mmap to the exporter. 
> > */ > > - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { > > - ret = VM_FAULT_SIGBUS; > > - goto out_unlock; > > - } > > + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) > > + return VM_FAULT_SIGBUS; > > > > if (bdev->driver->fault_reserve_notify) { > > struct dma_fence *moving = dma_fence_get(bo->moving); > > @@ -169,11 +208,9 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > break; > > case -EBUSY: > > case -ERESTARTSYS: > > - ret = VM_FAULT_NOPAGE; > > - goto out_unlock; > > + return VM_FAULT_NOPAGE; > > default: > > - ret = VM_FAULT_SIGBUS; > > - goto out_unlock; > > + return VM_FAULT_SIGBUS; > > } > > > > if (bo->moving != moving) { > > @@ -189,24 +226,15 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > * move. > > */ > > ret = ttm_bo_vm_fault_idle(bo, vmf); > > - if (unlikely(ret != 0)) { > > - if (ret == VM_FAULT_RETRY && > > - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { > > - /* The BO has already been unreserved. */ > > - return ret; > > - } > > - > > - goto out_unlock; > > - } > > + if (unlikely(ret != 0)) > > + return ret; > > > > err = ttm_mem_io_lock(man, true); > > - if (unlikely(err != 0)) { > > - ret = VM_FAULT_NOPAGE; > > - goto out_unlock; > > - } > > + if (unlikely(err != 0)) > > + return VM_FAULT_NOPAGE; > > err = ttm_mem_io_reserve_vm(bo); > > if (unlikely(err != 0)) { > > - ret = VM_FAULT_SIGBUS; > > + return VM_FAULT_SIGBUS; > > goto out_io_unlock; > > This goto is now superfluous. > > > } > > > > @@ -220,17 +248,11 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > goto out_io_unlock; > > } > > > > - /* > > - * Make a local vma copy to modify the page_prot member > > - * and vm_flags if necessary. The vma parameter is protected > > - * by mmap_sem in write mode. > > - */ > > - cvma = *vma; > > - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags); > > + cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags); > > > > if (bo->mem.bus.is_iomem) { > > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, > > - cvma.vm_page_prot); > > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, > > + cvma->vm_page_prot); > > } else { > > struct ttm_operation_ctx ctx = { > > .interruptible = false, > > @@ -240,8 +262,8 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > }; > > > > ttm = bo->ttm; > > - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, > > - cvma.vm_page_prot); > > + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, > > + cvma->vm_page_prot); > > > > /* Allocate all page at once, most common usage */ > > if (ttm_tt_populate(ttm, &ctx)) { > > @@ -254,10 +276,11 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > * Speculatively prefault a number of pages. Only error on > > * first page. 
> > */ > > - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { > > + for (i = 0; i < num_prefault; ++i) { > > if (bo->mem.bus.is_iomem) { > > /* Iomem should not be marked encrypted */ > > - cvma.vm_page_prot = > > pgprot_decrypted(cvma.vm_page_prot); > > + cvma->vm_page_prot = > > + pgprot_decrypted(cvma->vm_page_prot); > > pfn = ttm_bo_io_mem_pfn(bo, page_offset); > > } else { > > page = ttm->pages[page_offset]; > > @@ -273,10 +296,10 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > } > > > > if (vma->vm_flags & VM_MIXEDMAP) > > - ret = vmf_insert_mixed(&cvma, address, > > + ret = vmf_insert_mixed(cvma, address, > > __pfn_to_pfn_t(pfn, PFN_DEV)); > > else > > - ret = vmf_insert_pfn(&cvma, address, pfn); > > + ret = vmf_insert_pfn(cvma, address, pfn); > > > > /* > > * Somebody beat us to this PTE or prefaulting to > > @@ -295,7 +318,26 @@ static vm_fault_t ttm_bo_vm_fault(struct > > vm_fault *vmf) > > ret = VM_FAULT_NOPAGE; > > out_io_unlock: > > ttm_mem_io_unlock(man); > > -out_unlock: > > + return ret; > > +} > > +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > > + > > +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > > +{ > > + struct vm_area_struct *vma = vmf->vma; > > + struct vm_area_struct cvma = *vma; > > + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) > > + vma->vm_private_data; > > That extra cast can be dropped, the vm_private_data member is a > void* > anyway. > > Regards, > Christian. > > > + vm_fault_t ret; > > + > > + ret = ttm_bo_vm_reserve(bo, vmf); > > + if (ret) > > + return ret; > > + > > + ret = ttm_bo_vm_fault_reserved(vmf, &cvma, > > TTM_BO_VM_NUM_PREFAULT); > > + if (ret == VM_FAULT_RETRY && !(vmf->flags & > > FAULT_FLAG_RETRY_NOWAIT)) > > + return ret; > > + > > reservation_object_unlock(bo->resv); > > return ret; > > } > > diff --git a/include/drm/ttm/ttm_bo_api.h > > b/include/drm/ttm/ttm_bo_api.h > > index 49d9cdfc58f2..bebfa16426ca 100644 > > --- a/include/drm/ttm/ttm_bo_api.h > > +++ b/include/drm/ttm/ttm_bo_api.h > > @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, > > struct ttm_operation_ctx *ctx); > > void ttm_bo_swapout_all(struct ttm_bo_device *bdev); > > int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo); > > + > > +/* Default number of pre-faulted pages in the TTM fault handler */ > > +#define TTM_BO_VM_NUM_PREFAULT 16 > > + > > +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > > + struct vm_fault *vmf); > > + > > +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > > + struct vm_area_struct *cvma, > > + pgoff_t num_prefault); > > #endif
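On the second nit pick: with the direct return in place the goto is indeed dead code, and returning there would also skip the ttm_mem_io_unlock() that pairs with the ttm_mem_io_lock() taken just above. A hedged sketch of the corrected hunk, assuming the goto is the part that stays:

	err = ttm_mem_io_reserve_vm(bo);
	if (unlikely(err != 0)) {
		ret = VM_FAULT_SIGBUS;
		/* Keep the goto so the io lock taken above is released. */
		goto out_io_unlock;
	}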
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index bfb25b81fed7..3bd28fb97124 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include <linux/uaccess.h> #include <linux/mem_encrypt.h> -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + * 0 on success and the bo was reserved. + * VM_FAULT_RETRY if blocking wait. + * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - - /* - * Work around locking order reversal in fault / nopfn - * between mmap_sem and bo_reserve: Perform a trylock operation - * for reserve, and if it fails, retry the fault after waiting - * for the buffer to become unreserved. - */ if (unlikely(!reservation_object_trylock(bo->resv))) { if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { @@ -151,14 +148,56 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @cvma: The struct vmw_area_struct affected. Note that this may be a + * copy of the real vma object if the caller needs, for example, VM + * flags to be temporarily altered while determining the page protection. + * @num_prefault: Maximum number of prefault pages. The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. 
+ * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + struct vm_area_struct *cvma, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct page *page; + int err; + pgoff_t i; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long address = vmf->address; + struct ttm_mem_type_manager *man = + &bdev->man[bo->mem.mem_type]; + /* * Refuse to fault imported pages. This should be handled * (if at all) by redirecting mmap to the exporter. */ - if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) { - ret = VM_FAULT_SIGBUS; - goto out_unlock; - } + if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) + return VM_FAULT_SIGBUS; if (bdev->driver->fault_reserve_notify) { struct dma_fence *moving = dma_fence_get(bo->moving); @@ -169,11 +208,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) break; case -EBUSY: case -ERESTARTSYS: - ret = VM_FAULT_NOPAGE; - goto out_unlock; + return VM_FAULT_NOPAGE; default: - ret = VM_FAULT_SIGBUS; - goto out_unlock; + return VM_FAULT_SIGBUS; } if (bo->moving != moving) { @@ -189,24 +226,15 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) * move. */ ret = ttm_bo_vm_fault_idle(bo, vmf); - if (unlikely(ret != 0)) { - if (ret == VM_FAULT_RETRY && - !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { - /* The BO has already been unreserved. */ - return ret; - } - - goto out_unlock; - } + if (unlikely(ret != 0)) + return ret; err = ttm_mem_io_lock(man, true); - if (unlikely(err != 0)) { - ret = VM_FAULT_NOPAGE; - goto out_unlock; - } + if (unlikely(err != 0)) + return VM_FAULT_NOPAGE; err = ttm_mem_io_reserve_vm(bo); if (unlikely(err != 0)) { - ret = VM_FAULT_SIGBUS; + return VM_FAULT_SIGBUS; goto out_io_unlock; } @@ -220,17 +248,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) goto out_io_unlock; } - /* - * Make a local vma copy to modify the page_prot member - * and vm_flags if necessary. The vma parameter is protected - * by mmap_sem in write mode. - */ - cvma = *vma; - cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags); + cvma->vm_page_prot = vm_get_page_prot(cvma->vm_flags); if (bo->mem.bus.is_iomem) { - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, - cvma.vm_page_prot); + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, + cvma->vm_page_prot); } else { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -240,8 +262,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) }; ttm = bo->ttm; - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, - cvma.vm_page_prot); + cvma->vm_page_prot = ttm_io_prot(bo->mem.placement, + cvma->vm_page_prot); /* Allocate all page at once, most common usage */ if (ttm_tt_populate(ttm, &ctx)) { @@ -254,10 +276,11 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) * Speculatively prefault a number of pages. Only error on * first page. 
*/ - for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) { + for (i = 0; i < num_prefault; ++i) { if (bo->mem.bus.is_iomem) { /* Iomem should not be marked encrypted */ - cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); + cvma->vm_page_prot = + pgprot_decrypted(cvma->vm_page_prot); pfn = ttm_bo_io_mem_pfn(bo, page_offset); } else { page = ttm->pages[page_offset]; @@ -273,10 +296,10 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) } if (vma->vm_flags & VM_MIXEDMAP) - ret = vmf_insert_mixed(&cvma, address, + ret = vmf_insert_mixed(cvma, address, __pfn_to_pfn_t(pfn, PFN_DEV)); else - ret = vmf_insert_pfn(&cvma, address, pfn); + ret = vmf_insert_pfn(cvma, address, pfn); /* * Somebody beat us to this PTE or prefaulting to @@ -295,7 +318,26 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) ret = VM_FAULT_NOPAGE; out_io_unlock: ttm_mem_io_unlock(man); -out_unlock: + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); + +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + vm_fault_t ret; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + ret = ttm_bo_vm_fault_reserved(vmf, &cvma, TTM_BO_VM_NUM_PREFAULT); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + reservation_object_unlock(bo->resv); return ret; } diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 49d9cdfc58f2..bebfa16426ca 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx); void ttm_bo_swapout_all(struct ttm_bo_device *bdev); int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo); + +/* Default number of pre-faulted pages in the TTM fault handler */ +#define TTM_BO_VM_NUM_PREFAULT 16 + +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf); + +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + struct vm_area_struct *cvma, + pgoff_t num_prefault); #endif
With the vmwgfx dirty tracking, the default TTM fault handler is not
completely sufficient (vmwgfx needs to modify the vma->vm_flags member,
and also needs to restrict the number of prefaults).

We also want to replicate the new ttm_bo_vm_reserve() functionality.

So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved()
and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other
drivers to use.

Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 170 ++++++++++++++++++++------------
 include/drm/ttm/ttm_bo_api.h    |  10 ++
 2 files changed, 116 insertions(+), 64 deletions(-)
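To make the motivation above concrete, here is a minimal sketch, not taken from this series, of the kind of driver fault handler the two helpers enable. The my_driver_bo_vm_fault() name is hypothetical, and the vm_flags tweak and the prefault count of 1 are only illustrations of "modify the vma->vm_flags member" and "restrict the number of prefaults":

static vm_fault_t my_driver_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct vm_area_struct cvma = *vma;	/* local copy, safe to modify */
	struct ttm_buffer_object *bo = vma->vm_private_data;
	vm_fault_t ret;

	/* Trylock the reservation, or ask the core mm to retry the fault. */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/*
	 * Illustrative only: without VM_SHARED, vm_get_page_prot() hands back
	 * a write-protected protection, so the PTEs inserted below start out
	 * read-only and a later write triggers the mkwrite() callback.
	 */
	cvma.vm_flags &= ~VM_SHARED;

	/* Restrict prefaulting to the faulting page only. */
	ret = ttm_bo_vm_fault_reserved(vmf, &cvma, 1);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* the reservation was dropped while waiting */

	reservation_object_unlock(bo->resv);
	return ret;
}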