Message ID: 1611003683-3534-2-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State: New, archived
Series: RFC Support hot device unplug in amdgpu
On Mon, Jan 18, 2021 at 4:02 PM Andrey Grodzovsky <andrey.grodzovsky@amd.com> wrote: > > On device removal reroute all CPU mappings to dummy page. > > v3: > Remove loop to find DRM file and instead access it > by vma->vm_file->private_data. Move dummy page installation > into a separate function. > > v4: > Map the entire BOs VA space into on demand allocated dummy page > on the first fault for that BO. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 ++++++++++++++++++++++++++++++++++++++++- > include/drm/ttm/ttm_bo_api.h | 2 + > 2 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 6dc96cf..ed89da3 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -34,6 +34,8 @@ > #include <drm/ttm/ttm_bo_driver.h> > #include <drm/ttm/ttm_placement.h> > #include <drm/drm_vma_manager.h> > +#include <drm/drm_drv.h> > +#include <drm/drm_managed.h> > #include <linux/mm.h> > #include <linux/pfn_t.h> > #include <linux/rbtree.h> > @@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > } > EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > > +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) > +{ > + struct page *dummy_page = (struct page *)res; > + > + __free_page(dummy_page); > +} > + > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct ttm_buffer_object *bo = vma->vm_private_data; > + struct ttm_bo_device *bdev = bo->bdev; > + struct drm_device *ddev = bo->base.dev; > + vm_fault_t ret = VM_FAULT_NOPAGE; > + unsigned long address = vma->vm_start; > + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + unsigned long pfn; > + struct page *page; > + int i; > + > + /* > + * Wait for buffer data in transit, due to a pipelined > + * move. 
> + */ > + ret = ttm_bo_vm_fault_idle(bo, vmf); > + if (unlikely(ret != 0)) > + return ret; > + > + /* Allocate new dummy page to map all the VA range in this VMA to it*/ > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!page) > + return VM_FAULT_OOM; > + > + pfn = page_to_pfn(page); > + > + /* > + * Prefault the entire VMA range right away to avoid further faults > + */ > + for (i = 0; i < num_prefault; ++i) { > + > + if (unlikely(address >= vma->vm_end)) > + break; > + > + if (vma->vm_flags & VM_MIXEDMAP) > + ret = vmf_insert_mixed_prot(vma, address, > + __pfn_to_pfn_t(pfn, PFN_DEV), > + prot); > + else > + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); > + > + /* Never error on prefaulted PTEs */ > + if (unlikely((ret & VM_FAULT_ERROR))) { > + if (i == 0) > + return VM_FAULT_NOPAGE; > + else > + break; > + } > + > + address += PAGE_SIZE; > + } > + > + /* Set the page to be freed using drmm release action */ > + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) > + return VM_FAULT_OOM; > + > + return ret; > +} > +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); > + > vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > pgprot_t prot; > struct ttm_buffer_object *bo = vma->vm_private_data; > + struct drm_device *ddev = bo->base.dev; > vm_fault_t ret; > + int idx; > > ret = ttm_bo_vm_reserve(bo, vmf); > if (ret) > return ret; > > prot = vma->vm_page_prot; > - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + if (drm_dev_enter(ddev, &idx)) { > + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + drm_dev_exit(idx); > + } else { > + ret = ttm_bo_vm_dummy_page(vmf, prot); > + } > if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > return ret; > > dma_resv_unlock(bo->base.resv); > > return ret; > + > + return ret; Duplicate return here. Alex > } > EXPORT_SYMBOL(ttm_bo_vm_fault); > > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index e17be32..12fb240 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); > int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > void *buf, int len, int write); > > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); > + > #endif > -- > 2.7.4 > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
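For clarity, the fix Alex is asking for is simply dropping the unreachable second return. A sketch of the corrected tail of ttm_bo_vm_fault(), not a separate patch:

	dma_resv_unlock(bo->base.resv);

	return ret;
}
EXPORT_SYMBOL(ttm_bo_vm_fault);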
Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky: > On device removal reroute all CPU mappings to dummy page. > > v3: > Remove loop to find DRM file and instead access it > by vma->vm_file->private_data. Move dummy page installation > into a separate function. > > v4: > Map the entire BOs VA space into on demand allocated dummy page > on the first fault for that BO. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 ++++++++++++++++++++++++++++++++++++++++- > include/drm/ttm/ttm_bo_api.h | 2 + > 2 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 6dc96cf..ed89da3 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -34,6 +34,8 @@ > #include <drm/ttm/ttm_bo_driver.h> > #include <drm/ttm/ttm_placement.h> > #include <drm/drm_vma_manager.h> > +#include <drm/drm_drv.h> > +#include <drm/drm_managed.h> > #include <linux/mm.h> > #include <linux/pfn_t.h> > #include <linux/rbtree.h> > @@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > } > EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > > +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) > +{ > + struct page *dummy_page = (struct page *)res; > + > + __free_page(dummy_page); > +} > + > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct ttm_buffer_object *bo = vma->vm_private_data; > + struct ttm_bo_device *bdev = bo->bdev; > + struct drm_device *ddev = bo->base.dev; > + vm_fault_t ret = VM_FAULT_NOPAGE; > + unsigned long address = vma->vm_start; > + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + unsigned long pfn; > + struct page *page; > + int i; > + > + /* > + * Wait for buffer data in transit, due to a pipelined > + * move. > + */ > + ret = ttm_bo_vm_fault_idle(bo, vmf); > + if (unlikely(ret != 0)) > + return ret; This is superfluous and probably quite harmful here because we wait for the hardware to do something. We map a dummy page instead of the real BO content to the whole range anyway, so no need to wait for the real BO content to show up. > + > + /* Allocate new dummy page to map all the VA range in this VMA to it*/ > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!page) > + return VM_FAULT_OOM; > + > + pfn = page_to_pfn(page); > + > + /* > + * Prefault the entire VMA range right away to avoid further faults > + */ > + for (i = 0; i < num_prefault; ++i) { Maybe rename the variable to num_pages. I was confused for a moment why we still prefault. Alternative you can just drop i and do "for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)". > + > + if (unlikely(address >= vma->vm_end)) > + break; > + > + if (vma->vm_flags & VM_MIXEDMAP) > + ret = vmf_insert_mixed_prot(vma, address, > + __pfn_to_pfn_t(pfn, PFN_DEV), > + prot); > + else > + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); > + > + /* Never error on prefaulted PTEs */ > + if (unlikely((ret & VM_FAULT_ERROR))) { > + if (i == 0) > + return VM_FAULT_NOPAGE; > + else > + break; This should probably be modified to either always return the error or always ignore it. Apart from that looks good to me. Christian. 
> + } > + > + address += PAGE_SIZE; > + } > + > + /* Set the page to be freed using drmm release action */ > + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) > + return VM_FAULT_OOM; > + > + return ret; > +} > +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); > + > vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > pgprot_t prot; > struct ttm_buffer_object *bo = vma->vm_private_data; > + struct drm_device *ddev = bo->base.dev; > vm_fault_t ret; > + int idx; > > ret = ttm_bo_vm_reserve(bo, vmf); > if (ret) > return ret; > > prot = vma->vm_page_prot; > - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + if (drm_dev_enter(ddev, &idx)) { > + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + drm_dev_exit(idx); > + } else { > + ret = ttm_bo_vm_dummy_page(vmf, prot); > + } > if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > return ret; > > dma_resv_unlock(bo->base.resv); > > return ret; > + > + return ret; > } > EXPORT_SYMBOL(ttm_bo_vm_fault); > > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index e17be32..12fb240 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); > int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > void *buf, int len, int write); > > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); > + > #endif
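To illustrate Christian's suggestions, here is a rough sketch of the prefault loop with them folded in: no ttm_bo_vm_fault_idle() wait beforehand, iteration over the address instead of a separate counter, and uniform handling of insertion errors. This is only a sketch of the suggested direction, not the code that was merged:

	unsigned long address;

	for (address = vma->vm_start; address < vma->vm_end;
	     address += PAGE_SIZE) {
		if (vma->vm_flags & VM_MIXEDMAP)
			ret = vmf_insert_mixed_prot(vma, address,
						    __pfn_to_pfn_t(pfn, PFN_DEV),
						    prot);
		else
			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);

		/* Handle every failed insertion the same way instead of
		 * special-casing the first iteration. */
		if (ret & VM_FAULT_ERROR)
			return ret;
	}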
On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote: > On device removal reroute all CPU mappings to dummy page. > > v3: > Remove loop to find DRM file and instead access it > by vma->vm_file->private_data. Move dummy page installation > into a separate function. > > v4: > Map the entire BOs VA space into on demand allocated dummy page > on the first fault for that BO. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 ++++++++++++++++++++++++++++++++++++++++- > include/drm/ttm/ttm_bo_api.h | 2 + > 2 files changed, 83 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 6dc96cf..ed89da3 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -34,6 +34,8 @@ > #include <drm/ttm/ttm_bo_driver.h> > #include <drm/ttm/ttm_placement.h> > #include <drm/drm_vma_manager.h> > +#include <drm/drm_drv.h> > +#include <drm/drm_managed.h> > #include <linux/mm.h> > #include <linux/pfn_t.h> > #include <linux/rbtree.h> > @@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > } > EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > > +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) > +{ > + struct page *dummy_page = (struct page *)res; > + > + __free_page(dummy_page); > +} > + > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) > +{ > + struct vm_area_struct *vma = vmf->vma; > + struct ttm_buffer_object *bo = vma->vm_private_data; > + struct ttm_bo_device *bdev = bo->bdev; > + struct drm_device *ddev = bo->base.dev; > + vm_fault_t ret = VM_FAULT_NOPAGE; > + unsigned long address = vma->vm_start; > + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > + unsigned long pfn; > + struct page *page; > + int i; > + > + /* > + * Wait for buffer data in transit, due to a pipelined > + * move. > + */ > + ret = ttm_bo_vm_fault_idle(bo, vmf); > + if (unlikely(ret != 0)) > + return ret; > + > + /* Allocate new dummy page to map all the VA range in this VMA to it*/ > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!page) > + return VM_FAULT_OOM; > + > + pfn = page_to_pfn(page); > + > + /* > + * Prefault the entire VMA range right away to avoid further faults > + */ > + for (i = 0; i < num_prefault; ++i) { > + > + if (unlikely(address >= vma->vm_end)) > + break; > + > + if (vma->vm_flags & VM_MIXEDMAP) > + ret = vmf_insert_mixed_prot(vma, address, > + __pfn_to_pfn_t(pfn, PFN_DEV), > + prot); > + else > + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); > + > + /* Never error on prefaulted PTEs */ > + if (unlikely((ret & VM_FAULT_ERROR))) { > + if (i == 0) > + return VM_FAULT_NOPAGE; > + else > + break; > + } > + > + address += PAGE_SIZE; > + } > + > + /* Set the page to be freed using drmm release action */ > + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) > + return VM_FAULT_OOM; > + > + return ret; > +} > +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is gone) to the drm level, since nothing ttm specific in here. Probably stuff it into drm_gem.c (but really it's not even gem specific, it's fully generic "replace this vma with dummy pages pls" function. Aside from this nit I think the overall approach you have here is starting to look good. Lots of work&polish, but imo we're getting there and can start landing stuff soon. 
-Daniel > + > vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > { > struct vm_area_struct *vma = vmf->vma; > pgprot_t prot; > struct ttm_buffer_object *bo = vma->vm_private_data; > + struct drm_device *ddev = bo->base.dev; > vm_fault_t ret; > + int idx; > > ret = ttm_bo_vm_reserve(bo, vmf); > if (ret) > return ret; > > prot = vma->vm_page_prot; > - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + if (drm_dev_enter(ddev, &idx)) { > + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > + drm_dev_exit(idx); > + } else { > + ret = ttm_bo_vm_dummy_page(vmf, prot); > + } > if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > return ret; > > dma_resv_unlock(bo->base.resv); > > return ret; > + > + return ret; > } > EXPORT_SYMBOL(ttm_bo_vm_fault); > > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index e17be32..12fb240 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); > int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > void *buf, int len, int write); > > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); > + > #endif > -- > 2.7.4 >
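To make Daniel's suggestion concrete, a generic helper along these lines could in principle live at the DRM level (drm_gem.c or higher). The name drm_vma_map_dummy_page() and its exact signature are made up purely for illustration; note that it deliberately leaves the dummy page's lifetime to the caller, which is the sticking point discussed in the reply below:

/* Hypothetical generic helper: back every page of @vma with one zeroed
 * dummy page.  Nothing TTM-specific in here; the caller gets the page
 * back and is responsible for freeing it. */
vm_fault_t drm_vma_map_dummy_page(struct vm_area_struct *vma,
				  pgprot_t prot, struct page **dummy)
{
	unsigned long address;
	unsigned long pfn;
	struct page *page;
	vm_fault_t ret = VM_FAULT_NOPAGE;

	*dummy = NULL;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return VM_FAULT_OOM;

	pfn = page_to_pfn(page);
	for (address = vma->vm_start; address < vma->vm_end;
	     address += PAGE_SIZE) {
		if (vma->vm_flags & VM_MIXEDMAP)
			ret = vmf_insert_mixed_prot(vma, address,
						    __pfn_to_pfn_t(pfn, PFN_DEV),
						    prot);
		else
			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
		if (ret & VM_FAULT_ERROR)
			break;
	}

	*dummy = page;
	return ret;
}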
On 1/19/21 8:56 AM, Daniel Vetter wrote: > On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote: >> On device removal reroute all CPU mappings to dummy page. >> >> v3: >> Remove loop to find DRM file and instead access it >> by vma->vm_file->private_data. Move dummy page installation >> into a separate function. >> >> v4: >> Map the entire BOs VA space into on demand allocated dummy page >> on the first fault for that BO. >> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >> --- >> drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 ++++++++++++++++++++++++++++++++++++++++- >> include/drm/ttm/ttm_bo_api.h | 2 + >> 2 files changed, 83 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c >> index 6dc96cf..ed89da3 100644 >> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c >> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c >> @@ -34,6 +34,8 @@ >> #include <drm/ttm/ttm_bo_driver.h> >> #include <drm/ttm/ttm_placement.h> >> #include <drm/drm_vma_manager.h> >> +#include <drm/drm_drv.h> >> +#include <drm/drm_managed.h> >> #include <linux/mm.h> >> #include <linux/pfn_t.h> >> #include <linux/rbtree.h> >> @@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, >> } >> EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); >> >> +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) >> +{ >> + struct page *dummy_page = (struct page *)res; >> + >> + __free_page(dummy_page); >> +} >> + >> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) >> +{ >> + struct vm_area_struct *vma = vmf->vma; >> + struct ttm_buffer_object *bo = vma->vm_private_data; >> + struct ttm_bo_device *bdev = bo->bdev; >> + struct drm_device *ddev = bo->base.dev; >> + vm_fault_t ret = VM_FAULT_NOPAGE; >> + unsigned long address = vma->vm_start; >> + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; >> + unsigned long pfn; >> + struct page *page; >> + int i; >> + >> + /* >> + * Wait for buffer data in transit, due to a pipelined >> + * move. >> + */ >> + ret = ttm_bo_vm_fault_idle(bo, vmf); >> + if (unlikely(ret != 0)) >> + return ret; >> + >> + /* Allocate new dummy page to map all the VA range in this VMA to it*/ >> + page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!page) >> + return VM_FAULT_OOM; >> + >> + pfn = page_to_pfn(page); >> + >> + /* >> + * Prefault the entire VMA range right away to avoid further faults >> + */ >> + for (i = 0; i < num_prefault; ++i) { >> + >> + if (unlikely(address >= vma->vm_end)) >> + break; >> + >> + if (vma->vm_flags & VM_MIXEDMAP) >> + ret = vmf_insert_mixed_prot(vma, address, >> + __pfn_to_pfn_t(pfn, PFN_DEV), >> + prot); >> + else >> + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); >> + >> + /* Never error on prefaulted PTEs */ >> + if (unlikely((ret & VM_FAULT_ERROR))) { >> + if (i == 0) >> + return VM_FAULT_NOPAGE; >> + else >> + break; >> + } >> + >> + address += PAGE_SIZE; >> + } >> + >> + /* Set the page to be freed using drmm release action */ >> + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) >> + return VM_FAULT_OOM; >> + >> + return ret; >> +} >> +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); > I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is > gone) to the drm level, since nothing ttm specific in here. Probably stuff > it into drm_gem.c (but really it's not even gem specific, it's fully > generic "replace this vma with dummy pages pls" function. 
Once I started with this I noticed that drmm_add_action_or_reset depends on struct drm_device *ddev = bo->base.dev and bo is the private data we embed at the TTM level when setting up the mapping and so this forces to move drmm_add_action_or_reset out of this function to every client who uses this function, and then you separate the logic of page allocation from it's release. So I suggest we keep it as is. Andrey > > Aside from this nit I think the overall approach you have here is starting > to look good. Lots of work&polish, but imo we're getting there and can > start landing stuff soon. > -Daniel > >> + >> vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) >> { >> struct vm_area_struct *vma = vmf->vma; >> pgprot_t prot; >> struct ttm_buffer_object *bo = vma->vm_private_data; >> + struct drm_device *ddev = bo->base.dev; >> vm_fault_t ret; >> + int idx; >> >> ret = ttm_bo_vm_reserve(bo, vmf); >> if (ret) >> return ret; >> >> prot = vma->vm_page_prot; >> - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); >> + if (drm_dev_enter(ddev, &idx)) { >> + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); >> + drm_dev_exit(idx); >> + } else { >> + ret = ttm_bo_vm_dummy_page(vmf, prot); >> + } >> if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) >> return ret; >> >> dma_resv_unlock(bo->base.resv); >> >> return ret; >> + >> + return ret; >> } >> EXPORT_SYMBOL(ttm_bo_vm_fault); >> >> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h >> index e17be32..12fb240 100644 >> --- a/include/drm/ttm/ttm_bo_api.h >> +++ b/include/drm/ttm/ttm_bo_api.h >> @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); >> int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, >> void *buf, int len, int write); >> >> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); >> + >> #endif >> -- >> 2.7.4 >>
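Spelled out with the hypothetical helper sketched above (which hands the allocated page back), the split Andrey objects to would look roughly like this at the TTM call site: allocation and mapping in generic code, but the release registration forced onto every caller, because only this layer knows about bo->base.dev. Illustration only:

	struct drm_device *ddev = bo->base.dev;
	struct page *dummy_page;
	vm_fault_t ret;

	/* Generic part: allocate the dummy page and map the whole VMA. */
	ret = drm_vma_map_dummy_page(vma, prot, &dummy_page);
	if (ret & VM_FAULT_ERROR) {
		if (dummy_page)
			__free_page(dummy_page);
		return ret;
	}

	/* TTM/driver part: tie the page's lifetime to the drm_device. */
	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, dummy_page))
		return VM_FAULT_OOM;

	return ret;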
Hey Daniel, just a ping. Andrey On 1/25/21 10:28 AM, Andrey Grodzovsky wrote: > > On 1/19/21 8:56 AM, Daniel Vetter wrote: >> On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote: >>> On device removal reroute all CPU mappings to dummy page. >>> >>> v3: >>> Remove loop to find DRM file and instead access it >>> by vma->vm_file->private_data. Move dummy page installation >>> into a separate function. >>> >>> v4: >>> Map the entire BOs VA space into on demand allocated dummy page >>> on the first fault for that BO. >>> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> >>> --- >>> drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 >>> ++++++++++++++++++++++++++++++++++++++++- >>> include/drm/ttm/ttm_bo_api.h | 2 + >>> 2 files changed, 83 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> index 6dc96cf..ed89da3 100644 >>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c >>> @@ -34,6 +34,8 @@ >>> #include <drm/ttm/ttm_bo_driver.h> >>> #include <drm/ttm/ttm_placement.h> >>> #include <drm/drm_vma_manager.h> >>> +#include <drm/drm_drv.h> >>> +#include <drm/drm_managed.h> >>> #include <linux/mm.h> >>> #include <linux/pfn_t.h> >>> #include <linux/rbtree.h> >>> @@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault >>> *vmf, >>> } >>> EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); >>> +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) >>> +{ >>> + struct page *dummy_page = (struct page *)res; >>> + >>> + __free_page(dummy_page); >>> +} >>> + >>> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) >>> +{ >>> + struct vm_area_struct *vma = vmf->vma; >>> + struct ttm_buffer_object *bo = vma->vm_private_data; >>> + struct ttm_bo_device *bdev = bo->bdev; >>> + struct drm_device *ddev = bo->base.dev; >>> + vm_fault_t ret = VM_FAULT_NOPAGE; >>> + unsigned long address = vma->vm_start; >>> + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; >>> + unsigned long pfn; >>> + struct page *page; >>> + int i; >>> + >>> + /* >>> + * Wait for buffer data in transit, due to a pipelined >>> + * move. >>> + */ >>> + ret = ttm_bo_vm_fault_idle(bo, vmf); >>> + if (unlikely(ret != 0)) >>> + return ret; >>> + >>> + /* Allocate new dummy page to map all the VA range in this VMA to it*/ >>> + page = alloc_page(GFP_KERNEL | __GFP_ZERO); >>> + if (!page) >>> + return VM_FAULT_OOM; >>> + >>> + pfn = page_to_pfn(page); >>> + >>> + /* >>> + * Prefault the entire VMA range right away to avoid further faults >>> + */ >>> + for (i = 0; i < num_prefault; ++i) { >>> + >>> + if (unlikely(address >= vma->vm_end)) >>> + break; >>> + >>> + if (vma->vm_flags & VM_MIXEDMAP) >>> + ret = vmf_insert_mixed_prot(vma, address, >>> + __pfn_to_pfn_t(pfn, PFN_DEV), >>> + prot); >>> + else >>> + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); >>> + >>> + /* Never error on prefaulted PTEs */ >>> + if (unlikely((ret & VM_FAULT_ERROR))) { >>> + if (i == 0) >>> + return VM_FAULT_NOPAGE; >>> + else >>> + break; >>> + } >>> + >>> + address += PAGE_SIZE; >>> + } >>> + >>> + /* Set the page to be freed using drmm release action */ >>> + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) >>> + return VM_FAULT_OOM; >>> + >>> + return ret; >>> +} >>> +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); >> I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is >> gone) to the drm level, since nothing ttm specific in here. 
Probably stuff >> it into drm_gem.c (but really it's not even gem specific, it's fully >> generic "replace this vma with dummy pages pls" function. > > > Once I started with this I noticed that drmm_add_action_or_reset depends > on struct drm_device *ddev = bo->base.dev and bo is the private data > we embed at the TTM level when setting up the mapping and so this forces > to move drmm_add_action_or_reset out of this function to every client who uses > this function, and then you separate the logic of page allocation from it's > release. > So I suggest we keep it as is. > > Andrey > > >> >> Aside from this nit I think the overall approach you have here is starting >> to look good. Lots of work&polish, but imo we're getting there and can >> start landing stuff soon. >> -Daniel >> >>> + >>> vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) >>> { >>> struct vm_area_struct *vma = vmf->vma; >>> pgprot_t prot; >>> struct ttm_buffer_object *bo = vma->vm_private_data; >>> + struct drm_device *ddev = bo->base.dev; >>> vm_fault_t ret; >>> + int idx; >>> ret = ttm_bo_vm_reserve(bo, vmf); >>> if (ret) >>> return ret; >>> prot = vma->vm_page_prot; >>> - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); >>> + if (drm_dev_enter(ddev, &idx)) { >>> + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); >>> + drm_dev_exit(idx); >>> + } else { >>> + ret = ttm_bo_vm_dummy_page(vmf, prot); >>> + } >>> if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) >>> return ret; >>> dma_resv_unlock(bo->base.resv); >>> return ret; >>> + >>> + return ret; >>> } >>> EXPORT_SYMBOL(ttm_bo_vm_fault); >>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h >>> index e17be32..12fb240 100644 >>> --- a/include/drm/ttm/ttm_bo_api.h >>> +++ b/include/drm/ttm/ttm_bo_api.h >>> @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); >>> int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, >>> void *buf, int len, int write); >>> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); >>> + >>> #endif >>> -- >>> 2.7.4 >>>
On Wed, Jan 27, 2021 at 09:29:41AM -0500, Andrey Grodzovsky wrote: > Hey Daniel, just a ping. Was on vacations last week. > Andrey > > On 1/25/21 10:28 AM, Andrey Grodzovsky wrote: > > > > On 1/19/21 8:56 AM, Daniel Vetter wrote: > > > On Mon, Jan 18, 2021 at 04:01:10PM -0500, Andrey Grodzovsky wrote: > > > > On device removal reroute all CPU mappings to dummy page. > > > > > > > > v3: > > > > Remove loop to find DRM file and instead access it > > > > by vma->vm_file->private_data. Move dummy page installation > > > > into a separate function. > > > > > > > > v4: > > > > Map the entire BOs VA space into on demand allocated dummy page > > > > on the first fault for that BO. > > > > > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> > > > > --- > > > > drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 > > > > ++++++++++++++++++++++++++++++++++++++++- > > > > include/drm/ttm/ttm_bo_api.h | 2 + > > > > 2 files changed, 83 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > > > index 6dc96cf..ed89da3 100644 > > > > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > > > > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > > > > @@ -34,6 +34,8 @@ > > > > #include <drm/ttm/ttm_bo_driver.h> > > > > #include <drm/ttm/ttm_placement.h> > > > > #include <drm/drm_vma_manager.h> > > > > +#include <drm/drm_drv.h> > > > > +#include <drm/drm_managed.h> > > > > #include <linux/mm.h> > > > > #include <linux/pfn_t.h> > > > > #include <linux/rbtree.h> > > > > @@ -380,25 +382,103 @@ vm_fault_t > > > > ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > > > > } > > > > EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); > > > > +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) > > > > +{ > > > > + struct page *dummy_page = (struct page *)res; > > > > + > > > > + __free_page(dummy_page); > > > > +} > > > > + > > > > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) > > > > +{ > > > > + struct vm_area_struct *vma = vmf->vma; > > > > + struct ttm_buffer_object *bo = vma->vm_private_data; > > > > + struct ttm_bo_device *bdev = bo->bdev; > > > > + struct drm_device *ddev = bo->base.dev; > > > > + vm_fault_t ret = VM_FAULT_NOPAGE; > > > > + unsigned long address = vma->vm_start; > > > > + unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; > > > > + unsigned long pfn; > > > > + struct page *page; > > > > + int i; > > > > + > > > > + /* > > > > + * Wait for buffer data in transit, due to a pipelined > > > > + * move. 
> > > > + */ > > > > + ret = ttm_bo_vm_fault_idle(bo, vmf); > > > > + if (unlikely(ret != 0)) > > > > + return ret; > > > > + > > > > + /* Allocate new dummy page to map all the VA range in this VMA to it*/ > > > > + page = alloc_page(GFP_KERNEL | __GFP_ZERO); > > > > + if (!page) > > > > + return VM_FAULT_OOM; > > > > + > > > > + pfn = page_to_pfn(page); > > > > + > > > > + /* > > > > + * Prefault the entire VMA range right away to avoid further faults > > > > + */ > > > > + for (i = 0; i < num_prefault; ++i) { > > > > + > > > > + if (unlikely(address >= vma->vm_end)) > > > > + break; > > > > + > > > > + if (vma->vm_flags & VM_MIXEDMAP) > > > > + ret = vmf_insert_mixed_prot(vma, address, > > > > + __pfn_to_pfn_t(pfn, PFN_DEV), > > > > + prot); > > > > + else > > > > + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); > > > > + > > > > + /* Never error on prefaulted PTEs */ > > > > + if (unlikely((ret & VM_FAULT_ERROR))) { > > > > + if (i == 0) > > > > + return VM_FAULT_NOPAGE; > > > > + else > > > > + break; > > > > + } > > > > + > > > > + address += PAGE_SIZE; > > > > + } > > > > + > > > > + /* Set the page to be freed using drmm release action */ > > > > + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) > > > > + return VM_FAULT_OOM; > > > > + > > > > + return ret; > > > > +} > > > > +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); > > > I think we can lift this entire thing (once the ttm_bo_vm_fault_idle is > > > gone) to the drm level, since nothing ttm specific in here. Probably stuff > > > it into drm_gem.c (but really it's not even gem specific, it's fully > > > generic "replace this vma with dummy pages pls" function. > > > > > > Once I started with this I noticed that drmm_add_action_or_reset depends > > on struct drm_device *ddev = bo->base.dev and bo is the private data > > we embed at the TTM level when setting up the mapping and so this forces > > to move drmm_add_action_or_reset out of this function to every client who uses > > this function, and then you separate the logic of page allocation from > > it's release. > > So I suggest we keep it as is. Uh disappointing. Thing is, ttm essentially means drm devices with gem, except for vmwgfx, which is a drm_device without gem. And I think one of the remaining ttm refactors in this area is to move ttm_device over into drm_device someone, and then we'd have bo->base.dev always set to something that drmm_add_action_or_reset can use. I guess hand-rolling for now and jotting this down as a TODO item is fine too, but would be good to get this addressed since that's another reason here to do this. Maybe sync with Christian how to best do this. -Daniel > > > > Andrey > > > > > > > > > > Aside from this nit I think the overall approach you have here is starting > > > to look good. Lots of work&polish, but imo we're getting there and can > > > start landing stuff soon. 
> > > -Daniel > > > > > > > + > > > > vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > > > > { > > > > struct vm_area_struct *vma = vmf->vma; > > > > pgprot_t prot; > > > > struct ttm_buffer_object *bo = vma->vm_private_data; > > > > + struct drm_device *ddev = bo->base.dev; > > > > vm_fault_t ret; > > > > + int idx; > > > > ret = ttm_bo_vm_reserve(bo, vmf); > > > > if (ret) > > > > return ret; > > > > prot = vma->vm_page_prot; > > > > - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > > > > + if (drm_dev_enter(ddev, &idx)) { > > > > + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > > > > + drm_dev_exit(idx); > > > > + } else { > > > > + ret = ttm_bo_vm_dummy_page(vmf, prot); > > > > + } > > > > if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > > > > return ret; > > > > dma_resv_unlock(bo->base.resv); > > > > return ret; > > > > + > > > > + return ret; > > > > } > > > > EXPORT_SYMBOL(ttm_bo_vm_fault); > > > > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > > > > index e17be32..12fb240 100644 > > > > --- a/include/drm/ttm/ttm_bo_api.h > > > > +++ b/include/drm/ttm/ttm_bo_api.h > > > > @@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma); > > > > int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, > > > > void *buf, int len, int write); > > > > +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot); > > > > + > > > > #endif > > > > -- > > > > 2.7.4 > > > >
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dc96cf..ed89da3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/drm_vma_manager.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
 #include <linux/mm.h>
 #include <linux/pfn_t.h>
 #include <linux/rbtree.h>
@@ -380,25 +382,103 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
 
+static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
+{
+	struct page *dummy_page = (struct page *)res;
+
+	__free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct drm_device *ddev = bo->base.dev;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	unsigned long address = vma->vm_start;
+	unsigned long num_prefault = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long pfn;
+	struct page *page;
+	int i;
+
+	/*
+	 * Wait for buffer data in transit, due to a pipelined
+	 * move.
+	 */
+	ret = ttm_bo_vm_fault_idle(bo, vmf);
+	if (unlikely(ret != 0))
+		return ret;
+
+	/* Allocate new dummy page to map all the VA range in this VMA to it*/
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return VM_FAULT_OOM;
+
+	pfn = page_to_pfn(page);
+
+	/*
+	 * Prefault the entire VMA range right away to avoid further faults
+	 */
+	for (i = 0; i < num_prefault; ++i) {
+
+		if (unlikely(address >= vma->vm_end))
+			break;
+
+		if (vma->vm_flags & VM_MIXEDMAP)
+			ret = vmf_insert_mixed_prot(vma, address,
+						    __pfn_to_pfn_t(pfn, PFN_DEV),
+						    prot);
+		else
+			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+
+		/* Never error on prefaulted PTEs */
+		if (unlikely((ret & VM_FAULT_ERROR))) {
+			if (i == 0)
+				return VM_FAULT_NOPAGE;
+			else
+				break;
+		}
+
+		address += PAGE_SIZE;
+	}
+
+	/* Set the page to be freed using drmm release action */
+	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+		return VM_FAULT_OOM;
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	pgprot_t prot;
 	struct ttm_buffer_object *bo = vma->vm_private_data;
+	struct drm_device *ddev = bo->base.dev;
 	vm_fault_t ret;
+	int idx;
 
 	ret = ttm_bo_vm_reserve(bo, vmf);
 	if (ret)
 		return ret;
 
 	prot = vma->vm_page_prot;
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+	if (drm_dev_enter(ddev, &idx)) {
+		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, prot);
+	}
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
 
 	dma_resv_unlock(bo->base.resv);
 
 	return ret;
+
+	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault);
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e17be32..12fb240 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -643,4 +643,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma);
 int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 		     void *buf, int len, int write);
 
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
+
 #endif
On device removal reroute all CPU mappings to dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BOs VA space into on demand allocated dummy page
on the first fault for that BO.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 82 ++++++++++++++++++++++++++++++++++++++++-
 include/drm/ttm/ttm_bo_api.h    |  2 +
 2 files changed, 83 insertions(+), 1 deletion(-)
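As a usage note (not part of this patch): TTM drivers normally reach ttm_bo_vm_fault() through their vm_operations_struct, so once drm_dev_unplug() has marked the device gone, drm_dev_enter() fails in the fault handler and every further CPU fault on an existing mapping lands in ttm_bo_vm_dummy_page(). A minimal sketch, with my_driver_ttm_vm_ops as a placeholder name:

static const struct vm_operations_struct my_driver_ttm_vm_ops = {
	.fault = ttm_bo_vm_fault,	/* takes the dummy-page branch after unplug */
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
	.access = ttm_bo_vm_access,
};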