| Field | Value |
|---|---|
| Message ID | 20191127083120.34611-5-thomas_os@shipmail.org (mailing list archive) |
| State | New, archived |
| Headers | show |
| Series | Huge page-table entries for TTM (expand) |
Am 27.11.19 um 09:31 schrieb Thomas Hellström (VMware): > From: Thomas Hellstrom <thellstrom@vmware.com> > > Support huge (PMD-size and PUD-size) page-table entries by providing a > huge_fault() callback. > We still support private mappings and write-notify by splitting the huge > page-table entries on write-access. > > Note that for huge page-faults to occur, either the kernel needs to be > compiled with trans-huge-pages always enabled, or the kernel needs to be > compiled with trans-huge-pages enabled using madvise, and the user-space > app needs to call madvise() to enable trans-huge pages on a per-mapping > basis. > > Furthermore huge page-faults will not occur unless buffer objects and > user-space addresses are aligned on huge page size boundaries. > > Cc: Andrew Morton <akpm@linux-foundation.org> > Cc: Michal Hocko <mhocko@suse.com> > Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> > Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> > Cc: Ralph Campbell <rcampbell@nvidia.com> > Cc: "Jérôme Glisse" <jglisse@redhat.com> > Cc: "Christian König" <christian.koenig@amd.com> > Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> > --- > drivers/gpu/drm/ttm/ttm_bo_vm.c | 139 +++++++++++++++++++++++++++++++- > include/drm/ttm/ttm_bo_api.h | 3 +- > 2 files changed, 138 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c > index 2098f8d4dfc5..8d6089880e39 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c > @@ -150,6 +150,84 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > } > EXPORT_SYMBOL(ttm_bo_vm_reserve); > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > +/** > + * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults > + * @vmf: Fault data > + * @bo: The buffer object > + * @page_offset: Page offset from bo start > + * @fault_page_size: The size of the fault in pages. > + * @pgprot: The page protections. 
> + * Does additional checking whether it's possible to insert a PUD or PMD > + * pfn and performs the insertion. > + * > + * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if > + * a huge fault was not possible, and a VM_FAULT_ERROR code otherwise. > + */ > +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, > + struct ttm_buffer_object *bo, > + pgoff_t page_offset, > + pgoff_t fault_page_size, > + pgprot_t pgprot) > +{ > + pgoff_t i; > + vm_fault_t ret; > + unsigned long pfn; > + pfn_t pfnt; > + struct ttm_tt *ttm = bo->ttm; > + bool write = vmf->flags & FAULT_FLAG_WRITE; > + > + > + /* Fault should not cross bo boundary */ > + page_offset &= ~(fault_page_size - 1); > + if (page_offset + fault_page_size > bo->num_pages) > + goto out_fallback; > + > + if (bo->mem.bus.is_iomem) > + pfn = ttm_bo_io_mem_pfn(bo, page_offset); > + else > + pfn = page_to_pfn(ttm->pages[page_offset]); > + > + /* pfn must be fault_page_size aligned. */ > + if ((pfn & (fault_page_size - 1)) != 0) > + goto out_fallback; > + > + /* IO memory is OK now, TT memory must be contigous. */ That won't work correctly, IO mem might not be contiguous either. We either need to call ttm_bo_io_mem_pfn() multiple times and check that the addresses are linear or return the length additional to the pfn. Regards, Christian. 
> + if (!bo->mem.bus.is_iomem) > + for (i = 1; i < fault_page_size; ++i) { > + if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) > + goto out_fallback; > + } > + > + pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); > + if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) > + ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); > +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > + else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) > + ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); > +#endif > + else > + WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); > + > + if (ret != VM_FAULT_NOPAGE) > + goto out_fallback; > + > + return VM_FAULT_NOPAGE; > +out_fallback: > + count_vm_event(THP_FAULT_FALLBACK); > + return VM_FAULT_FALLBACK; > +} > +#else > +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, > + struct ttm_buffer_object *bo, > + pgoff_t page_offset, > + pgoff_t fault_page_size, > + pgprot_t pgprot) > +{ > + return VM_FAULT_NOPAGE; > +} > +#endif > + > /** > * ttm_bo_vm_fault_reserved - TTM fault helper > * @vmf: The struct vm_fault given as argument to the fault callback > @@ -170,7 +248,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); > */ > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > pgprot_t prot, > - pgoff_t num_prefault) > + pgoff_t num_prefault, > + pgoff_t fault_page_size) > { > struct vm_area_struct *vma = vmf->vma; > struct ttm_buffer_object *bo = vma->vm_private_data; > @@ -262,6 +341,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > prot = pgprot_decrypted(prot); > } > > + /* We don't prefault on huge faults. Yet. */ > + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) { > + ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset, > + fault_page_size, prot); > + goto out_io_unlock; > + } > + > /* > * Speculatively prefault a number of pages. Only error on > * first page. 
> @@ -320,7 +406,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > return ret; > > prot = vma->vm_page_prot; > - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); > + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); > if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > return ret; > > @@ -330,6 +416,50 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) > } > EXPORT_SYMBOL(ttm_bo_vm_fault); > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > +static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf, > + enum page_entry_size pe_size) > +{ > + struct vm_area_struct *vma = vmf->vma; > + pgprot_t prot; > + struct ttm_buffer_object *bo = vma->vm_private_data; > + vm_fault_t ret; > + pgoff_t fault_page_size = 0; > + bool write = vmf->flags & FAULT_FLAG_WRITE; > + > + switch (pe_size) { > + case PE_SIZE_PMD: > + fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; > + break; > +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD > + case PE_SIZE_PUD: > + fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; > + break; > +#endif > + default: > + WARN_ON_ONCE(1); > + return VM_FAULT_FALLBACK; > + } > + > + /* Fallback on write dirty-tracking or COW */ > + if (write && !(pgprot_val(vmf->vma->vm_page_prot) & _PAGE_RW)) > + return VM_FAULT_FALLBACK; > + > + ret = ttm_bo_vm_reserve(bo, vmf); > + if (ret) > + return ret; > + > + prot = vm_get_page_prot(vma->vm_flags); > + ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); > + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) > + return ret; > + > + dma_resv_unlock(bo->base.resv); > + > + return ret; > +} > +#endif > + > void ttm_bo_vm_open(struct vm_area_struct *vma) > { > struct ttm_buffer_object *bo = vma->vm_private_data; > @@ -431,7 +561,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = { > .fault = ttm_bo_vm_fault, > .open = ttm_bo_vm_open, > .close = ttm_bo_vm_close, > - .access = ttm_bo_vm_access > + .access = ttm_bo_vm_access, > 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE > + .huge_fault = ttm_bo_vm_huge_fault, > +#endif > }; > > static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev, > diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h > index 66ca49db9633..4fc90d53aa15 100644 > --- a/include/drm/ttm/ttm_bo_api.h > +++ b/include/drm/ttm/ttm_bo_api.h > @@ -732,7 +732,8 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, > > vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, > pgprot_t prot, > - pgoff_t num_prefault); > + pgoff_t num_prefault, > + pgoff_t fault_page_size); > > vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf); >
On 11/27/19 10:12 AM, Christian König wrote: > Am 27.11.19 um 09:31 schrieb Thomas Hellström (VMware): >> From: Thomas Hellstrom <thellstrom@vmware.com> >> >> Support huge (PMD-size and PUD-size) page-table entries by providing a >> huge_fault() callback. >> We still support private mappings and write-notify by splitting the huge >> page-table entries on write-access. >> >> Note that for huge page-faults to occur, either the kernel needs to be >> compiled with trans-huge-pages always enabled, or the kernel needs to be >> compiled with trans-huge-pages enabled using madvise, and the user-space >> app needs to call madvise() to enable trans-huge pages on a per-mapping >> basis. >> >> Furthermore huge page-faults will not occur unless buffer objects and >> user-space addresses are aligned on huge page size boundaries. >> >> Cc: Andrew Morton <akpm@linux-foundation.org> >> Cc: Michal Hocko <mhocko@suse.com> >> Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org> >> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> >> Cc: Ralph Campbell <rcampbell@nvidia.com> >> Cc: "Jérôme Glisse" <jglisse@redhat.com> >> Cc: "Christian König" <christian.koenig@amd.com> >> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com> >> --- >> drivers/gpu/drm/ttm/ttm_bo_vm.c | 139 +++++++++++++++++++++++++++++++- >> include/drm/ttm/ttm_bo_api.h | 3 +- >> 2 files changed, 138 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c >> b/drivers/gpu/drm/ttm/ttm_bo_vm.c >> index 2098f8d4dfc5..8d6089880e39 100644 >> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c >> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c >> @@ -150,6 +150,84 @@ vm_fault_t ttm_bo_vm_reserve(struct >> ttm_buffer_object *bo, >> } >> EXPORT_SYMBOL(ttm_bo_vm_reserve); >> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE >> +/** >> + * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults >> + * @vmf: Fault data >> + * @bo: The buffer object >> + * @page_offset: Page offset from bo start >> + * @fault_page_size: The size of 
the fault in pages. >> + * @pgprot: The page protections. >> + * Does additional checking whether it's possible to insert a PUD or >> PMD >> + * pfn and performs the insertion. >> + * >> + * Return: VM_FAULT_NOPAGE on successful insertion, >> VM_FAULT_FALLBACK if >> + * a huge fault was not possible, and a VM_FAULT_ERROR code otherwise. >> + */ >> +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, >> + struct ttm_buffer_object *bo, >> + pgoff_t page_offset, >> + pgoff_t fault_page_size, >> + pgprot_t pgprot) >> +{ >> + pgoff_t i; >> + vm_fault_t ret; >> + unsigned long pfn; >> + pfn_t pfnt; >> + struct ttm_tt *ttm = bo->ttm; >> + bool write = vmf->flags & FAULT_FLAG_WRITE; >> + >> + >> + /* Fault should not cross bo boundary */ >> + page_offset &= ~(fault_page_size - 1); >> + if (page_offset + fault_page_size > bo->num_pages) >> + goto out_fallback; >> + >> + if (bo->mem.bus.is_iomem) >> + pfn = ttm_bo_io_mem_pfn(bo, page_offset); >> + else >> + pfn = page_to_pfn(ttm->pages[page_offset]); >> + >> + /* pfn must be fault_page_size aligned. */ >> + if ((pfn & (fault_page_size - 1)) != 0) >> + goto out_fallback; >> + >> + /* IO memory is OK now, TT memory must be contigous. */ > > That won't work correctly, IO mem might not be contiguous either. > > We either need to call ttm_bo_io_mem_pfn() multiple times and check > that the addresses are linear or return the length additional to the pfn. Yes, you're right. Will fix that up. Thanks, Thomas > > Regards, > Christian.
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 2098f8d4dfc5..8d6089880e39 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -150,6 +150,84 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_vm_reserve); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults + * @vmf: Fault data + * @bo: The buffer object + * @page_offset: Page offset from bo start + * @fault_page_size: The size of the fault in pages. + * @pgprot: The page protections. + * Does additional checking whether it's possible to insert a PUD or PMD + * pfn and performs the insertion. + * + * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if + * a huge fault was not possible, and a VM_FAULT_ERROR code otherwise. + */ +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + pgoff_t i; + vm_fault_t ret; + unsigned long pfn; + pfn_t pfnt; + struct ttm_tt *ttm = bo->ttm; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + + /* Fault should not cross bo boundary */ + page_offset &= ~(fault_page_size - 1); + if (page_offset + fault_page_size > bo->num_pages) + goto out_fallback; + + if (bo->mem.bus.is_iomem) + pfn = ttm_bo_io_mem_pfn(bo, page_offset); + else + pfn = page_to_pfn(ttm->pages[page_offset]); + + /* pfn must be fault_page_size aligned. */ + if ((pfn & (fault_page_size - 1)) != 0) + goto out_fallback; + + /* IO memory is OK now, TT memory must be contigous. 
*/ + if (!bo->mem.bus.is_iomem) + for (i = 1; i < fault_page_size; ++i) { + if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) + goto out_fallback; + } + + pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); + if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); +#endif + else + WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); + + if (ret != VM_FAULT_NOPAGE) + goto out_fallback; + + return VM_FAULT_NOPAGE; +out_fallback: + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; +} +#else +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + return VM_FAULT_NOPAGE; +} +#endif + /** * ttm_bo_vm_fault_reserved - TTM fault helper * @vmf: The struct vm_fault given as argument to the fault callback @@ -170,7 +248,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); */ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgprot_t prot, - pgoff_t num_prefault) + pgoff_t num_prefault, + pgoff_t fault_page_size) { struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = vma->vm_private_data; @@ -262,6 +341,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, prot = pgprot_decrypted(prot); } + /* We don't prefault on huge faults. Yet. */ + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) { + ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset, + fault_page_size, prot); + goto out_io_unlock; + } + /* * Speculatively prefault a number of pages. Only error on * first page. 
@@ -320,7 +406,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return ret; prot = vma->vm_page_prot; - ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; @@ -330,6 +416,50 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) } EXPORT_SYMBOL(ttm_bo_vm_fault); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) +{ + struct vm_area_struct *vma = vmf->vma; + pgprot_t prot; + struct ttm_buffer_object *bo = vma->vm_private_data; + vm_fault_t ret; + pgoff_t fault_page_size = 0; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + switch (pe_size) { + case PE_SIZE_PMD: + fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; + break; +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + case PE_SIZE_PUD: + fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; + break; +#endif + default: + WARN_ON_ONCE(1); + return VM_FAULT_FALLBACK; + } + + /* Fallback on write dirty-tracking or COW */ + if (write && !(pgprot_val(vmf->vma->vm_page_prot) & _PAGE_RW)) + return VM_FAULT_FALLBACK; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + prot = vm_get_page_prot(vma->vm_flags); + ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + dma_resv_unlock(bo->base.resv); + + return ret; +} +#endif + void ttm_bo_vm_open(struct vm_area_struct *vma) { struct ttm_buffer_object *bo = vma->vm_private_data; @@ -431,7 +561,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = ttm_bo_vm_access, +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + .huge_fault = ttm_bo_vm_huge_fault, +#endif }; static struct ttm_buffer_object 
*ttm_bo_vm_lookup(struct ttm_bo_device *bdev, diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 66ca49db9633..4fc90d53aa15 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -732,7 +732,8 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgprot_t prot, - pgoff_t num_prefault); + pgoff_t num_prefault, + pgoff_t fault_page_size); vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf);