Message ID | 152699998750.24093.5270058390086110946.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive) |
---|---|
State | New, archived |
On Tue 22-05-18 07:39:47, Dan Williams wrote:
> In support of enabling memory_failure() handling for device-dax
> mappings, set the ->mapping association of pages backing device-dax
> mappings. The rmap implementation requires page_mapping() to return the
> address_space hosting the vmas that map the page.
>
> The ->mapping pointer is never cleared. There is no possibility for the
> page to become associated with another address_space while the device is
> enabled. When the device is disabled the 'struct page' array for the
> device is destroyed / later reinitialized to zero.
>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
...
> @@ -402,17 +401,33 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
> 	id = dax_read_lock();
> 	switch (pe_size) {
> 	case PE_SIZE_PTE:
> -		rc = __dev_dax_pte_fault(dev_dax, vmf);
> +		fault_size = PAGE_SIZE;
> +		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
> 		break;
> 	case PE_SIZE_PMD:
> -		rc = __dev_dax_pmd_fault(dev_dax, vmf);
> +		fault_size = PMD_SIZE;
> +		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
> 		break;
> 	case PE_SIZE_PUD:
> -		rc = __dev_dax_pud_fault(dev_dax, vmf);
> +		fault_size = PUD_SIZE;
> +		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
> 		break;
> 	default:
> 		rc = VM_FAULT_SIGBUS;
> 	}
> +
> +	if (rc == VM_FAULT_NOPAGE) {
> +		unsigned long i;
> +
> +		for (i = 0; i < fault_size / PAGE_SIZE; i++) {
> +			struct page *page;
> +
> +			page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
> +			if (page->mapping)
> +				continue;
> +			page->mapping = filp->f_mapping;
> +		}
> +	}
> 	dax_read_unlock(id);

Careful here. A page fault can return VM_FAULT_NOPAGE even if we raced with
somebody modifying the PTE or if we installed a zero page. With shared DAX
mappings (and device dax does not allow anything else, if I'm right) this
does not matter, as a given file offset is guaranteed to always map to the
same page, but I think it deserves a comment.

								Honza
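One way to satisfy Jan's request is a block comment above the loop spelling out why the unsynchronized ->mapping check is safe. The following is a sketch that paraphrases his observation; it is not taken from the author's actual follow-up:

```c
	if (rc == VM_FAULT_NOPAGE) {
		unsigned long i;

		/*
		 * VM_FAULT_NOPAGE may also be returned when we raced
		 * another thread servicing the same fault. Device-dax
		 * mappings are always shared, so a given file offset
		 * always maps to the same page, and ->mapping can only
		 * ever transition from NULL to filp->f_mapping. The
		 * unsynchronized check below is therefore safe.
		 */
		for (i = 0; i < fault_size / PAGE_SIZE; i++) {
			struct page *page;

			page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
			if (page->mapping)
				continue;
			page->mapping = filp->f_mapping;
		}
	}
```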
Hi Dan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.17-rc7 next-20180530]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/mm-Teach-memory_failure-about-ZONE_DEVICE-pages/20180525-035652
config: arm64-allmodconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm64

All errors (new ones prefixed by >>):

   drivers//dax/device.c: In function 'dev_dax_huge_fault':
>> drivers//dax/device.c:413:8: error: too many arguments to function '__dev_dax_pud_fault'
      rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
           ^~~~~~~~~~~~~~~~~~~
   drivers//dax/device.c:380:19: note: declared here
    static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
                      ^~~~~~~~~~~~~~~~~~~

vim +/__dev_dax_pud_fault +413 drivers//dax/device.c

   386
   387	static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
   388			enum page_entry_size pe_size)
   389	{
   390		struct vm_area_struct *vma = vmf->vma;
   391		struct file *filp = vma->vm_file;
   392		unsigned long fault_size;
   393		int rc, id;
   394		pfn_t pfn;
   395		struct dev_dax *dev_dax = filp->private_data;
   396
   397		dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
   398			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
   399			vma->vm_start, vma->vm_end, pe_size);
   400
   401		id = dax_read_lock();
   402		switch (pe_size) {
   403		case PE_SIZE_PTE:
   404			fault_size = PAGE_SIZE;
   405			rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
   406			break;
   407		case PE_SIZE_PMD:
   408			fault_size = PMD_SIZE;
   409			rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
   410			break;
   411		case PE_SIZE_PUD:
   412			fault_size = PUD_SIZE;
 > 413			rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
   414			break;
   415		default:
   416			rc = VM_FAULT_SIGBUS;
   417		}
   418
   419		if (rc == VM_FAULT_NOPAGE) {
   420			unsigned long i;
   421
   422			for (i = 0; i < fault_size / PAGE_SIZE; i++) {
   423				struct page *page;
   424
   425				page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
   426				if (page->mapping)
   427					continue;
   428				page->mapping = filp->f_mapping;
   429			}
   430		}
   431		dax_read_unlock(id);
   432
   433		return rc;
   434	}
   435

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
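The failing build is arm64 allmodconfig, where CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is not set: the patch converts the real __dev_dax_pud_fault() to the new three-argument form but leaves the fallback stub in the #else branch (the declaration at line 380 that gcc points at) with the old two-argument signature. A likely fix, sketched here rather than quoted from a posted respin, is to convert the stub as well:

```c
#else
/* PUD mappings unsupported on this architecture; fall back to PMD/PTE. */
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
		struct vm_fault *vmf, pfn_t *pfn)
{
	return VM_FAULT_FALLBACK;
}
#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
```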
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 686de08e120b..8e986478d48d 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -245,13 +245,12 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 }
 
 static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
-		struct vm_fault *vmf)
+		struct vm_fault *vmf, pfn_t *pfn)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct device *dev = &dev_dax->dev;
 	struct dax_region *dax_region;
 	phys_addr_t phys;
-	pfn_t pfn;
 	unsigned int fault_size = PAGE_SIZE;
 
 	if (check_vma(dev_dax, vma, __func__))
@@ -273,13 +272,13 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_mixed(vma, vmf->address, pfn);
+	return vmf_insert_mixed(vma, vmf->address, *pfn);
 }
 
 static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
-		struct vm_fault *vmf)
+		struct vm_fault *vmf, pfn_t *pfn)
 {
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
 	struct vm_area_struct *vma = vmf->vma;
@@ -287,7 +286,6 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 	struct dax_region *dax_region;
 	phys_addr_t phys;
 	pgoff_t pgoff;
-	pfn_t pfn;
 	unsigned int fault_size = PMD_SIZE;
 
 	if (check_vma(dev_dax, vma, __func__))
@@ -322,15 +320,15 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
+	return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, *pfn,
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
-		struct vm_fault *vmf)
+		struct vm_fault *vmf, pfn_t *pfn)
 {
 	unsigned long pud_addr = vmf->address & PUD_MASK;
 	struct vm_area_struct *vma = vmf->vma;
@@ -338,7 +336,6 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 	struct dax_region *dax_region;
 	phys_addr_t phys;
 	pgoff_t pgoff;
-	pfn_t pfn;
 	unsigned int fault_size = PUD_SIZE;
 
 
@@ -374,9 +371,9 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_pfn_pud(vma, vmf->address, vmf->pud, pfn,
+	return vmf_insert_pfn_pud(vma, vmf->address, vmf->pud, *pfn,
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
@@ -390,9 +387,11 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int rc, id;
 	struct vm_area_struct *vma = vmf->vma;
 	struct file *filp = vma->vm_file;
+	unsigned long fault_size;
+	int rc, id;
+	pfn_t pfn;
 	struct dev_dax *dev_dax = filp->private_data;
 
 	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
@@ -402,17 +401,33 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 	id = dax_read_lock();
 	switch (pe_size) {
 	case PE_SIZE_PTE:
-		rc = __dev_dax_pte_fault(dev_dax, vmf);
+		fault_size = PAGE_SIZE;
+		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PMD:
-		rc = __dev_dax_pmd_fault(dev_dax, vmf);
+		fault_size = PMD_SIZE;
+		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PUD:
-		rc = __dev_dax_pud_fault(dev_dax, vmf);
+		fault_size = PUD_SIZE;
+		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
 		break;
 	default:
 		rc = VM_FAULT_SIGBUS;
 	}
+
+	if (rc == VM_FAULT_NOPAGE) {
+		unsigned long i;
+
+		for (i = 0; i < fault_size / PAGE_SIZE; i++) {
+			struct page *page;
+
+			page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+			if (page->mapping)
+				continue;
+			page->mapping = filp->f_mapping;
+		}
+	}
 	dax_read_unlock(id);
 
 	return rc;
In support of enabling memory_failure() handling for device-dax
mappings, set the ->mapping association of pages backing device-dax
mappings. The rmap implementation requires page_mapping() to return the
address_space hosting the vmas that map the page.

The ->mapping pointer is never cleared. There is no possibility for the
page to become associated with another address_space while the device is
enabled. When the device is disabled the 'struct page' array for the
device is destroyed / later reinitialized to zero.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device.c |   47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)
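For context on the rmap claim in the commit message, here is a simplified sketch of the lookup that memory_failure() depends on, modeled on collect_procs_file() in mm/memory-failure.c with task iteration and SIGBUS delivery omitted. The helper name walk_page_mappers is hypothetical; page_mapping(), page_to_pgoff(), i_mmap_lock_read(), and vma_interval_tree_foreach() are existing kernel APIs of this era:

```c
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

/*
 * Hypothetical helper, simplified from collect_procs_file(): if
 * page->mapping were left NULL, page_mapping() would return NULL and
 * the vmas mapping the failing device-dax page could not be located.
 */
static void walk_page_mappers(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	pgoff_t pgoff = page_to_pgoff(page);
	struct vm_area_struct *vma;

	if (!mapping)
		return;

	i_mmap_lock_read(mapping);
	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
		/* each vma found here maps the poisoned page; signal it */
	}
	i_mmap_unlock_read(mapping);
}
```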