Message ID | 0f3df6604059011bf78a286c2cf5da5c4b41ccb1.1660902741.git.baolin.wang@linux.alibaba.com (mailing list archive)
---|---
State | New
Series | Fix some issues when looking up hugetlb page
Hi Baolin,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc1 next-20220819]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Baolin-Wang/Fix-some-issues-when-looking-up-hugetlb-page/20220819-182017
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: microblaze-randconfig-r003-20220819 (https://download.01.org/0day-ci/archive/20220820/202208200030.9j5HjdtZ-lkp@intel.com/config)
compiler: microblaze-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/c0add09e9de4b39c58633c89ea25ba10ed12d134
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Baolin-Wang/Fix-some-issues-when-looking-up-hugetlb-page/20220819-182017
        git checkout c0add09e9de4b39c58633c89ea25ba10ed12d134
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=microblaze SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   mm/gup.c: In function 'follow_page_pte':
>> mm/gup.c:551:23: error: implicit declaration of function 'huge_ptep_get' [-Werror=implicit-function-declaration]
     551 |                 pte = huge_ptep_get(ptep);
         |                       ^~~~~~~~~~~~~
>> mm/gup.c:551:23: error: incompatible types when assigning to type 'pte_t' from type 'int'
   cc1: some warnings being treated as errors


vim +/huge_ptep_get +551 mm/gup.c

   518
   519  static struct page *follow_page_pte(struct vm_area_struct *vma,
   520          unsigned long address, pmd_t *pmd, unsigned int flags,
   521          struct dev_pagemap **pgmap)
   522  {
   523      struct mm_struct *mm = vma->vm_mm;
   524      struct page *page;
   525      spinlock_t *ptl;
   526      pte_t *ptep, pte;
   527      int ret;
   528
   529      /* FOLL_GET and FOLL_PIN are mutually exclusive. */
   530      if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
   531               (FOLL_PIN | FOLL_GET)))
   532          return ERR_PTR(-EINVAL);
   533  retry:
   534      if (unlikely(pmd_bad(*pmd)))
   535          return no_page_table(vma, flags);
   536
   537      /*
   538       * Considering PTE level hugetlb, like continuous-PTE hugetlb on
   539       * ARM64 architecture.
   540       */
   541      if (is_vm_hugetlb_page(vma)) {
   542          struct hstate *hstate = hstate_vma(vma);
   543          unsigned long size = huge_page_size(hstate);
   544
   545          ptep = huge_pte_offset(mm, address, size);
   546          if (!ptep)
   547              return no_page_table(vma, flags);
   548
   549          ptl = huge_pte_lockptr(hstate, mm, ptep);
   550          spin_lock(ptl);
 > 551          pte = huge_ptep_get(ptep);
   552      } else {
   553          ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
   554          pte = *ptep;
   555      }
   556
   557      if (!pte_present(pte)) {
   558          swp_entry_t entry;
   559          /*
   560           * KSM's break_ksm() relies upon recognizing a ksm page
   561           * even while it is being migrated, so for that case we
   562           * need migration_entry_wait().
   563           */
   564          if (likely(!(flags & FOLL_MIGRATION)))
   565              goto no_page;
   566          if (pte_none(pte))
   567              goto no_page;
   568          entry = pte_to_swp_entry(pte);
   569          if (!is_migration_entry(entry))
   570              goto no_page;
   571          pte_unmap_unlock(ptep, ptl);
   572          migration_entry_wait(mm, pmd, address);
   573          goto retry;
   574      }
   575      if ((flags & FOLL_NUMA) && pte_protnone(pte))
   576          goto no_page;
   577
   578      page = vm_normal_page(vma, address, pte);
   579
   580      /*
   581       * We only care about anon pages in can_follow_write_pte() and don't
   582       * have to worry about pte_devmap() because they are never anon.
   583       */
   584      if ((flags & FOLL_WRITE) &&
   585          !can_follow_write_pte(pte, page, vma, flags)) {
   586          page = NULL;
   587          goto out;
   588      }
   589
   590      if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
   591          /*
   592           * Only return device mapping pages in the FOLL_GET or FOLL_PIN
   593           * case since they are only valid while holding the pgmap
   594           * reference.
   595           */
   596          *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
   597          if (*pgmap)
   598              page = pte_page(pte);
   599          else
   600              goto no_page;
   601      } else if (unlikely(!page)) {
   602          if (flags & FOLL_DUMP) {
   603              /* Avoid special (like zero) pages in core dumps */
   604              page = ERR_PTR(-EFAULT);
   605              goto out;
   606          }
   607
   608          if (is_zero_pfn(pte_pfn(pte))) {
   609              page = pte_page(pte);
   610          } else {
   611              ret = follow_pfn_pte(vma, address, ptep, flags);
   612              page = ERR_PTR(ret);
   613              goto out;
   614          }
   615      }
   616
   617      if (!pte_write(pte) && gup_must_unshare(flags, page)) {
   618          page = ERR_PTR(-EMLINK);
   619          goto out;
   620      }
   621
   622      VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
   623                 !PageAnonExclusive(page), page);
   624
   625      /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
   626      if (unlikely(!try_grab_page(page, flags))) {
   627          page = ERR_PTR(-ENOMEM);
   628          goto out;
   629      }
   630      /*
   631       * We need to make the page accessible if and only if we are going
   632       * to access its content (the FOLL_PIN case). Please see
   633       * Documentation/core-api/pin_user_pages.rst for details.
   634       */
   635      if (flags & FOLL_PIN) {
   636          ret = arch_make_page_accessible(page);
   637          if (ret) {
   638              unpin_user_page(page);
   639              page = ERR_PTR(ret);
   640              goto out;
   641          }
   642      }
   643      if (flags & FOLL_TOUCH) {
   644          if ((flags & FOLL_WRITE) &&
   645              !pte_dirty(pte) && !PageDirty(page))
   646              set_page_dirty(page);
   647          /*
   648           * pte_mkyoung() would be more correct here, but atomic care
   649           * is needed to avoid losing the dirty bit: it is easier to use
   650           * mark_page_accessed().
   651           */
   652          mark_page_accessed(page);
   653      }
   654  out:
   655      pte_unmap_unlock(ptep, ptl);
   656      return page;
   657  no_page:
   658      pte_unmap_unlock(ptep, ptl);
   659      if (!pte_none(pte))
   660          return NULL;
   661      return no_page_table(vma, flags);
   662  }
   663
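The error boils down to huge_ptep_get() only having a declaration visible when CONFIG_HUGETLB_PAGE is enabled, so a randconfig build without hugetlb support hits an implicit declaration, and the implicitly assumed 'int' return type then cannot be assigned to pte_t. Below is a minimal sketch of one way such configs could be kept building; this is an illustrative assumption, not necessarily the fix the author ultimately chose:

/*
 * Illustrative sketch only (an assumption, not the applied fix):
 * give !CONFIG_HUGETLB_PAGE builds a visible definition of
 * huge_ptep_get().  With hugetlb disabled, is_vm_hugetlb_page()
 * is constant false, so this stub can never be reached at runtime;
 * it exists purely to satisfy the compiler.
 */
#ifndef CONFIG_HUGETLB_PAGE
static inline pte_t huge_ptep_get(pte_t *ptep)
{
	return ptep_get(ptep);
}
#endif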
Hi Baolin,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc1 next-20220819]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Baolin-Wang/Fix-some-issues-when-looking-up-hugetlb-page/20220819-182017
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: riscv-randconfig-r042-20220819 (https://download.01.org/0day-ci/archive/20220820/202208200109.XEXN0wPy-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 0ac597f3cacf60479ffd36b03766fa7462dabd78)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/c0add09e9de4b39c58633c89ea25ba10ed12d134
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Baolin-Wang/Fix-some-issues-when-looking-up-hugetlb-page/20220819-182017
        git checkout c0add09e9de4b39c58633c89ea25ba10ed12d134
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> mm/gup.c:551:9: error: implicit declaration of function 'huge_ptep_get' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
                   pte = huge_ptep_get(ptep);
                         ^
>> mm/gup.c:551:7: error: assigning to 'pte_t' from incompatible type 'int'
                   pte = huge_ptep_get(ptep);
                       ^ ~~~~~~~~~~~~~~~~~~~
   2 errors generated.

vim +/huge_ptep_get +551 mm/gup.c
diff --git a/mm/gup.c b/mm/gup.c
index 5aa7531..3b2fa86 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -534,8 +534,26 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	if (unlikely(pmd_bad(*pmd)))
 		return no_page_table(vma, flags);
 
-	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-	pte = *ptep;
+	/*
+	 * Considering PTE level hugetlb, like continuous-PTE hugetlb on
+	 * ARM64 architecture.
+	 */
+	if (is_vm_hugetlb_page(vma)) {
+		struct hstate *hstate = hstate_vma(vma);
+		unsigned long size = huge_page_size(hstate);
+
+		ptep = huge_pte_offset(mm, address, size);
+		if (!ptep)
+			return no_page_table(vma, flags);
+
+		ptl = huge_pte_lockptr(hstate, mm, ptep);
+		spin_lock(ptl);
+		pte = huge_ptep_get(ptep);
+	} else {
+		ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+		pte = *ptep;
+	}
+
 	if (!pte_present(pte)) {
 		swp_entry_t entry;
 		/*
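For context on the lock choice this hunk makes: huge_pte_lockptr() in kernels of this vintage hands out the split PMD lock only for PMD-sized hugetlb and falls back to mm->page_table_lock for every other size, including the CONT-PTE sizes this series is about. The following is a close paraphrase of the helper from include/linux/hugetlb.h around v6.0 (treat the exact body as an approximation):

/*
 * Approximation of huge_pte_lockptr() circa v6.0: only PMD-size
 * hugetlb uses the split page-table lock; everything else (including
 * CONT-PTE sizes such as 64K on ARM64 with 4K base pages) serializes
 * on mm->page_table_lock.  This is why the lock returned by
 * pte_offset_map_lock() is the wrong one for CONT-PTE hugetlb.
 */
static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}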
Some architectures (like ARM64) support CONT-PTE/PMD size hugetlb, which means they can provide not only PMD/PUD size hugetlb pages (2M and 1G), but also CONT-PTE/PMD sizes (64K and 32M) when a 4K base page size is used.

When follow_page() looks up a CONT-PTE size hugetlb page, follow_page_pte() uses pte_offset_map_lock() to take the pte lock. However, that lock is the wrong one for CONT-PTE size hugetlb: we should be using mm->page_table_lock, as returned by huge_pte_lockptr(). As a result, the pte entry of a CONT-PTE size hugetlb is unstable under the lock follow_page_pte() actually holds; the entry can still be concurrently migrated or poisoned, which opens up potential races, and the subsequent pte_xxx() validations in follow_page_pte() are likewise unreliable even though they run under the 'pte lock'.

To fix this, first check whether the VMA is a hugetlb VMA, then use huge_pte_lockptr() to take the correct pte lock, and read the pte value with huge_ptep_get() so that the entry is stable under that lock.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 mm/gup.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)
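As an illustration of why the commit message insists on huge_ptep_get() rather than a plain '*ptep' dereference: on ARM64, the contiguous-PTE variant folds the dirty and young bits of every member PTE of the contiguous range into the returned value. The sketch below closely follows arch/arm64/mm/hugetlbpage.c of the same era, though the exact details should be treated as an approximation:

/*
 * Approximation of ARM64's huge_ptep_get() circa v6.0: for a
 * contiguous-PTE mapping, walk all member PTEs and accumulate their
 * dirty/young bits into the pte value that is returned, so callers
 * see a coherent view of the whole huge mapping.
 */
pte_t huge_ptep_get(pte_t *ptep)
{
	int ncontig, i;
	size_t pgsize;
	pte_t orig_pte = ptep_get(ptep);

	if (!pte_present(orig_pte) || !pte_cont(orig_pte))
		return orig_pte;

	ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
	for (i = 0; i < ncontig; i++, ptep++) {
		pte_t pte = ptep_get(ptep);

		if (pte_dirty(pte))
			orig_pte = pte_mkdirty(orig_pte);

		if (pte_young(pte))
			orig_pte = pte_mkyoung(orig_pte);
	}
	return orig_pte;
}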