@@ -189,7 +189,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -44,7 +44,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
}
pte_t *
-huge_pte_offset (struct mm_struct *mm, unsigned long addr)
+huge_pte_offset (struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
unsigned long taddr = htlbpage_to_page(addr);
pgd_t *pgd;
@@ -92,7 +92,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ
if (REGION_NUMBER(addr) != RGN_HPAGE)
return ERR_PTR(-EINVAL);
- ptep = huge_pte_offset(mm, addr);
+ ptep = huge_pte_offset(mm, addr, size_to_hstate(HPAGE_SIZE));
if (!ptep || pte_none(*ptep))
return NULL;
page = pte_page(*ptep);
@@ -74,7 +74,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -36,7 +36,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -69,7 +69,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -55,7 +55,7 @@ static unsigned nr_gpages;
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
/* Only called for hugetlbfs pages, hence can ignore THP */
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
@@ -176,7 +176,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return (pte_t *) pmdp;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -42,7 +42,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -270,7 +270,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -102,7 +102,7 @@ static pte_t *get_pte(pte_t *base, int index, int level)
return ptep;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
pud_t *pud;
@@ -31,7 +31,7 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
if (!vma || !is_vm_hugetlb_page(vma))
return ERR_PTR(-EINVAL);
- pte = huge_pte_offset(mm, address);
+ pte = huge_pte_offset(mm, address, hstate_vma(vma));
/* hugetlb should be locked, and hence, prefaulted */
WARN_ON(!pte || pte_none(*pte));
@@ -214,6 +214,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
* hugepmd ranges.
*/
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ struct vm_area_struct *vma,
unsigned long address,
unsigned long flags,
unsigned long reason)
@@ -224,7 +225,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
- pte = huge_pte_offset(mm, address);
+ pte = huge_pte_offset(mm, address, hstate_vma(vma));
if (!pte)
goto out;
@@ -243,6 +244,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
}
#else
static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
+ struct vm_area_struct *vma,
unsigned long address,
unsigned long flags,
unsigned long reason)
@@ -435,7 +437,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason)
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
reason);
else
- must_wait = userfaultfd_huge_must_wait(ctx, vmf->address,
+ must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+ vmf->address,
vmf->flags, reason);
up_read(&mm->mmap_sem);
@@ -113,7 +113,7 @@ extern struct list_head huge_boot_pages;
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
@@ -3233,7 +3233,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
spinlock_t *src_ptl, *dst_ptl;
- src_pte = huge_pte_offset(src, addr);
+ src_pte = huge_pte_offset(src, addr, h);
if (!src_pte)
continue;
dst_pte = huge_pte_alloc(dst, addr, sz);
@@ -3317,7 +3317,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start;
for (; address < end; address += sz) {
- ptep = huge_pte_offset(mm, address);
+ ptep = huge_pte_offset(mm, address, h);
if (!ptep)
continue;
@@ -3535,7 +3535,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
unmap_ref_private(mm, vma, old_page, address);
BUG_ON(huge_pte_none(pte));
spin_lock(ptl);
- ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h), h);
if (likely(ptep &&
pte_same(huge_ptep_get(ptep), pte)))
goto retry_avoidcopy;
@@ -3574,7 +3574,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
* before the page tables are altered
*/
spin_lock(ptl);
- ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h), h);
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
ClearPagePrivate(new_page);
@@ -3861,7 +3861,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
address &= huge_page_mask(h);
- ptep = huge_pte_offset(mm, address);
+ ptep = huge_pte_offset(mm, address, h);
if (ptep) {
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
@@ -4118,7 +4118,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
*
* Note that page table lock is not held when pte is null.
*/
- pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
+ pte = huge_pte_offset(mm, vaddr & huge_page_mask(h), h);
if (pte)
ptl = huge_pte_lock(h, mm, pte);
absent = !pte || huge_pte_none(huge_ptep_get(pte));
@@ -4252,7 +4252,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl;
- ptep = huge_pte_offset(mm, address);
+ ptep = huge_pte_offset(mm, address, h);
if (!ptep)
continue;
ptl = huge_pte_lock(h, mm, ptep);
@@ -4514,7 +4514,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
saddr = page_table_shareable(svma, vma, addr, idx);
if (saddr) {
- spte = huge_pte_offset(svma->vm_mm, saddr);
+ spte = huge_pte_offset(svma->vm_mm, saddr, hstate_vma(svma));
if (spte) {
get_page(virt_to_page(spte));
break;
@@ -4610,7 +4610,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
return pte;
}
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, struct hstate *h)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -120,7 +120,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
- pvmw->pte = huge_pte_offset(mm, pvmw->address);
+ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_hstate(page));
if (!pvmw->pte)
return false;
@@ -185,7 +185,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
do {
next = hugetlb_entry_end(h, addr, end);
- pte = huge_pte_offset(walk->mm, addr & hmask);
+ pte = huge_pte_offset(walk->mm, addr & hmask, h);
if (pte && walk->hugetlb_entry)
err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
if (err)
A poisoned or migrated hugepage is stored as a swap entry in the page tables. On architectures that support hugepages consisting of contiguous page table entries (such as on arm64) this leads to ambiguity in determining the right page table entry to return in huge_pte_offset() when a poisoned entry is encountered. Let's remove the ambiguity by adding a hstate parameter to convey additional information about the requested address. Also fixup the definition/usage of huge_pte_offset() throughout the tree. Signed-off-by: Punit Agrawal <punit.agrawal@arm.com> --- arch/arm64/mm/hugetlbpage.c | 2 +- arch/ia64/mm/hugetlbpage.c | 4 ++-- arch/metag/mm/hugetlbpage.c | 2 +- arch/mips/mm/hugetlbpage.c | 2 +- arch/parisc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/s390/mm/hugetlbpage.c | 2 +- arch/sh/mm/hugetlbpage.c | 2 +- arch/sparc/mm/hugetlbpage.c | 2 +- arch/tile/mm/hugetlbpage.c | 2 +- arch/x86/mm/hugetlbpage.c | 2 +- fs/userfaultfd.c | 7 +++++-- include/linux/hugetlb.h | 2 +- mm/hugetlb.c | 18 +++++++++--------- mm/page_vma_mapped.c | 2 +- mm/pagewalk.c | 2 +- 16 files changed, 29 insertions(+), 26 deletions(-)