diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -595,6 +595,7 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
#define pud_write(pud) pte_write(pud_pte(pud))
#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+#define pud_mkinvalid(pud) pte_pud(pte_mkinvalid(pud_pte(pud)))
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
@@ -1344,6 +1345,16 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define pudp_establish pudp_establish
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ return __pud(xchg_relaxed(&pud_val(*pudp), pud_val(pud)));
+}
+#endif
+
/*
* Encode and decode a swap entry:
* bits 0-1: present (must be zero)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -581,6 +581,14 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
return pmd;
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ pud_val(pud) |= _PAGE_PRESENT_INVALID;
+ pud_val(pud) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY | _PAGE_PROTNONE);
+
+ return pud;
+}
+
/*
* The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a
* different prototype.
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -736,6 +736,13 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
return pmd;
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ pud_val(pud) &= ~(_PAGE_PRESENT | _PAGE_VALID | _PAGE_DIRTY);
+
+ return pud;
+}
+
/*
* The generic version pmdp_huge_get_and_clear uses a version of pmd_clear() with a
* different prototype.
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1085,7 +1085,8 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#if defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) || defined(CONFIG_HUGETLB_PAGE)
+#define pud_swp_soft_dirty(pud) pte_swp_soft_dirty(pud_pte(pud))
#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
@@ -1386,6 +1387,10 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
+#define __HAVE_ARCH_PUDP_INVALIDATE
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -37,7 +37,7 @@ EXPORT_SYMBOL(__pmd_frag_nr);
unsigned long __pmd_frag_size_shift;
EXPORT_SYMBOL(__pmd_frag_size_shift);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
/*
* This is called when relaxing access to a hugepage. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
@@ -259,7 +259,18 @@ pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmdv &= _HPAGE_CHG_MASK;
return pmd_set_protbits(__pmd(pmdv), newprot);
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ unsigned long old_pud;
+
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID);
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+ return __pud(old_pud);
+}
+#endif /* CONFIG_PGTABLE_HAS_HUGE_LEAVES */
/* For use by kexec, called with MMU off */
notrace void mmu_cleanup_all(void)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -657,6 +657,11 @@ static inline unsigned long pud_pfn(pud_t pud)
return ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT);
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return __pud(pud_val(pud) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
+}
+
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
@@ -804,6 +809,16 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#ifdef CONFIG_HUGETLB_PAGE
+#define pudp_establish pudp_establish
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud)));
+}
+#endif
+
/*
* Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
* are !pte_none() && !pte_present().
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -783,6 +783,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -1353,6 +1359,23 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
return pud;
}
+#ifndef pudp_establish
+#define pudp_establish pudp_establish
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
@@ -1389,7 +1412,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
-
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1541,7 +1563,12 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
}
-#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#if defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) || defined(CONFIG_HUGETLB_PAGE)
+static inline int pud_swp_soft_dirty(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SWP_SOFT_DIRTY;
+}
+
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -956,6 +956,11 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#endif
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+#endif
+
#ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
/*
@@ -976,6 +981,26 @@ extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
#endif
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE_AD
+
+/*
+ * pudp_invalidate_ad() invalidates the PUD while changing a hugetlb mapping in
+ * the page tables. This function is similar to pudp_invalidate(), but should
+ * only be used if the access and dirty bits would not be cleared by the
+ * software in the new PUD value. The function ensures that hardware updates
+ * of the access and dirty bits are not lost.
+ *
+ * Doing so allows certain architectures to avoid a TLB flush in most cases.
+ * Yet, another TLB flush might be necessary later if the PUD update itself
+ * requires such a flush (e.g., if protection was made stricter). Even when
+ * a TLB flush is needed because of the update, the caller may be able to
+ * batch these TLB flushing operations, so fewer TLB flush operations are
+ * needed.
+ */
+extern pud_t pudp_invalidate_ad(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp);
+#endif
+
#ifndef __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
{
@@ -1406,7 +1431,16 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#endif
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
-#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline int pud_soft_dirty(pud_t pud)
+{
+ return 0;
+}
+#if !defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !defined(CONFIG_HUGETLB_PAGE)
+static inline int pud_swp_soft_dirty(pud_t pud)
+{
+ return 0;
+}
+
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
{
return pmd;
@@ -1487,6 +1521,11 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
{
return pmd;
}
+
+static inline int pud_swp_soft_dirty(pud_t pud)
+{
+ return 0;
+}
#endif
#ifndef __HAVE_PFNMAP_TRACKING
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -194,6 +194,27 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
}
#endif
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+
+ flush_pud_tlb_range(vma, address, address + PUD_SIZE);
+ return old;
+}
+#endif
+
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE_AD
+pud_t pudp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp)
+{
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ return pudp_invalidate(vma, address, pudp);
+}
+#endif
+
#ifndef __HAVE_ARCH_PMDP_INVALIDATE
pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
HugeTLB pages will be handled at the PUD level as well, so we need to
implement pud_mkinvalid() and pudp_establish(), the PUD counterparts of
pmd_mkinvalid() and pmdp_establish().

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 arch/arm64/include/asm/pgtable.h             | 11 ++++++
 arch/loongarch/include/asm/pgtable.h         |  8 ++++
 arch/mips/include/asm/pgtable.h              |  7 ++++
 arch/powerpc/include/asm/book3s/64/pgtable.h |  7 +++-
 arch/powerpc/mm/book3s64/pgtable.c           | 15 ++++++-
 arch/riscv/include/asm/pgtable.h             | 15 +++++++
 arch/x86/include/asm/pgtable.h               | 31 ++++++++++++++-
 include/linux/pgtable.h                      | 41 +++++++++++++++++++-
 mm/pgtable-generic.c                         | 21 ++++++++++
 9 files changed, 150 insertions(+), 6 deletions(-)
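A rough usage sketch for reviewers: a caller write-protecting a present
PUD-mapped hugetlb entry could be expected to use pudp_invalidate() along the
lines below, mirroring the pmdp_invalidate() pattern used for PMD-mapped THP.
The function wrprotect_huge_pud() is made up for illustration, and
pud_wrprotect()/set_pud_at() are assumed to be provided by the architecture:

	static void wrprotect_huge_pud(struct vm_area_struct *vma,
				       unsigned long addr, pud_t *pudp)
	{
		pud_t pud;

		/* Invalidate the entry so hardware and GUP-fast see it as non-present. */
		pud = pudp_invalidate(vma, addr, pudp);
		/* Modify the returned old value; pud_wrprotect() is an assumed helper. */
		pud = pud_wrprotect(pud);
		/* Re-install the modified entry; it becomes present again. */
		set_pud_at(vma->vm_mm, addr, pudp, pud);
	}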