@@ -104,6 +104,7 @@ config ARM64
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_GENERAL_HUGETLB_CONTPTE
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_EXECMEM_LATE if EXECMEM
@@ -12,6 +12,7 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <linux/hugetlb_contpte.h>
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported
@@ -45,8 +46,6 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
#define __HAVE_ARCH_HUGE_PTE_CLEAR
extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz);
-#define __HAVE_ARCH_HUGE_PTEP_GET
-extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void __init arm64_hugetlb_cma_reserve(void);
@@ -397,9 +397,10 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
-static inline void __set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void ___set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr,
+ size_t pgsize)
{
page_table_check_ptes_set(mm, ptep, pte, nr);
__sync_cache_and_tags(pte, nr);
@@ -410,10 +411,15 @@ static inline void __set_ptes(struct mm_struct *mm,
if (--nr == 0)
break;
ptep++;
- pte = pte_advance_pfn(pte, 1);
+ pte = pte_advance_pfn(pte, pgsize >> PAGE_SHIFT);
}
}
+#define __set_ptes(mm, addr, ptep, pte, nr) \
+ ___set_ptes(mm, addr, ptep, pte, nr, PAGE_SIZE)
+
+#define set_contptes ___set_ptes
+
/*
* Huge pte definitions.
*/
@@ -1760,6 +1766,40 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
#endif /* CONFIG_ARM64_CONTPTE */
+/*
+ * Return how many entries map a hugetlb page of @size bytes (0 for a size not
+ * listed below). If @pgsize is not NULL, store the span of one entry there.
+ */
+static inline int arch_contpte_get_num_contig(pte_t *ptep, unsigned long size,
+					      size_t *pgsize)
+{
+	size_t entry_size = size;
+	int nr = 0;
+
+	switch (size) {
+#ifndef __PAGETABLE_PMD_FOLDED
+	case PUD_SIZE:
+		nr = pud_sect_supported() ? 1 : 0;
+		break;
+#endif
+	case PMD_SIZE:
+		nr = 1;
+		break;
+	case CONT_PMD_SIZE:
+		entry_size = PMD_SIZE;
+		nr = CONT_PMDS;
+		break;
+	case CONT_PTE_SIZE:
+		entry_size = PAGE_SIZE;
+		nr = CONT_PTES;
+		break;
+	}
+
+	if (pgsize)
+		*pgsize = entry_size;
+	return nr;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
@@ -98,57 +98,6 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
return CONT_PTES;
}
-static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
-{
- int contig_ptes = 0;
-
- *pgsize = size;
-
- switch (size) {
-#ifndef __PAGETABLE_PMD_FOLDED
- case PUD_SIZE:
- if (pud_sect_supported())
- contig_ptes = 1;
- break;
-#endif
- case PMD_SIZE:
- contig_ptes = 1;
- break;
- case CONT_PMD_SIZE:
- *pgsize = PMD_SIZE;
- contig_ptes = CONT_PMDS;
- break;
- case CONT_PTE_SIZE:
- *pgsize = PAGE_SIZE;
- contig_ptes = CONT_PTES;
- break;
- }
-
- return contig_ptes;
-}
-
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- int ncontig, i;
- size_t pgsize;
- pte_t orig_pte = __ptep_get(ptep);
-
- if (!pte_present(orig_pte) || !pte_cont(orig_pte))
- return orig_pte;
-
- ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
- for (i = 0; i < ncontig; i++, ptep++) {
- pte_t pte = __ptep_get(ptep);
-
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
- }
- return orig_pte;
-}
-
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
@@ -229,7 +178,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
unsigned long pfn, dpfn;
pgprot_t hugeprot;
- ncontig = num_contig_ptes(sz, &pgsize);
+ ncontig = arch_contpte_get_num_contig(ptep, sz, &pgsize);
if (!pte_present(pte)) {
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
@@ -379,7 +328,7 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
int i, ncontig;
size_t pgsize;
- ncontig = num_contig_ptes(sz, &pgsize);
+ ncontig = arch_contpte_get_num_contig(ptep, sz, &pgsize);
for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
__pte_clear(mm, addr, ptep);
@@ -73,6 +73,7 @@ config RISCV
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
+ select ARCH_WANT_GENERAL_HUGETLB_CONTPTE if RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
@@ -4,6 +4,9 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
+#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB_CONTPTE
+#include <linux/hugetlb_contpte.h>
+#endif
static inline void arch_clear_hugetlb_flags(struct folio *folio)
{
@@ -43,9 +46,6 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
-#define __HAVE_ARCH_HUGE_PTEP_GET
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-
pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
#define arch_make_huge_pte arch_make_huge_pte
@@ -300,6 +300,8 @@ static inline unsigned long pte_napot(pte_t pte)
return pte_val(pte) & _PAGE_NAPOT;
}
+#define pte_cont pte_napot
+
#define pte_valid_napot(pte) (pte_present(pte) && pte_napot(pte))
static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
@@ -581,6 +583,38 @@ static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
}
}
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+/*
+ * Return how many entries map a hugetlb page of @size bytes; a present NAPOT
+ * entry at @ptep overrides that count. @pgsize (may be NULL) gets entry span.
+ */
+static inline int arch_contpte_get_num_contig(pte_t *ptep, unsigned long size,
+					      size_t *pgsize)
+{
+	unsigned long shift = PAGE_SHIFT;
+	pte_t raw;
+
+	if (size >= PGDIR_SIZE)
+		shift = PGDIR_SHIFT;
+	else if (size >= P4D_SIZE)
+		shift = P4D_SHIFT;
+	else if (size >= PUD_SIZE)
+		shift = PUD_SHIFT;
+	else if (size >= PMD_SIZE)
+		shift = PMD_SHIFT;
+
+	if (pgsize)
+		*pgsize = BIT(shift);
+
+	/* Read the raw entry: only a present (non-swap) NAPOT pte gives the count */
+	raw = __ptep_get(ptep);
+	if (pte_valid_napot(raw))
+		return napot_pte_num(napot_cont_order(raw));
+
+	return size >> shift;
+}
+#endif
+
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
@@ -665,6 +699,8 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
__set_ptes(mm, addr, ptep, pteval, nr);
}
#define set_ptes set_ptes
+#define set_contptes(mm, addr, ptep, pte, nr, pgsize) \
+ set_ptes(mm, addr, ptep, pte, nr)
static inline pte_t ptep_get(pte_t *ptep)
{
@@ -3,30 +3,6 @@
#include <linux/err.h>
#ifdef CONFIG_RISCV_ISA_SVNAPOT
-pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- unsigned long pte_num;
- int i;
- pte_t orig_pte = ptep_get(ptep);
-
- if (!pte_present(orig_pte) || !pte_napot(orig_pte))
- return orig_pte;
-
- pte_num = napot_pte_num(napot_cont_order(orig_pte));
-
- for (i = 0; i < pte_num; i++, ptep++) {
- pte_t pte = ptep_get(ptep);
-
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
- }
-
- return orig_pte;
-}
-
pte_t *huge_pte_alloc(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long addr,
@@ -266,15 +242,13 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
int dirty)
{
struct mm_struct *mm = vma->vm_mm;
- unsigned long order;
pte_t orig_pte;
- int i, pte_num;
+ int pte_num;
if (!pte_napot(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
if (pte_dirty(orig_pte))
@@ -298,7 +272,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
if (!pte_napot(orig_pte))
return ptep_get_and_clear(mm, addr, ptep);
- pte_num = napot_pte_num(napot_cont_order(orig_pte));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
return get_clear_contig(mm, addr, ptep, pte_num);
}
@@ -308,17 +282,15 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
pte_t *ptep)
{
pte_t pte = ptep_get(ptep);
- unsigned long order;
pte_t orig_pte;
- int i, pte_num;
+ int pte_num;
if (!pte_napot(pte)) {
ptep_set_wrprotect(mm, addr, ptep);
return;
}
- order = napot_cont_order(pte);
- pte_num = napot_pte_num(order);
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
orig_pte = pte_wrprotect(orig_pte);
@@ -336,7 +308,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
if (!pte_napot(pte))
return ptep_clear_flush(vma, addr, ptep);
- pte_num = napot_pte_num(napot_cont_order(pte));
+ pte_num = arch_contpte_get_num_contig(ptep, 0, NULL);
return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
}
@@ -346,6 +318,7 @@ void huge_pte_clear(struct mm_struct *mm,
pte_t *ptep,
unsigned long sz)
{
+ size_t pgsize;
pte_t pte = ptep_get(ptep);
int i, pte_num;
@@ -354,8 +327,8 @@ void huge_pte_clear(struct mm_struct *mm,
return;
}
- pte_num = napot_pte_num(napot_cont_order(pte));
- for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+ pte_num = arch_contpte_get_num_contig(ptep, sz, &pgsize);
+ for (i = 0; i < pte_num; i++, addr += pgsize, ptep++)
pte_clear(mm, addr, ptep);
}
new file mode 100644
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef _LINUX_HUGETLB_CONTPTE_H
+#define _LINUX_HUGETLB_CONTPTE_H
+
+#define __HAVE_ARCH_HUGE_PTEP_GET
+extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+
+#endif /* _LINUX_HUGETLB_CONTPTE_H */
@@ -798,6 +798,9 @@ config NOMMU_INITIAL_TRIM_EXCESS
config ARCH_WANT_GENERAL_HUGETLB
bool
+config ARCH_WANT_GENERAL_HUGETLB_CONTPTE
+ bool
+
config ARCH_WANTS_THP_SWAP
def_bool n
@@ -95,6 +95,7 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_NUMA) += memory-tiers.o
obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
+obj-$(CONFIG_ARCH_WANT_GENERAL_HUGETLB_CONTPTE) += hugetlb_contpte.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
obj-$(CONFIG_MEMCG_V1) += memcontrol-v1.o
obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
new file mode 100644
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos Inc.
+ */
+
+#include <linux/pgtable.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+/*
+ * Any architecture that wants to use this code needs to define:
+ * - __ptep_get()
+ * - pte_cont()
+ * - arch_contpte_get_num_contig()
+ */
+
+/*
+ * This file implements the following contpte-aware API:
+ * - huge_ptep_get()
+ */
+
+/* Read a huge pte, folding in dirty/young from every entry of a contiguous set */
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_t ret = __ptep_get(ptep);
+	unsigned long size;
+	int nr;
+
+	if (!(pte_present(ret) && pte_cont(ret)))
+		return ret;
+
+	size = page_size(pte_page(ret));
+	nr = arch_contpte_get_num_contig(ptep, size, NULL);
+
+	for (; nr > 0; nr--, ptep++) {
+		pte_t cur = __ptep_get(ptep);
+
+		if (pte_dirty(cur))
+			ret = pte_mkdirty(ret);
+		if (pte_young(cur))
+			ret = pte_mkyoung(ret);
+	}
+	return ret;
+}
After some adjustments, both architectures (arm64 and riscv) have the same
implementation of huge_ptep_get(), so move it to the generic code.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/arm64/Kconfig               |  1 +
 arch/arm64/include/asm/hugetlb.h |  3 +-
 arch/arm64/include/asm/pgtable.h | 48 +++++++++++++++++++++++++---
 arch/arm64/mm/hugetlbpage.c      | 55 ++------------------------------
 arch/riscv/Kconfig               |  1 +
 arch/riscv/include/asm/hugetlb.h |  6 ++--
 arch/riscv/include/asm/pgtable.h | 36 +++++++++++++++++++++
 arch/riscv/mm/hugetlbpage.c      | 45 ++++++--------------------
 include/linux/hugetlb_contpte.h  | 12 +++++++
 mm/Kconfig                       |  3 ++
 mm/Makefile                      |  1 +
 mm/hugetlb_contpte.c             | 44 +++++++++++++++++++++++++
 12 files changed, 157 insertions(+), 98 deletions(-)
 create mode 100644 include/linux/hugetlb_contpte.h
 create mode 100644 mm/hugetlb_contpte.c