@@ -290,6 +290,11 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}
+static inline pmd_t pmd_mknoncont(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
+}
+
static inline pte_t pte_mkdevmap(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
@@ -167,19 +167,36 @@ static void init_clear_pgtable(void *table)
dsb(ishst);
}
+static void split_cont_pte(pte_t *ptep)
+{
+ pte_t *_ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
+ pte_t _pte;
+ for (int i = 0; i < CONT_PTES; i++, _ptep++) {
+ _pte = READ_ONCE(*_ptep);
+ _pte = pte_mknoncont(_pte);
+ __set_pte_nosync(_ptep, _pte);
+ }
+
+ dsb(ishst);
+ isb();
+}
+
static int split_pmd(pmd_t *pmdp, pmd_t pmdval,
- phys_addr_t (*pgtable_alloc)(int))
+ phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long pfn;
pgprot_t prot;
phys_addr_t pte_phys;
pte_t *ptep;
+ bool cont;
+ int i;
if (!pmd_leaf(pmdval))
return 0;
pfn = pmd_pfn(pmdval);
prot = pmd_pgprot(pmdval);
+ cont = pgprot_val(prot) & PTE_CONT;
pte_phys = pgtable_alloc(PAGE_SHIFT);
if (!pte_phys)
@@ -188,11 +205,27 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmdval,
ptep = (pte_t *)phys_to_virt(pte_phys);
init_clear_pgtable(ptep);
prot = __pgprot(pgprot_val(prot) | PTE_TYPE_PAGE);
- for (int i = 0; i < PTRS_PER_PTE; i++, ptep++)
+
+	/* A leaf PMD is naturally aligned, so the split PTE range is CONT-eligible */
+ if ((flags & NO_CONT_MAPPINGS) == 0)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
__set_pte_nosync(ptep, pfn_pte(pfn + i, prot));
dsb(ishst);
+	/* Clear the CONT bit from every PMD in the contiguous range being split */
+ if (cont) {
+ pmd_t *_pmdp, _pmd;
+ _pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
+ for (i = 0; i < CONT_PMDS; i++, _pmdp++) {
+ _pmd = READ_ONCE(*_pmdp);
+ _pmd = pmd_mknoncont(_pmd);
+ set_pmd(_pmdp, _pmd);
+ }
+ }
+
set_pmd(pmdp, pfn_pmd(__phys_to_pfn(pte_phys),
__pgprot(PMD_TYPE_TABLE)));
@@ -200,7 +233,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmdval,
}
static int split_pud(pud_t *pudp, pud_t pudval,
- phys_addr_t (*pgtable_alloc)(int))
+ phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long pfn;
pgprot_t prot;
@@ -221,6 +254,11 @@ static int split_pud(pud_t *pudp, pud_t pudval,
pmdp = (pmd_t *)phys_to_virt(pmd_phys);
init_clear_pgtable(pmdp);
+
+	/* A leaf PUD is naturally aligned, so the split PMD range is CONT-eligible */
+ if ((flags & NO_CONT_MAPPINGS) == 0)
+ prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+
for (int i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
__set_pmd_nosync(pmdp, pfn_pmd(pfn, prot));
pfn += step;
@@ -235,11 +273,18 @@ static int split_pud(pud_t *pudp, pud_t pudval,
}
static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
- phys_addr_t phys, pgprot_t prot)
+ phys_addr_t phys, pgprot_t prot, int flags)
{
do {
pte_t old_pte = __ptep_get(ptep);
+ if (flags & SPLIT_MAPPINGS) {
+ if (pte_cont(old_pte))
+ split_cont_pte(ptep);
+
+ continue;
+ }
+
/*
* Required barriers to make this visible to the table walker
* are deferred to the end of alloc_init_cont_pte().
@@ -266,8 +311,16 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long next;
pmd_t pmd = READ_ONCE(*pmdp);
pte_t *ptep;
+ bool split = flags & SPLIT_MAPPINGS;
BUG_ON(pmd_sect(pmd));
+
+ if (split) {
+ BUG_ON(pmd_none(pmd));
+ ptep = pte_offset_kernel(pmdp, addr);
+ goto split_pgtable;
+ }
+
if (pmd_none(pmd)) {
pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
phys_addr_t pte_phys;
@@ -287,6 +340,7 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
ptep = pte_set_fixmap_offset(pmdp, addr);
}
+split_pgtable:
do {
pgprot_t __prot = prot;
@@ -297,7 +351,7 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
(flags & NO_CONT_MAPPINGS) == 0)
__prot = __pgprot(pgprot_val(prot) | PTE_CONT);
- init_pte(ptep, addr, next, phys, __prot);
+ init_pte(ptep, addr, next, phys, __prot, flags);
ptep += pte_index(next) - pte_index(addr);
phys += next - addr;
@@ -308,7 +362,8 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
* ensure that all previous pgtable writes are visible to the table
* walker.
*/
- pte_clear_fixmap();
+ if (!split)
+ pte_clear_fixmap();
return 0;
}
@@ -327,7 +382,12 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
if (split) {
- ret = split_pmd(pmdp, old_pmd, pgtable_alloc);
+ ret = split_pmd(pmdp, old_pmd, pgtable_alloc, flags);
+ if (ret)
+ break;
+
+ ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot,
+ pgtable_alloc, flags);
if (ret)
break;
@@ -469,7 +529,7 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
next = pud_addr_end(addr, end);
if (split) {
- ret = split_pud(pudp, old_pud, pgtable_alloc);
+ ret = split_pud(pudp, old_pud, pgtable_alloc, flags);
if (ret)
break;
@@ -846,9 +906,6 @@ static void __init map_mem(pgd_t *pgdp)
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
- if (rodata_full)
- flags |= NO_CONT_MAPPINGS;
-
/*
* Take care not to create a writable alias for the
* read-only text and rodata sections of the kernel image.
@@ -1547,9 +1604,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
- if (rodata_full)
- flags |= NO_CONT_MAPPINGS;
-
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
size, params->pgprot, __pgd_pgtable_alloc,
flags);
@@ -43,6 +43,8 @@ static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
struct page_change_data *cdata = data;
pte_t pte = __ptep_get(ptep);
+ BUG_ON(pte_cont(pte));
+
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
Add support for splitting CONT mappings in order to support CONT mappings for the direct map. This should help reduce TLB pressure further. When splitting a PUD, all resulting PMDs will have the CONT bit set, since a leaf PUD must be naturally aligned. When splitting a PMD, all resulting PTEs will likewise have the CONT bit set, since a leaf PMD must be naturally aligned too; however, the PMDs in the contiguous range containing the split PMD will have their CONT bit cleared. CONT PTEs are split by clearing the CONT bit for all PTEs in the range. Signed-off-by: Yang Shi <yang@os.amperecomputing.com> --- arch/arm64/include/asm/pgtable.h | 5 ++ arch/arm64/mm/mmu.c | 82 ++++++++++++++++++++++++++------ arch/arm64/mm/pageattr.c | 2 + 3 files changed, 75 insertions(+), 14 deletions(-)