Message ID | 20240404143308.2224141-5-ryan.roberts@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Speed up boot with faster linear map creation | expand |
On Thu, Apr 04, 2024 at 03:33:08PM +0100, Ryan Roberts wrote: > With the pgtable operations abstracted into `struct pgtable_ops`, the > early pgtable alloc, map and unmap operations are nicely centralized. So > let's enhance the implementation to speed up the clearing of pte table > mappings in the fixmap. > > Extend FIX_MAP so that we now have 16 slots in the fixmap dedicated for > pte tables. At alloc/map time, we select the next slot in the series and > map it. Or if we are at the end and no more slots are available, clear > down all of the slots and start at the beginning again. Batching the > clear like this means we can issue tlbis more efficiently. > > Due to the batching, there may still be some slots mapped at the end, so > address this by adding an optional cleanup() function to `struct > pgtable_ops` to handle this for us. > > Execution time of map_mem(), which creates the kernel linear map page > tables, was measured on different machines with different RAM configs: > > | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra > | VM, 16G | VM, 64G | VM, 256G | Metal, 512G > ---------------|-------------|-------------|-------------|------------- > | ms (%) | ms (%) | ms (%) | ms (%) > ---------------|-------------|-------------|-------------|------------- > before | 11 (0%) | 109 (0%) | 449 (0%) | 1257 (0%) > after | 6 (-46%) | 61 (-44%) | 257 (-43%) | 838 (-33%) I'd prefer to leave this as-is for now, as compared to the baseline this is the last 2-3%, and (assuming my comments on patch 3 hold) that way we don't need the pgtable_ops indirection, which'll keep the code fairly simple to understand. So unless Catalin or Will feel otherwise, I'd suggest that we take patches 1 and 2, drop 3 and 4 for now, and maybe try the alternative approach I've commented on patch 3. Does that sound ok to you? Mark. > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> > Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com> > Tested-by: Eric Chanudet <echanude@redhat.com> > --- > arch/arm64/include/asm/fixmap.h | 5 +++- > arch/arm64/include/asm/pgtable.h | 4 --- > arch/arm64/mm/fixmap.c | 11 ++++++++ > arch/arm64/mm/mmu.c | 44 +++++++++++++++++++++++++++++--- > 4 files changed, 56 insertions(+), 8 deletions(-) > > diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h > index 87e307804b99..91fcd7c5c513 100644 > --- a/arch/arm64/include/asm/fixmap.h > +++ b/arch/arm64/include/asm/fixmap.h > @@ -84,7 +84,9 @@ enum fixed_addresses { > * Used for kernel page table creation, so unmapped memory may be used > * for tables. > */ > - FIX_PTE, > +#define NR_PTE_SLOTS 16 > + FIX_PTE_END, > + FIX_PTE_BEGIN = FIX_PTE_END + NR_PTE_SLOTS - 1, > FIX_PMD, > FIX_PUD, > FIX_P4D, > @@ -108,6 +110,7 @@ void __init early_fixmap_init(void); > #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR) > > extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); > +void __init clear_fixmap_nosync(enum fixed_addresses idx); > > #include <asm-generic/fixmap.h> > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 92c9aed5e7af..4c7114d49697 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -691,10 +691,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) > /* Find an entry in the third-level page table. */ > #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) > > -#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) > -#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) > -#define pte_clear_fixmap() clear_fixmap(FIX_PTE) > - > #define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) > > /* use ONLY for statically allocated translation tables */ > diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c > index de1e09d986ad..0cb09bedeeec 100644 > --- a/arch/arm64/mm/fixmap.c > +++ b/arch/arm64/mm/fixmap.c > @@ -131,6 +131,17 @@ void __set_fixmap(enum fixed_addresses idx, > } > } > > +void __init clear_fixmap_nosync(enum fixed_addresses idx) > +{ > + unsigned long addr = __fix_to_virt(idx); > + pte_t *ptep; > + > + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); > + > + ptep = fixmap_pte(addr); > + __pte_clear(&init_mm, addr, ptep); > +} > + > void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) > { > const u64 dt_virt_base = __fix_to_virt(FIX_FDT); > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 90bf822859b8..2e3b594aa23c 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -66,11 +66,14 @@ enum pgtable_type { > * mapped either as a result of a previous call to alloc() or > * map(). The page's virtual address must be considered invalid > * after this call returns. > + * @cleanup: (Optional) Called at the end of a set of operations to cleanup > + * any lazy state. > */ > struct pgtable_ops { > void *(*alloc)(int type, phys_addr_t *pa); > void *(*map)(int type, void *parent, unsigned long addr); > void (*unmap)(int type); > + void (*cleanup)(void); > }; > > #define NO_BLOCK_MAPPINGS BIT(0) > @@ -139,9 +142,33 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, > } > EXPORT_SYMBOL(phys_mem_access_prot); > > +static int pte_slot_next __initdata = FIX_PTE_BEGIN; > + > +static void __init clear_pte_fixmap_slots(void) > +{ > + unsigned long start = __fix_to_virt(FIX_PTE_BEGIN); > + unsigned long end = __fix_to_virt(pte_slot_next); > + int i; > + > + for (i = FIX_PTE_BEGIN; i > pte_slot_next; i--) > + clear_fixmap_nosync(i); > + > + flush_tlb_kernel_range(start, end); > + pte_slot_next = FIX_PTE_BEGIN; > +} > + > +static int __init pte_fixmap_slot(void) > +{ > + if (pte_slot_next < FIX_PTE_END) > + clear_pte_fixmap_slots(); > + > + return pte_slot_next--; > +} > + > static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) > { > void *va; > + int slot; > > *pa = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, > MEMBLOCK_ALLOC_NOLEAKTRACE); > @@ -159,7 +186,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) > va = pmd_set_fixmap(*pa); > break; > case TYPE_PTE: > - va = pte_set_fixmap(*pa); > + slot = pte_fixmap_slot(); > + set_fixmap(slot, *pa); > + va = (pte_t *)__fix_to_virt(slot); > break; > default: > BUG(); > @@ -174,7 +203,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) > > static void *__init early_pgtable_map(int type, void *parent, unsigned long addr) > { > + phys_addr_t pa; > void *entry; > + int slot; > > switch (type) { > case TYPE_P4D: > @@ -187,7 +218,10 @@ static void *__init early_pgtable_map(int type, void *parent, unsigned long addr > entry = pmd_set_fixmap_offset((pud_t *)parent, addr); > break; > case TYPE_PTE: > - entry = pte_set_fixmap_offset((pmd_t *)parent, addr); > + slot = pte_fixmap_slot(); > + pa = pte_offset_phys((pmd_t *)parent, addr); > + set_fixmap(slot, pa); > + entry = (pte_t *)(__fix_to_virt(slot) + (pa & (PAGE_SIZE - 1))); > break; > default: > BUG(); > @@ -209,7 +243,7 @@ static void __init early_pgtable_unmap(int type) > pmd_clear_fixmap(); > break; > case TYPE_PTE: > - pte_clear_fixmap(); > + // Unmap lazily: see clear_pte_fixmap_slots(). > break; > default: > BUG(); > @@ -220,6 +254,7 @@ static struct pgtable_ops early_pgtable_ops __initdata = { > .alloc = early_pgtable_alloc, > .map = early_pgtable_map, > .unmap = early_pgtable_unmap, > + .cleanup = clear_pte_fixmap_slots, > }; > > bool pgattr_change_is_safe(u64 old, u64 new) > @@ -538,6 +573,9 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, > alloc_init_p4d(pgdp, addr, next, phys, prot, ops, flags); > phys += next - addr; > } while (pgdp++, addr = next, addr != end); > + > + if (ops->cleanup) > + ops->cleanup(); > } > > static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, > -- > 2.25.1 >
On 11/04/2024 14:24, Mark Rutland wrote: > On Thu, Apr 04, 2024 at 03:33:08PM +0100, Ryan Roberts wrote: >> With the pgtable operations abstracted into `struct pgtable_ops`, the >> early pgtable alloc, map and unmap operations are nicely centralized. So >> let's enhance the implementation to speed up the clearing of pte table >> mappings in the fixmap. >> >> Extend FIX_MAP so that we now have 16 slots in the fixmap dedicated for >> pte tables. At alloc/map time, we select the next slot in the series and >> map it. Or if we are at the end and no more slots are available, clear >> down all of the slots and start at the beginning again. Batching the >> clear like this means we can issue tlbis more efficiently. >> >> Due to the batching, there may still be some slots mapped at the end, so >> address this by adding an optional cleanup() function to `struct >> pgtable_ops` to handle this for us. >> >> Execution time of map_mem(), which creates the kernel linear map page >> tables, was measured on different machines with different RAM configs: >> >> | Apple M2 VM | Ampere Altra| Ampere Altra| Ampere Altra >> | VM, 16G | VM, 64G | VM, 256G | Metal, 512G >> ---------------|-------------|-------------|-------------|------------- >> | ms (%) | ms (%) | ms (%) | ms (%) >> ---------------|-------------|-------------|-------------|------------- >> before | 11 (0%) | 109 (0%) | 449 (0%) | 1257 (0%) >> after | 6 (-46%) | 61 (-44%) | 257 (-43%) | 838 (-33%) > > I'd prefer to leave this as-is for now, as compared to the baseline this is the > last 2-3%, and (assuming my comments on patch 3 hold) that way we don't need > the pgtable_ops indirection, which'll keep the code fairly simple to understand. > > So unless Catalin or Will feel otherwise, I'd suggest that we take patches 1 > and 2, drop 3 and 4 for now, and maybe try the alternative approach I've > commented on patch 3. > > Does that sound ok to you? In principle, yes. Let me do some benchmarking with your proposed set of patches to confim :) > > Mark. > >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> >> Tested-by: Itaru Kitayama <itaru.kitayama@fujitsu.com> >> Tested-by: Eric Chanudet <echanude@redhat.com> >> --- >> arch/arm64/include/asm/fixmap.h | 5 +++- >> arch/arm64/include/asm/pgtable.h | 4 --- >> arch/arm64/mm/fixmap.c | 11 ++++++++ >> arch/arm64/mm/mmu.c | 44 +++++++++++++++++++++++++++++--- >> 4 files changed, 56 insertions(+), 8 deletions(-) >> >> diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h >> index 87e307804b99..91fcd7c5c513 100644 >> --- a/arch/arm64/include/asm/fixmap.h >> +++ b/arch/arm64/include/asm/fixmap.h >> @@ -84,7 +84,9 @@ enum fixed_addresses { >> * Used for kernel page table creation, so unmapped memory may be used >> * for tables. >> */ >> - FIX_PTE, >> +#define NR_PTE_SLOTS 16 >> + FIX_PTE_END, >> + FIX_PTE_BEGIN = FIX_PTE_END + NR_PTE_SLOTS - 1, >> FIX_PMD, >> FIX_PUD, >> FIX_P4D, >> @@ -108,6 +110,7 @@ void __init early_fixmap_init(void); >> #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR) >> >> extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); >> +void __init clear_fixmap_nosync(enum fixed_addresses idx); >> >> #include <asm-generic/fixmap.h> >> >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h >> index 92c9aed5e7af..4c7114d49697 100644 >> --- a/arch/arm64/include/asm/pgtable.h >> +++ b/arch/arm64/include/asm/pgtable.h >> @@ -691,10 +691,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) >> /* Find an entry in the third-level page table. */ >> #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) >> >> -#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) >> -#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) >> -#define pte_clear_fixmap() clear_fixmap(FIX_PTE) >> - >> #define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) >> >> /* use ONLY for statically allocated translation tables */ >> diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c >> index de1e09d986ad..0cb09bedeeec 100644 >> --- a/arch/arm64/mm/fixmap.c >> +++ b/arch/arm64/mm/fixmap.c >> @@ -131,6 +131,17 @@ void __set_fixmap(enum fixed_addresses idx, >> } >> } >> >> +void __init clear_fixmap_nosync(enum fixed_addresses idx) >> +{ >> + unsigned long addr = __fix_to_virt(idx); >> + pte_t *ptep; >> + >> + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); >> + >> + ptep = fixmap_pte(addr); >> + __pte_clear(&init_mm, addr, ptep); >> +} >> + >> void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) >> { >> const u64 dt_virt_base = __fix_to_virt(FIX_FDT); >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index 90bf822859b8..2e3b594aa23c 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -66,11 +66,14 @@ enum pgtable_type { >> * mapped either as a result of a previous call to alloc() or >> * map(). The page's virtual address must be considered invalid >> * after this call returns. >> + * @cleanup: (Optional) Called at the end of a set of operations to cleanup >> + * any lazy state. >> */ >> struct pgtable_ops { >> void *(*alloc)(int type, phys_addr_t *pa); >> void *(*map)(int type, void *parent, unsigned long addr); >> void (*unmap)(int type); >> + void (*cleanup)(void); >> }; >> >> #define NO_BLOCK_MAPPINGS BIT(0) >> @@ -139,9 +142,33 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, >> } >> EXPORT_SYMBOL(phys_mem_access_prot); >> >> +static int pte_slot_next __initdata = FIX_PTE_BEGIN; >> + >> +static void __init clear_pte_fixmap_slots(void) >> +{ >> + unsigned long start = __fix_to_virt(FIX_PTE_BEGIN); >> + unsigned long end = __fix_to_virt(pte_slot_next); >> + int i; >> + >> + for (i = FIX_PTE_BEGIN; i > pte_slot_next; i--) >> + clear_fixmap_nosync(i); >> + >> + flush_tlb_kernel_range(start, end); >> + pte_slot_next = FIX_PTE_BEGIN; >> +} >> + >> +static int __init pte_fixmap_slot(void) >> +{ >> + if (pte_slot_next < FIX_PTE_END) >> + clear_pte_fixmap_slots(); >> + >> + return pte_slot_next--; >> +} >> + >> static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) >> { >> void *va; >> + int slot; >> >> *pa = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, >> MEMBLOCK_ALLOC_NOLEAKTRACE); >> @@ -159,7 +186,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) >> va = pmd_set_fixmap(*pa); >> break; >> case TYPE_PTE: >> - va = pte_set_fixmap(*pa); >> + slot = pte_fixmap_slot(); >> + set_fixmap(slot, *pa); >> + va = (pte_t *)__fix_to_virt(slot); >> break; >> default: >> BUG(); >> @@ -174,7 +203,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) >> >> static void *__init early_pgtable_map(int type, void *parent, unsigned long addr) >> { >> + phys_addr_t pa; >> void *entry; >> + int slot; >> >> switch (type) { >> case TYPE_P4D: >> @@ -187,7 +218,10 @@ static void *__init early_pgtable_map(int type, void *parent, unsigned long addr >> entry = pmd_set_fixmap_offset((pud_t *)parent, addr); >> break; >> case TYPE_PTE: >> - entry = pte_set_fixmap_offset((pmd_t *)parent, addr); >> + slot = pte_fixmap_slot(); >> + pa = pte_offset_phys((pmd_t *)parent, addr); >> + set_fixmap(slot, pa); >> + entry = (pte_t *)(__fix_to_virt(slot) + (pa & (PAGE_SIZE - 1))); >> break; >> default: >> BUG(); >> @@ -209,7 +243,7 @@ static void __init early_pgtable_unmap(int type) >> pmd_clear_fixmap(); >> break; >> case TYPE_PTE: >> - pte_clear_fixmap(); >> + // Unmap lazily: see clear_pte_fixmap_slots(). >> break; >> default: >> BUG(); >> @@ -220,6 +254,7 @@ static struct pgtable_ops early_pgtable_ops __initdata = { >> .alloc = early_pgtable_alloc, >> .map = early_pgtable_map, >> .unmap = early_pgtable_unmap, >> + .cleanup = clear_pte_fixmap_slots, >> }; >> >> bool pgattr_change_is_safe(u64 old, u64 new) >> @@ -538,6 +573,9 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, >> alloc_init_p4d(pgdp, addr, next, phys, prot, ops, flags); >> phys += next - addr; >> } while (pgdp++, addr = next, addr != end); >> + >> + if (ops->cleanup) >> + ops->cleanup(); >> } >> >> static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, >> -- >> 2.25.1 >>
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 87e307804b99..91fcd7c5c513 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -84,7 +84,9 @@ enum fixed_addresses { * Used for kernel page table creation, so unmapped memory may be used * for tables. */ - FIX_PTE, +#define NR_PTE_SLOTS 16 + FIX_PTE_END, + FIX_PTE_BEGIN = FIX_PTE_END + NR_PTE_SLOTS - 1, FIX_PMD, FIX_PUD, FIX_P4D, @@ -108,6 +110,7 @@ void __init early_fixmap_init(void); #define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR) extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +void __init clear_fixmap_nosync(enum fixed_addresses idx); #include <asm-generic/fixmap.h> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 92c9aed5e7af..4c7114d49697 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -691,10 +691,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* Find an entry in the third-level page table. */ #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) -#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) -#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) -#define pte_clear_fixmap() clear_fixmap(FIX_PTE) - #define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) /* use ONLY for statically allocated translation tables */ diff --git a/arch/arm64/mm/fixmap.c b/arch/arm64/mm/fixmap.c index de1e09d986ad..0cb09bedeeec 100644 --- a/arch/arm64/mm/fixmap.c +++ b/arch/arm64/mm/fixmap.c @@ -131,6 +131,17 @@ void __set_fixmap(enum fixed_addresses idx, } } +void __init clear_fixmap_nosync(enum fixed_addresses idx) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + __pte_clear(&init_mm, addr, ptep); +} + void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 90bf822859b8..2e3b594aa23c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -66,11 +66,14 @@ enum pgtable_type { * mapped either as a result of a previous call to alloc() or * map(). The page's virtual address must be considered invalid * after this call returns. + * @cleanup: (Optional) Called at the end of a set of operations to cleanup + * any lazy state. */ struct pgtable_ops { void *(*alloc)(int type, phys_addr_t *pa); void *(*map)(int type, void *parent, unsigned long addr); void (*unmap)(int type); + void (*cleanup)(void); }; #define NO_BLOCK_MAPPINGS BIT(0) @@ -139,9 +142,33 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); +static int pte_slot_next __initdata = FIX_PTE_BEGIN; + +static void __init clear_pte_fixmap_slots(void) +{ + unsigned long start = __fix_to_virt(FIX_PTE_BEGIN); + unsigned long end = __fix_to_virt(pte_slot_next); + int i; + + for (i = FIX_PTE_BEGIN; i > pte_slot_next; i--) + clear_fixmap_nosync(i); + + flush_tlb_kernel_range(start, end); + pte_slot_next = FIX_PTE_BEGIN; +} + +static int __init pte_fixmap_slot(void) +{ + if (pte_slot_next < FIX_PTE_END) + clear_pte_fixmap_slots(); + + return pte_slot_next--; +} + static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) { void *va; + int slot; *pa = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, MEMBLOCK_ALLOC_NOLEAKTRACE); @@ -159,7 +186,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) va = pmd_set_fixmap(*pa); break; case TYPE_PTE: - va = pte_set_fixmap(*pa); + slot = pte_fixmap_slot(); + set_fixmap(slot, *pa); + va = (pte_t *)__fix_to_virt(slot); break; default: BUG(); @@ -174,7 +203,9 @@ static void *__init early_pgtable_alloc(int type, phys_addr_t *pa) static void *__init early_pgtable_map(int type, void *parent, unsigned long addr) { + phys_addr_t pa; void *entry; + int slot; switch (type) { case TYPE_P4D: @@ -187,7 +218,10 @@ static void *__init early_pgtable_map(int type, void *parent, unsigned long addr entry = pmd_set_fixmap_offset((pud_t *)parent, addr); break; case TYPE_PTE: - entry = pte_set_fixmap_offset((pmd_t *)parent, addr); + slot = pte_fixmap_slot(); + pa = pte_offset_phys((pmd_t *)parent, addr); + set_fixmap(slot, pa); + entry = (pte_t *)(__fix_to_virt(slot) + (pa & (PAGE_SIZE - 1))); break; default: BUG(); @@ -209,7 +243,7 @@ static void __init early_pgtable_unmap(int type) pmd_clear_fixmap(); break; case TYPE_PTE: - pte_clear_fixmap(); + // Unmap lazily: see clear_pte_fixmap_slots(). break; default: BUG(); @@ -220,6 +254,7 @@ static struct pgtable_ops early_pgtable_ops __initdata = { .alloc = early_pgtable_alloc, .map = early_pgtable_map, .unmap = early_pgtable_unmap, + .cleanup = clear_pte_fixmap_slots, }; bool pgattr_change_is_safe(u64 old, u64 new) @@ -538,6 +573,9 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, alloc_init_p4d(pgdp, addr, next, phys, prot, ops, flags); phys += next - addr; } while (pgdp++, addr = next, addr != end); + + if (ops->cleanup) + ops->cleanup(); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,