Message ID | 20200216081843.28670-8-rppt@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: remove __ARCH_HAS_5LEVEL_HACK | expand |
Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > From: Mike Rapoport <rppt@linux.ibm.com> > > Implement primitives necessary for the 4th level folding, add walks of p4d > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. I don't think it is worth adding all these additional walks of p4d; this patch could be limited to changes like: - pud = pud_offset(pgd, gpa); + pud = pud_offset(p4d_offset(pgd, gpa), gpa); The additional walks should be added through another patch the day powerpc needs them. See below for more comments. > > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx > --- > arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - > arch/powerpc/include/asm/book3s/64/hash.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgtable.h | 58 ++++++++++-------- > arch/powerpc/include/asm/book3s/64/radix.h | 6 +- > arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - > arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- > .../include/asm/nohash/64/pgtable-4k.h | 32 +++++----- > arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- > arch/powerpc/include/asm/pgtable.h | 8 +++ > arch/powerpc/kvm/book3s_64_mmu_radix.c | 59 ++++++++++++++++--- > arch/powerpc/lib/code-patching.c | 7 ++- > arch/powerpc/mm/book3s32/mmu.c | 2 +- > arch/powerpc/mm/book3s32/tlb.c | 4 +- > arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- > arch/powerpc/mm/book3s64/radix_pgtable.c | 19 ++++-- > arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- > arch/powerpc/mm/hugetlbpage.c | 28 +++++---- > arch/powerpc/mm/kasan/kasan_init_32.c | 8 +-- > arch/powerpc/mm/mem.c | 4 +- > arch/powerpc/mm/nohash/40x.c | 4 +- > arch/powerpc/mm/nohash/book3e_pgtable.c | 15 +++-- > arch/powerpc/mm/pgtable.c | 25 +++++++- > arch/powerpc/mm/pgtable_32.c | 28 +++++---- > arch/powerpc/mm/pgtable_64.c | 10 ++-- > arch/powerpc/mm/ptdump/hashpagetable.c | 20 ++++++- > 
arch/powerpc/mm/ptdump/ptdump.c | 22 ++++++- > arch/powerpc/xmon/xmon.c | 17 +++++- > 28 files changed, 284 insertions(+), 120 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h > index 5b39c11e884a..39ec11371be0 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > @@ -2,7 +2,6 @@ > #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H > #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H > > -#define __ARCH_USE_5LEVEL_HACK > #include <asm-generic/pgtable-nopmd.h> > > #include <asm/book3s/32/hash.h> > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > index 2781ebf6add4..876d1528c2cf 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash.h > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) > > #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) > #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) > -static inline int hash__pgd_bad(pgd_t pgd) > +static inline int hash__p4d_bad(p4d_t p4d) > { > - return (pgd_val(pgd) == 0); > + return (p4d_val(p4d) == 0); > } > #ifdef CONFIG_STRICT_KERNEL_RWX > extern void hash__mark_rodata_ro(void); > diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h > index a41e91bd0580..69c5b051734f 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h > +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h > @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); > } > > -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > +static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) > { > - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > } > > static inline pud_t 
*pud_alloc_one(struct mm_struct *mm, unsigned long addr) > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 201a69e6a355..ddddbafff0ab 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -2,7 +2,7 @@ > #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > -#include <asm-generic/5level-fixup.h> > +#include <asm-generic/pgtable-nop4d.h> > > #ifndef __ASSEMBLY__ > #include <linux/mmdebug.h> > @@ -251,7 +251,7 @@ extern unsigned long __pmd_frag_size_shift; > /* Bits to mask out from a PUD to get to the PMD page */ > #define PUD_MASKED_BITS 0xc0000000000000ffUL > /* Bits to mask out from a PGD to get to the PUD page */ > -#define PGD_MASKED_BITS 0xc0000000000000ffUL > +#define P4D_MASKED_BITS 0xc0000000000000ffUL > > /* > * Used as an indicator for rcu callback functions > @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) > return pte_access_permitted(pud_pte(pud), write); > } > > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) > +static inline __be64 p4d_raw(p4d_t x) > +{ > + return pgd_raw(x.pgd); > +} > + Shouldn't this be defined in asm/pgtable-be-types.h, just like other __pxx_raw() ? 
> +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) > > -static inline void pgd_clear(pgd_t *pgdp) > +static inline void p4d_clear(p4d_t *p4dp) > { > - *pgdp = __pgd(0); > + *p4dp = __p4d(0); > } > > -static inline int pgd_none(pgd_t pgd) > +static inline int p4d_none(p4d_t p4d) > { > - return !pgd_raw(pgd); > + return !p4d_raw(p4d); > } > > -static inline int pgd_present(pgd_t pgd) > +static inline int p4d_present(p4d_t p4d) > { > - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); > } > > -static inline pte_t pgd_pte(pgd_t pgd) > +static inline pte_t p4d_pte(p4d_t p4d) > { > - return __pte_raw(pgd_raw(pgd)); > + return __pte_raw(p4d_raw(p4d)); > } > > -static inline pgd_t pte_pgd(pte_t pte) > +static inline p4d_t pte_p4d(pte_t pte) > { > - return __pgd_raw(pte_raw(pte)); > + return __p4d_raw(pte_raw(pte)); > } > > -static inline int pgd_bad(pgd_t pgd) > +static inline int p4d_bad(p4d_t p4d) > { > if (radix_enabled()) > - return radix__pgd_bad(pgd); > - return hash__pgd_bad(pgd); > + return radix__p4d_bad(p4d); > + return hash__p4d_bad(p4d); > } > > -#define pgd_access_permitted pgd_access_permitted > -static inline bool pgd_access_permitted(pgd_t pgd, bool write) > +#define p4d_access_permitted p4d_access_permitted > +static inline bool p4d_access_permitted(p4d_t p4d, bool write) > { > - return pte_access_permitted(pgd_pte(pgd), write); > + return pte_access_permitted(p4d_pte(p4d), write); > } > > -extern struct page *pgd_page(pgd_t pgd); > +extern struct page *p4d_page(p4d_t p4d); > > /* Pointers in the page table tree are physical addresses */ > #define __pgtable_ptr_val(ptr) __pa(ptr) > > #define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) > #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) > -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) > +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) > > #define pgd_index(address) (((address) >> 
(PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) > #define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) > @@ -1010,8 +1016,8 @@ extern struct page *pgd_page(pgd_t pgd); > > #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) > > -#define pud_offset(pgdp, addr) \ > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > +#define pud_offset(p4dp, addr) \ > + (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) > #define pmd_offset(pudp,addr) \ > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > #define pte_offset_kernel(dir,addr) \ > @@ -1368,6 +1374,12 @@ static inline bool pud_is_leaf(pud_t pud) > return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); > } > > +#define p4d_is_leaf p4d_is_leaf > +static inline bool p4d_is_leaf(p4d_t p4d) > +{ > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); > +} > + > #define pgd_is_leaf pgd_is_leaf > #define pgd_leaf pgd_is_leaf > static inline bool pgd_is_leaf(pgd_t pgd) [...] > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index 8cc543ed114c..0a05fddd7881 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -139,6 +139,14 @@ static inline bool pud_is_leaf(pud_t pud) > } > #endif > > +#ifndef p4d_is_leaf > +#define p4d_is_leaf p4d_is_leaf > +static inline bool p4d_is_leaf(p4d_t p4d) > +{ > + return false; > +} > +#endif > + > #ifndef pgd_is_leaf > #define pgd_is_leaf pgd_is_leaf > static inline bool pgd_is_leaf(pgd_t pgd) > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c > index 803940d79b73..5aacfa0b27ef 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > @@ -494,17 +494,39 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, > pud_free(kvm->mm, pud); > } > > +static void kvmppc_unmap_free_p4d(struct kvm *kvm, p4d_t *p4d, > + unsigned int lpid) > +{ > + unsigned long iu; > + p4d_t *p = p4d; > + > + 
for (iu = 0; iu < PTRS_PER_P4D; ++iu, ++p) { > if (!p4d_present(*p)) > continue; > if (p4d_is_leaf(*p)) { > p4d_clear(p); > } else { > pud_t *pud; > > pud = pud_offset(p, 0); > kvmppc_unmap_free_pud(kvm, pud, lpid); > p4d_clear(p); > } > } > p4d_free(kvm->mm, p4d); > +} > + > void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) > { > unsigned long ig; > > for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { > - pud_t *pud; > + p4d_t *p4d; > > if (!pgd_present(*pgd)) > continue; > - pud = pud_offset(pgd, 0); > - kvmppc_unmap_free_pud(kvm, pud, lpid); > + p4d = p4d_offset(pgd, 0); > + kvmppc_unmap_free_p4d(kvm, p4d, lpid); > pgd_clear(pgd); > } > } > @@ -566,6 +588,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > unsigned long *rmapp, struct rmap_nested **n_rmap) > { > pgd_t *pgd; > + p4d_t *p4d, *new_p4d = NULL; > pud_t *pud, *new_pud = NULL; > pmd_t *pmd, *new_pmd = NULL; > pte_t *ptep, *new_ptep = NULL; > @@ -573,9 +596,15 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Traverse the guest's 2nd-level tree, allocate new levels needed */ > pgd = pgtable + pgd_index(gpa); > - pud = NULL; > + p4d = NULL; > if (pgd_present(*pgd)) > - pud = pud_offset(pgd, gpa); > + p4d = p4d_offset(pgd, gpa); > + else > + new_p4d = p4d_alloc_one(kvm->mm, gpa); > + > + pud = NULL; > + if (p4d_present(*p4d)) > + pud = pud_offset(p4d, gpa); Is it worth adding all this new code? My understanding is that the series' objective is to get rid of __ARCH_HAS_5LEVEL_HACK, not to add support for 5 levels to an architecture that does not need it (at least for now). If we want to add support for 5 levels, it can be done later in another patch. 
Here I think your change could be limited to: - pud = pud_offset(pgd, gpa); + pud = pud_offset(p4d_offset(pgd, gpa), gpa); > else > new_pud = pud_alloc_one(kvm->mm, gpa); > > @@ -597,12 +626,18 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > /* Now traverse again under the lock and change the tree */ > ret = -ENOMEM; > if (pgd_none(*pgd)) { > + if (!new_p4d) > + goto out_unlock; > + pgd_populate(kvm->mm, pgd, new_p4d); > + new_p4d = NULL; > + } > + if (p4d_none(*p4d)) { > if (!new_pud) > goto out_unlock; > - pgd_populate(kvm->mm, pgd, new_pud); > + p4d_populate(kvm->mm, p4d, new_pud); > new_pud = NULL; > } > - pud = pud_offset(pgd, gpa); > + pud = pud_offset(p4d, gpa); > if (pud_is_leaf(*pud)) { > unsigned long hgpa = gpa & PUD_MASK; > > @@ -1220,6 +1255,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > pgd_t *pgt; > struct kvm_nested_guest *nested; > pgd_t pgd, *pgdp; > + p4d_t p4d, *p4dp; > pud_t pud, *pudp; > pmd_t pmd, *pmdp; > pte_t *ptep; > @@ -1298,7 +1334,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > continue; > } > > - pudp = pud_offset(&pgd, gpa); > + p4dp = p4d_offset(&pgd, gpa); > + p4d = READ_ONCE(*p4dp); > + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { > + gpa = (gpa & P4D_MASK) + P4D_SIZE; > + continue; > + } > + > + pudp = pud_offset(&p4d, gpa); Same, here you are forcing a useless read with READ_ONCE(). Your change could be limited to - pudp = pud_offset(&pgd, gpa); + pudp = pud_offset(p4d_offset(&pgd, gpa), gpa); This comment applies to many other places. 
> pud = READ_ONCE(*pudp); > if (!(pud_val(pud) & _PAGE_PRESENT)) { > gpa = (gpa & PUD_MASK) + PUD_SIZE; > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index 3345f039a876..7a59f6863cec 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) > pte_t *ptep; > pmd_t *pmdp; > pud_t *pudp; > + p4d_t *p4dp; > pgd_t *pgdp; > > pgdp = pgd_offset_k(addr); > if (unlikely(!pgdp)) > return -EINVAL; > > - pudp = pud_offset(pgdp, addr); > + p4dp = p4d_offset(pgdp, addr); > + if (unlikely(!p4dp)) > + return -EINVAL; > + > + pudp = pud_offset(p4dp, addr); > if (unlikely(!pudp)) > return -EINVAL; > > diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c > index 0a1c65a2c565..b2fc3e71165c 100644 > --- a/arch/powerpc/mm/book3s32/mmu.c > +++ b/arch/powerpc/mm/book3s32/mmu.c > @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) > > if (!Hash) > return; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); If we continue like this, in ten years this line is going to be many kilometers long. I think the above would be worth a generic helper. 
> if (!pmd_none(*pmd)) > add_hash_page(mm->context.id, ea, pmd_val(*pmd)); > } > diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c > index 2fcd321040ff..175bc33b41b7 100644 > --- a/arch/powerpc/mm/book3s32/tlb.c > +++ b/arch/powerpc/mm/book3s32/tlb.c > @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, > if (start >= end) > return; > end = (end - 1) | ~PAGE_MASK; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); > for (;;) { > pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; > if (pmd_end > end) > @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) > return; > } > mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); > if (!pmd_none(*pmd)) > flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); > } > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 64733b9cb20a..9cd15937e88a 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, > int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > { > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); > if (slab_is_available()) { > pgdp = pgd_offset_k(ea); > - pudp = pud_alloc(&init_mm, pgdp, ea); > + p4dp = p4d_offset(pgdp, ea); > + pudp = pud_alloc(&init_mm, p4dp, ea); Could be a single line, without a new var. 
- pudp = pud_alloc(&init_mm, pgdp, ea); + pudp = pud_alloc(&init_mm, p4d_offset(pgdp, ea), ea); Same kind of comments as already done apply to the rest. Christophe
On Sun, Feb 16, 2020 at 11:41:07AM +0100, Christophe Leroy wrote: > > > Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > > From: Mike Rapoport <rppt@linux.ibm.com> > > > > Implement primitives necessary for the 4th level folding, add walks of p4d > > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. > > I don't think it is worth adding all this additionnals walks of p4d, this > patch could be limited to changes like: > > - pud = pud_offset(pgd, gpa); > + pud = pud_offset(p4d_offset(pgd, gpa), gpa); > > The additionnal walks should be added through another patch the day powerpc > need them. Ok, I'll update the patch to reduce walking the p4d. > See below for more comments. > > > > > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > > Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx > > --- ... > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > > index 201a69e6a355..ddddbafff0ab 100644 > > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > > @@ -2,7 +2,7 @@ > > #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > -#include <asm-generic/5level-fixup.h> > > +#include <asm-generic/pgtable-nop4d.h> > > #ifndef __ASSEMBLY__ > > #include <linux/mmdebug.h> > > @@ -251,7 +251,7 @@ extern unsigned long __pmd_frag_size_shift; > > /* Bits to mask out from a PUD to get to the PMD page */ > > #define PUD_MASKED_BITS 0xc0000000000000ffUL > > /* Bits to mask out from a PGD to get to the PUD page */ > > -#define PGD_MASKED_BITS 0xc0000000000000ffUL > > +#define P4D_MASKED_BITS 0xc0000000000000ffUL > > /* > > * Used as an indicator for rcu callback functions > > @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) > > return pte_access_permitted(pud_pte(pud), write); > > } > > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > > +#define __p4d_raw(x) 
((p4d_t) { __pgd_raw(x) }) > > +static inline __be64 p4d_raw(p4d_t x) > > +{ > > + return pgd_raw(x.pgd); > > +} > > + > > Shouldn't this be defined in asm/pgtable-be-types.h, just like other > __pxx_raw() ? Ideally yes, but this creates weird header file dependencies and untangling them would generate way too much churn. > > +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) > > -static inline void pgd_clear(pgd_t *pgdp) > > +static inline void p4d_clear(p4d_t *p4dp) > > { > > - *pgdp = __pgd(0); > > + *p4dp = __p4d(0); > > } ... > > @@ -573,9 +596,15 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Traverse the guest's 2nd-level tree, allocate new levels needed */ > > pgd = pgtable + pgd_index(gpa); > > - pud = NULL; > > + p4d = NULL; > > if (pgd_present(*pgd)) > > - pud = pud_offset(pgd, gpa); > > + p4d = p4d_offset(pgd, gpa); > > + else > > + new_p4d = p4d_alloc_one(kvm->mm, gpa); > > + > > + pud = NULL; > > + if (p4d_present(*p4d)) > > + pud = pud_offset(p4d, gpa); > > Is it worth adding all this new code ? > > My understanding is that the series objective is to get rid of > __ARCH_HAS_5LEVEL_HACK, to to add support for 5 levels to an architecture > that not need it (at least for now). > If we want to add support for 5 levels, it can be done later in another > patch. > > Here I think your change could be limited to: > > - pud = pud_offset(pgd, gpa); > + pud = pud_offset(p4d_offset(pgd, gpa), gpa); This won't work. Without __ARCH_USE_5LEVEL_HACK defined pgd_present() is hardwired to 1 and the actual check for the top level is performed with p4d_present(). The 'else' clause that allocates p4d will never be taken and it could be removed, but I prefer to keep it for consistency. 
> > else > > new_pud = pud_alloc_one(kvm->mm, gpa); > > @@ -597,12 +626,18 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Now traverse again under the lock and change the tree */ > > ret = -ENOMEM; > > if (pgd_none(*pgd)) { > > + if (!new_p4d) > > + goto out_unlock; > > + pgd_populate(kvm->mm, pgd, new_p4d); > > + new_p4d = NULL; > > + } > > + if (p4d_none(*p4d)) { > > if (!new_pud) > > goto out_unlock; > > - pgd_populate(kvm->mm, pgd, new_pud); > > + p4d_populate(kvm->mm, p4d, new_pud); > > new_pud = NULL; > > } > > - pud = pud_offset(pgd, gpa); > > + pud = pud_offset(p4d, gpa); > > if (pud_is_leaf(*pud)) { > > unsigned long hgpa = gpa & PUD_MASK; > > @@ -1220,6 +1255,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > > pgd_t *pgt; > > struct kvm_nested_guest *nested; > > pgd_t pgd, *pgdp; > > + p4d_t p4d, *p4dp; > > pud_t pud, *pudp; > > pmd_t pmd, *pmdp; > > pte_t *ptep; > > @@ -1298,7 +1334,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > > continue; > > } > > - pudp = pud_offset(&pgd, gpa); > > + p4dp = p4d_offset(&pgd, gpa); > > + p4d = READ_ONCE(*p4dp); > > + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { > > + gpa = (gpa & P4D_MASK) + P4D_SIZE; > > + continue; > > + } > > + > > + pudp = pud_offset(&p4d, gpa); > > Same, here you are forcing a useless read with READ_ONCE(). > > Your change could be limited to > > - pudp = pud_offset(&pgd, gpa); > + pudp = pud_offset(p4d_offset(&pgd, gpa), gpa); Here again the actual check must be done against p4d rather than pgd. We could skip READ_ONCE() for pgd, but since it is a debugfs method I don't think it is more important than code consistency. > This comment applies to many other places. 
I'll make another pass to see where we can take the shortcut and use pudp = pud_offset(p4d_offset(...)) > > pud = READ_ONCE(*pudp); > > if (!(pud_val(pud) & _PAGE_PRESENT)) { > > gpa = (gpa & PUD_MASK) + PUD_SIZE; > > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > > index 3345f039a876..7a59f6863cec 100644 > > --- a/arch/powerpc/lib/code-patching.c > > +++ b/arch/powerpc/lib/code-patching.c > > @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) > > pte_t *ptep; > > pmd_t *pmdp; > > pud_t *pudp; > > + p4d_t *p4dp; > > pgd_t *pgdp; > > pgdp = pgd_offset_k(addr); > > if (unlikely(!pgdp)) > > return -EINVAL; > > - pudp = pud_offset(pgdp, addr); > > + p4dp = p4d_offset(pgdp, addr); > > + if (unlikely(!p4dp)) > > + return -EINVAL; > > + > > + pudp = pud_offset(p4dp, addr); > > if (unlikely(!pudp)) > > return -EINVAL; > > diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c > > index 0a1c65a2c565..b2fc3e71165c 100644 > > --- a/arch/powerpc/mm/book3s32/mmu.c > > +++ b/arch/powerpc/mm/book3s32/mmu.c > > @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) > > if (!Hash) > > return; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); > > If we continue like this, in ten years this like is going to be many > kilometers long. > > I think the above would be worth a generic helper. Agree. My plan was to first unify all the architectures and then start introducing the generic helpers, like e.g. pmd_offset_mm(). 
> > if (!pmd_none(*pmd)) > > add_hash_page(mm->context.id, ea, pmd_val(*pmd)); > > } > > diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c > > index 2fcd321040ff..175bc33b41b7 100644 > > --- a/arch/powerpc/mm/book3s32/tlb.c > > +++ b/arch/powerpc/mm/book3s32/tlb.c > > @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, > > if (start >= end) > > return; > > end = (end - 1) | ~PAGE_MASK; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); > > for (;;) { > > pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; > > if (pmd_end > end) > > @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) > > return; > > } > > mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); > > if (!pmd_none(*pmd)) > > flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); > > } > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c > > index 64733b9cb20a..9cd15937e88a 100644 > > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > > @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, > > int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > { > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); > > if (slab_is_available()) { > > pgdp = pgd_offset_k(ea); > > - pudp = pud_alloc(&init_mm, pgdp, ea); > > + p4dp = p4d_offset(pgdp, ea); > > + pudp = pud_alloc(&init_mm, p4dp, ea); > > Could be a single 
line, without a new var. > > - pudp = pud_alloc(&init_mm, pgdp, ea); > + pudp = pud_alloc(&init_mm, p4d_offset(pgdp, ea), ea); > > > Same kind of comments as already done apply to the rest. > > Christophe
Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > From: Mike Rapoport <rppt@linux.ibm.com> > > Implement primitives necessary for the 4th level folding, add walks of p4d > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. > > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx > --- > arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - > arch/powerpc/include/asm/book3s/64/hash.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgtable.h | 58 ++++++++++-------- > arch/powerpc/include/asm/book3s/64/radix.h | 6 +- > arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - > arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- > .../include/asm/nohash/64/pgtable-4k.h | 32 +++++----- > arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- > arch/powerpc/include/asm/pgtable.h | 8 +++ > arch/powerpc/kvm/book3s_64_mmu_radix.c | 59 ++++++++++++++++--- > arch/powerpc/lib/code-patching.c | 7 ++- > arch/powerpc/mm/book3s32/mmu.c | 2 +- > arch/powerpc/mm/book3s32/tlb.c | 4 +- > arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- > arch/powerpc/mm/book3s64/radix_pgtable.c | 19 ++++-- > arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- > arch/powerpc/mm/hugetlbpage.c | 28 +++++---- > arch/powerpc/mm/kasan/kasan_init_32.c | 8 +-- > arch/powerpc/mm/mem.c | 4 +- > arch/powerpc/mm/nohash/40x.c | 4 +- > arch/powerpc/mm/nohash/book3e_pgtable.c | 15 +++-- > arch/powerpc/mm/pgtable.c | 25 +++++++- > arch/powerpc/mm/pgtable_32.c | 28 +++++---- > arch/powerpc/mm/pgtable_64.c | 10 ++-- > arch/powerpc/mm/ptdump/hashpagetable.c | 20 ++++++- > arch/powerpc/mm/ptdump/ptdump.c | 22 ++++++- > arch/powerpc/xmon/xmon.c | 17 +++++- > 28 files changed, 284 insertions(+), 120 deletions(-) > > diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c > index 206156255247..7bd4b81d5b5d 100644 > --- a/arch/powerpc/mm/ptdump/ptdump.c > +++ 
b/arch/powerpc/mm/ptdump/ptdump.c > @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) > } > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) > { > - pud_t *pud = pud_offset(pgd, 0); > + pud_t *pud = pud_offset(p4d, 0); > unsigned long addr; > unsigned int i; > > @@ -293,6 +293,22 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > } > } > > +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) > +{ > + p4d_t *p4d = p4d_offset(pgd, 0); > + unsigned long addr; > + unsigned int i; > + > + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { > + addr = start + i * P4D_SIZE; > + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) > + /* p4d exists */ > + walk_pud(st, p4d, addr); > + else > + note_page(st, addr, 2, p4d_val(*p4d)); Level 2 is already used by walk_pud(). I think you have to increment the level used in walk_pud() and walk_pmd() and walk_pte() > + } > +} > + > static void walk_pagetables(struct pg_state *st) > { > unsigned int i; > @@ -306,7 +322,7 @@ static void walk_pagetables(struct pg_state *st) > for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { > if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) > /* pgd exists */ > - walk_pud(st, pgd, addr); > + walk_p4d(st, pgd, addr); > else > note_page(st, addr, 1, pgd_val(*pgd)); > } Christophe
On Wed, Feb 19, 2020 at 01:07:55PM +0100, Christophe Leroy wrote: > > Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > > diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c > > index 206156255247..7bd4b81d5b5d 100644 > > --- a/arch/powerpc/mm/ptdump/ptdump.c > > +++ b/arch/powerpc/mm/ptdump/ptdump.c > > @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) > > } > > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > > +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) > > { > > - pud_t *pud = pud_offset(pgd, 0); > > + pud_t *pud = pud_offset(p4d, 0); > > unsigned long addr; > > unsigned int i; > > @@ -293,6 +293,22 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > > } > > } > > +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) > > +{ > > + p4d_t *p4d = p4d_offset(pgd, 0); > > + unsigned long addr; > > + unsigned int i; > > + > > + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { > > + addr = start + i * P4D_SIZE; > > + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) > > + /* p4d exists */ > > + walk_pud(st, p4d, addr); > > + else > > + note_page(st, addr, 2, p4d_val(*p4d)); > > Level 2 is already used by walk_pud(). > > I think you have to increment the level used in walk_pud() and walk_pmd() > and walk_pte() Thanks for catching this! I'll fix the numbers in the next version. > > + } > > +} > > + > > static void walk_pagetables(struct pg_state *st) > > { > > unsigned int i; > > @@ -306,7 +322,7 @@ static void walk_pagetables(struct pg_state *st) > > for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { > > if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) > > /* pgd exists */ > > - walk_pud(st, pgd, addr); > > + walk_p4d(st, pgd, addr); > > else > > note_page(st, addr, 1, pgd_val(*pgd)); > > } > > Christophe
On Tue, Feb 18, 2020 at 12:54:40PM +0200, Mike Rapoport wrote: > On Sun, Feb 16, 2020 at 11:41:07AM +0100, Christophe Leroy wrote: > > > > > > Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > > > From: Mike Rapoport <rppt@linux.ibm.com> > > > > > > Implement primitives necessary for the 4th level folding, add walks of p4d > > > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. > > > > I don't think it is worth adding all this additionnals walks of p4d, this > > patch could be limited to changes like: > > > > - pud = pud_offset(pgd, gpa); > > + pud = pud_offset(p4d_offset(pgd, gpa), gpa); > > > > The additionnal walks should be added through another patch the day powerpc > > need them. > > Ok, I'll update the patch to reduce walking the p4d. Here's what I have with more direct accesses from pgd to pud. From 6c59a86ce8394fb6100e9b6ced2e346981fb0ce9 Mon Sep 17 00:00:00 2001 From: Mike Rapoport <rppt@linux.ibm.com> Date: Sun, 24 Nov 2019 15:38:00 +0200 Subject: [PATCH v3] powerpc: add support for folded p4d page tables Implement primitives necessary for the 4th level folding, add walks of p4d level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. 
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx --- v3: * reduce amount of added p4d walks * kill pgtable_32::get_pteptr and traverse page table in pgtable_32::__change_page_attr_noflush arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - arch/powerpc/include/asm/book3s/64/hash.h | 4 +- arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 60 ++++++++++--------- arch/powerpc/include/asm/book3s/64/radix.h | 6 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- .../include/asm/nohash/64/pgtable-4k.h | 32 +++++----- arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- arch/powerpc/include/asm/pgtable.h | 6 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 30 ++++++---- arch/powerpc/lib/code-patching.c | 7 ++- arch/powerpc/mm/book3s32/mmu.c | 2 +- arch/powerpc/mm/book3s32/tlb.c | 4 +- arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 26 +++++--- arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- arch/powerpc/mm/hugetlbpage.c | 28 +++++---- arch/powerpc/mm/kasan/kasan_init_32.c | 8 +-- arch/powerpc/mm/mem.c | 4 +- arch/powerpc/mm/nohash/40x.c | 4 +- arch/powerpc/mm/nohash/book3e_pgtable.c | 15 ++--- arch/powerpc/mm/pgtable.c | 30 ++++++---- arch/powerpc/mm/pgtable_32.c | 45 +++----------- arch/powerpc/mm/pgtable_64.c | 10 ++-- arch/powerpc/mm/ptdump/hashpagetable.c | 20 ++++++- arch/powerpc/mm/ptdump/ptdump.c | 14 +++-- arch/powerpc/xmon/xmon.c | 18 +++--- 28 files changed, 213 insertions(+), 184 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 5b39c11e884a..39ec11371be0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -2,7 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H -#define 
__ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4..876d1528c2cf 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) -static inline int hash__pgd_bad(pgd_t pgd) +static inline int hash__p4d_bad(p4d_t p4d) { - return (pgd_val(pgd) == 0); + return (p4d_val(p4d) == 0); } #ifdef CONFIG_STRICT_KERNEL_RWX extern void hash__mark_rodata_ro(void); diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index a41e91bd0580..69c5b051734f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) { - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355..fa60e8594b9f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> @@ -251,7 +251,7 @@ extern unsigned long 
__pmd_frag_size_shift; /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0xc0000000000000ffUL /* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0xc0000000000000ffUL +#define P4D_MASKED_BITS 0xc0000000000000ffUL /* * Used as an indicator for rcu callback functions @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) return pte_access_permitted(pud_pte(pud), write); } -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) +static inline __be64 p4d_raw(p4d_t x) +{ + return pgd_raw(x.pgd); +} + +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline int pgd_none(pgd_t pgd) +static inline int p4d_none(p4d_t p4d) { - return !pgd_raw(pgd); + return !p4d_raw(p4d); } -static inline int pgd_present(pgd_t pgd) +static inline int p4d_present(p4d_t p4d) { - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte_raw(pgd_raw(pgd)); + return __pte_raw(p4d_raw(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd_raw(pte_raw(pte)); + return __p4d_raw(pte_raw(pte)); } -static inline int pgd_bad(pgd_t pgd) +static inline int p4d_bad(p4d_t p4d) { if (radix_enabled()) - return radix__pgd_bad(pgd); - return hash__pgd_bad(pgd); + return radix__p4d_bad(p4d); + return hash__p4d_bad(p4d); } -#define pgd_access_permitted pgd_access_permitted -static inline bool pgd_access_permitted(pgd_t pgd, bool write) +#define p4d_access_permitted p4d_access_permitted +static inline bool p4d_access_permitted(p4d_t p4d, bool write) { - return pte_access_permitted(pgd_pte(pgd), write); + return pte_access_permitted(p4d_pte(p4d), write); 
} -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); /* Pointers in the page table tree are physical addresses */ #define __pgtable_ptr_val(ptr) __pa(ptr) #define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) #define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) @@ -1010,8 +1016,8 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) +#define pud_offset(p4dp, addr) \ + (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) #define pte_offset_kernel(dir,addr) \ @@ -1368,11 +1374,11 @@ static inline bool pud_is_leaf(pud_t pud) return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } -#define pgd_is_leaf pgd_is_leaf -#define pgd_leaf pgd_is_leaf -static inline bool pgd_is_leaf(pgd_t pgd) +#define p4d_is_leaf p4d_is_leaf +#define p4d_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) { - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); } #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae..9bca2ac64220 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -30,7 +30,7 @@ /* Don't have anything in the reserved bits and leaf bits */ #define RADIX_PMD_BAD_BITS 0x60000000000000e0UL #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL -#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL +#define 
RADIX_P4D_BAD_BITS 0x60000000000000e0UL #define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) #define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) @@ -227,9 +227,9 @@ static inline int radix__pud_bad(pud_t pud) } -static inline int radix__pgd_bad(pgd_t pgd) +static inline int radix__p4d_bad(p4d_t p4d) { - return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); + return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 60c4d829152e..d4c2c4259fa3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -2,7 +2,6 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index b9534a793293..668aee6017e7 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -15,7 +15,7 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) +#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index c40ec32b8194..81b1c54e3cf1 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> /* * Entries per page directory level. 
The PTE level must use a 64b record @@ -45,41 +45,41 @@ #define PMD_MASKED_BITS 0 /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0 -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0 +/* Bits to mask out from a P4D to get to the PUD page */ +#define P4D_MASKED_BITS 0 /* * 4-level page tables related bits */ -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (p4d_val(p4d) == 0) +#define p4d_present(p4d) (p4d_val(p4d) != 0) +#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) #ifndef __ASSEMBLY__ -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte(pgd_val(pgd)); + return __pte(p4d_val(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd(pte_val(pte)); + return __p4d(pte_val(pte)); } -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); #endif /* !__ASSEMBLY__ */ -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ +#define pud_offset(p4dp, addr) \ + (((pud_t *) p4d_page_vaddr(*(p4dp))) + \ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9a33b8bd842d..b360f262b9c6 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -175,11 +175,11 @@ static inline pud_t pte_pud(pte_t pte) return __pud(pte_val(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define p4d_write(pgd) 
pte_write(p4d_pte(p4d)) -static inline void pgd_set(pgd_t *pgdp, unsigned long val) +static inline void p4d_set(p4d_t *p4dp, unsigned long val) { - *pgdp = __pgd(val); + *p4dp = __p4d(val); } /* diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 8cc543ed114c..05205d7a7b4a 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -139,9 +139,9 @@ static inline bool pud_is_leaf(pud_t pud) } #endif -#ifndef pgd_is_leaf -#define pgd_is_leaf pgd_is_leaf -static inline bool pgd_is_leaf(pgd_t pgd) +#ifndef p4d_is_leaf +#define p4d_is_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) { return false; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 803940d79b73..beb694285100 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -499,13 +499,14 @@ void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) unsigned long ig; for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { + p4d_t *p4d = p4d_offset(pgd, 0); pud_t *pud; - if (!pgd_present(*pgd)) + if (!p4d_present(*p4d)) continue; - pud = pud_offset(pgd, 0); + pud = pud_offset(p4d, 0); kvmppc_unmap_free_pud(kvm, pud, lpid); - pgd_clear(pgd); + p4d_clear(p4d); } } @@ -566,6 +567,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long *rmapp, struct rmap_nested **n_rmap) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud, *new_pud = NULL; pmd_t *pmd, *new_pmd = NULL; pte_t *ptep, *new_ptep = NULL; @@ -573,9 +575,11 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Traverse the guest's 2nd-level tree, allocate new levels needed */ pgd = pgtable + pgd_index(gpa); + p4d = p4d_offset(pgd, gpa); + pud = NULL; - if (pgd_present(*pgd)) - pud = pud_offset(pgd, gpa); + if (p4d_present(*p4d)) + pud = pud_offset(p4d, gpa); else new_pud = pud_alloc_one(kvm->mm, gpa); @@ -596,13 +600,13 @@ int 
kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Now traverse again under the lock and change the tree */ ret = -ENOMEM; - if (pgd_none(*pgd)) { + if (p4d_none(*p4d)) { if (!new_pud) goto out_unlock; - pgd_populate(kvm->mm, pgd, new_pud); + p4d_populate(kvm->mm, p4d, new_pud); new_pud = NULL; } - pud = pud_offset(pgd, gpa); + pud = pud_offset(p4d, gpa); if (pud_is_leaf(*pud)) { unsigned long hgpa = gpa & PUD_MASK; @@ -1220,6 +1224,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, pgd_t *pgt; struct kvm_nested_guest *nested; pgd_t pgd, *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ptep; @@ -1292,13 +1297,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, } pgdp = pgt + pgd_index(gpa); - pgd = READ_ONCE(*pgdp); - if (!(pgd_val(pgd) & _PAGE_PRESENT)) { - gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE; + p4dp = p4d_offset(pgdp, gpa); + p4d = READ_ONCE(*p4dp); + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { + gpa = (gpa & P4D_MASK) + P4D_SIZE; continue; } - pudp = pud_offset(&pgd, gpa); + pudp = pud_offset(&p4d, gpa); pud = READ_ONCE(*pudp); if (!(pud_val(pud) & _PAGE_PRESENT)) { gpa = (gpa & PUD_MASK) + PUD_SIZE; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a876..7a59f6863cec 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) pte_t *ptep; pmd_t *pmdp; pud_t *pudp; + p4d_t *p4dp; pgd_t *pgdp; pgdp = pgd_offset_k(addr); if (unlikely(!pgdp)) return -EINVAL; - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + if (unlikely(!p4dp)) + return -EINVAL; + + pudp = pud_offset(p4dp, addr); if (unlikely(!pudp)) return -EINVAL; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index f888cbb109b9..edef17c97206 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -312,7 +312,7 @@ 
void hash_preload(struct mm_struct *mm, unsigned long ea) if (!Hash) return; - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 2fcd321040ff..175bc33b41b7 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 64733b9cb20a..9cd15937e88a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, 
p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, pudp, ea); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index dd1bea45325c..fc3d0b0460b0 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -64,17 +64,19 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; pgdp = pgd_offset_k(ea); - if (pgd_none(*pgdp)) { + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, region_start, region_end); - pgd_populate(&init_mm, pgdp, pudp); + p4d_populate(&init_mm, p4dp, pudp); } - pudp = pud_offset(pgdp, ea); + pudp = pud_offset(p4dp, ea); if (map_page_size == PUD_SIZE) { ptep = (pte_t *)pudp; goto set_the_pte; @@ -114,6 +116,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -136,7 +139,8 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, * boot. 
*/ pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; if (map_page_size == PUD_SIZE) { @@ -173,6 +177,7 @@ void radix__change_memory_range(unsigned long start, unsigned long end, { unsigned long idx; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -185,7 +190,8 @@ void radix__change_memory_range(unsigned long start, unsigned long end, for (idx = start; idx < end; idx += PAGE_SIZE) { pgdp = pgd_offset_k(idx); - pudp = pud_alloc(&init_mm, pgdp, idx); + p4dp = p4d_offset(pgdp, idx); + pudp = pud_alloc(&init_mm, p4dp, idx); if (!pudp) continue; if (pud_is_leaf(*pudp)) { @@ -847,6 +853,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) unsigned long addr, next; pud_t *pud_base; pgd_t *pgd; + p4d_t *p4d; spin_lock(&init_mm.page_table_lock); @@ -854,15 +861,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); - if (!pgd_present(*pgd)) + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) continue; - if (pgd_is_leaf(*pgd)) { - split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); + if (p4d_is_leaf(*p4d)) { + split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d); continue; } - pud_base = (pud_t *)pgd_page_vaddr(*pgd); + pud_base = (pud_t *)p4d_page_vaddr(*p4d); remove_pud_table(pud_base, addr, next); } diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 2ef24a53f4c9..25a0c044bd93 100644 --- a/arch/powerpc/mm/book3s64/subpage_prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -54,15 +54,17 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, int npages) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; spinlock_t *ptl; pgd = pgd_offset(mm, addr); - if (pgd_none(*pgd)) + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) 
return; - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return; pmd = pmd_offset(pud, addr); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 33b3461d91e8..54f5994d4cbb 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -119,6 +119,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; + p4d_t *p4; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; @@ -128,20 +129,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); + p4 = p4d_offset(pg, addr); #ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ - return (pte_t *) pg; + return (pte_t *) p4; else if (pshift > PUD_SHIFT) { /* * We need to use hugepd table */ ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift == PUD_SHIFT) @@ -166,10 +168,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #else if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift >= PUD_SHIFT) { @@ -390,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -400,7 +402,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, start = addr; do { - pud = pud_offset(pgd, addr); + pud = 
pud_offset(p4d, addr); next = pud_addr_end(addr, end); if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) @@ -435,8 +437,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); + pud = pud_offset(p4d, start); + p4d_clear(p4d); pud_free_tlb(tlb, pud, start); mm_dec_nr_puds(tlb->mm); } @@ -449,6 +451,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; /* @@ -471,10 +474,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { - if (pgd_none_or_clear_bad(pgd)) + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); } else { unsigned long more; /* @@ -487,7 +491,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, if (more > next) next = more; - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, addr, next, floor, ceiling); } } while (addr = next, addr != end); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index db5664dde5ff..88e2e16380b5 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned unsigned long k_cur, k_next; pte_t *new = NULL; - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_start), k_start), k_start), k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { k_next = pgd_addr_end(k_cur, k_end); @@ -78,7 +78,7 @@ static int __init 
kasan_init_region(void *start, size_t size) block = memblock_alloc(k_end - k_start, PAGE_SIZE); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); void *va = block + k_cur - k_start; pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); @@ -102,7 +102,7 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) @@ -201,7 +201,7 @@ void __init kasan_early_init(void) unsigned long addr = KASAN_SHADOW_START; unsigned long end = KASAN_SHADOW_END; unsigned long next; - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(addr), addr), addr), addr); BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ef7b1119b2e2..8262b384dcf3 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -69,8 +69,8 @@ EXPORT_SYMBOL(kmap_prot); static inline pte_t *virt_to_kpte(unsigned long vaddr) { - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), - vaddr), vaddr), vaddr); + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), + vaddr), vaddr), vaddr), vaddr); } #endif diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index f348104eb461..7aaf7155e350 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t 
*pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 4637fdd469cf..77884e24281d 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -73,6 +73,7 @@ static void __init *early_alloc_pgtable(unsigned long size) int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -80,7 +81,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, pudp, ea); @@ -91,13 +93,12 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) return -ENOMEM; } else { pgdp = pgd_offset_k(ea); -#ifndef __PAGETABLE_PUD_FOLDED - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - pgd_populate(&init_mm, pgdp, pudp); + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pmdp); } -#endif /* !__PAGETABLE_PUD_FOLDED */ - pudp = pud_offset(pgdp, ea); + pudp = pud_offset(p4dp, ea); if (pud_none(*pudp)) { pmdp = 
early_alloc_pgtable(PMD_TABLE_SIZE); pud_populate(&init_mm, pudp, pmdp); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index e3759b69f81b..c2499271f6c1 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -265,6 +265,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, void assert_pte_locked(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -272,7 +273,9 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) return; pgd = mm->pgd + pgd_index(addr); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, addr); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, addr); /* @@ -312,12 +315,13 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys); pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) { - pgd_t pgd, *pgdp; + pgd_t *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ret_pte; hugepd_t *hpdp = NULL; - unsigned pdshift = PGDIR_SHIFT; + unsigned pdshift; if (hpage_shift) *hpage_shift = 0; @@ -325,24 +329,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, if (is_thp) *is_thp = false; - pgdp = pgdir + pgd_index(ea); - pgd = READ_ONCE(*pgdp); /* * Always operate on the local stack value. This make sure the * value don't get updated by a parallel THP split/collapse, * page fault or a page unmap. The return pte_t * is still not * stable. So should be checked there for above conditions. + * Top level is an exception because it is folded into p4d. 
*/ - if (pgd_none(pgd)) + pgdp = pgdir + pgd_index(ea); + p4dp = p4d_offset(pgdp, ea); + p4d = READ_ONCE(*p4dp); + pdshift = P4D_SHIFT; + + if (p4d_none(p4d)) return NULL; - if (pgd_is_leaf(pgd)) { - ret_pte = (pte_t *)pgdp; + if (p4d_is_leaf(p4d)) { + ret_pte = (pte_t *)p4dp; goto out; } - if (is_hugepd(__hugepd(pgd_val(pgd)))) { - hpdp = (hugepd_t *)&pgd; + if (is_hugepd(__hugepd(p4d_val(p4d)))) { + hpdp = (hugepd_t *)&p4d; goto out_huge; } @@ -352,7 +360,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, * irq disabled */ pdshift = PUD_SHIFT; - pudp = pud_offset(&pgd, ea); + pudp = pud_offset(&p4d, ea); pud = READ_ONCE(*pudp); if (pud_none(pud)) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5fb90edd865e..5774d4bc94d0 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); + pd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); /* Use middle 10 bits of VA to index the second-level map */ if (likely(slab_is_available())) pg = pte_alloc_kernel(pd, va); @@ -121,53 +121,24 @@ void __init mapin_ram(void) } } -/* Scan the real Linux page tables and return a PTE pointer for - * a virtual address in a context. - * Returns true (1) if PTE was found, zero otherwise. The pointer to - * the PTE pointer is unmodified if PTE is not found. 
- */ -static int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int retval = 0; - - pgd = pgd_offset(mm, addr & PAGE_MASK); - if (pgd) { - pud = pud_offset(pgd, addr & PAGE_MASK); - if (pud && pud_present(*pud)) { - pmd = pmd_offset(pud, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - if (pmdp) - *pmdp = pmd; - /* XXX caller needs to do pte_unmap, yuck */ - } - } - } - } - return(retval); -} - static int __change_page_attr_noflush(struct page *page, pgprot_t prot) { pte_t *kpte; pmd_t *kpmd; - unsigned long address; + unsigned long address, va; BUG_ON(PageHighMem(page)); address = (unsigned long)page_address(page); + va = address & PAGE_MASK; if (v_block_mapped(address)) return 0; - if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) + + kpmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); + if (!pmd_present(*kpmd)) return -EINVAL; + + kpte = pte_offset_map(kpmd, va); __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); pte_unmap(kpte); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e78832dce7bb..1f86a88fd4bb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -101,13 +101,13 @@ EXPORT_SYMBOL(__pte_frag_size_shift); #ifndef __PAGETABLE_PUD_FOLDED /* 4 level page table */ -struct page *pgd_page(pgd_t pgd) +struct page *p4d_page(p4d_t p4d) { - if (pgd_is_leaf(pgd)) { - VM_WARN_ON(!pgd_huge(pgd)); - return pte_page(pgd_pte(pgd)); + if (p4d_is_leaf(p4d)) { + VM_WARN_ON(!p4d_huge(p4d)); + return pte_page(p4d_pte(p4d)); } - return virt_to_page(pgd_page_vaddr(pgd)); + return virt_to_page(p4d_page_vaddr(p4d)); } #endif diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a07278027c6f..ac360ad865a8 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ 
b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -417,9 +417,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -431,6 +431,20 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + addr = start + i * P4D_SIZE; + if (!p4d_none(*p4d)) + /* p4d exists */ + walk_pud(st, p4d, addr); + } +} + static void walk_pagetables(struct pg_state *st) { pgd_t *pgd = pgd_offset_k(0UL); @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) addr = KERN_VIRT_START + i * PGDIR_SIZE; if (!pgd_none(*pgd)) /* pgd exists */ - walk_pud(st, pgd, addr); + walk_p4d(st, pgd, addr); } } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 206156255247..9d6256b61df3 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -304,11 +304,13 @@ static void walk_pagetables(struct pg_state *st) * the hash pagetable. 
*/ for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) - /* pgd exists */ - walk_pud(st, pgd, addr); + p4d_t *p4d = p4d_offset(pgd, 0); + + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) + /* p4d exists */ + walk_pud(st, p4d, addr); else - note_page(st, addr, 1, pgd_val(*pgd)); + note_page(st, addr, 1, p4d_val(*p4d)); } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0ec9640335bb..3e29128c58cc 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3130,6 +3130,7 @@ static void show_pte(unsigned long addr) struct task_struct *tsk = NULL; struct mm_struct *mm; pgd_t *pgdp, *pgdir; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -3161,20 +3162,21 @@ static void show_pte(unsigned long addr) pgdir = pgd_offset(mm, 0); } - if (pgd_none(*pgdp)) { - printf("no linux page table for address\n"); + p4dp = p4d_offset(pgdp, addr); + + if (p4d_none(*p4dp)) { + printf("No valid P4D\n"); return; } - printf("pgd @ 0x%px\n", pgdir); - - if (pgd_is_leaf(*pgdp)) { - format_pte(pgdp, pgd_val(*pgdp)); + if (p4d_is_leaf(*p4dp)) { + format_pte(p4dp, p4d_val(*p4dp)); return; } - printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp)); - pudp = pud_offset(pgdp, addr); + printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp)); + + pudp = pud_offset(p4dp, addr); if (pud_none(*pudp)) { printf("No valid PUD\n");
Le 26/02/2020 à 10:13, Mike Rapoport a écrit : > On Tue, Feb 18, 2020 at 12:54:40PM +0200, Mike Rapoport wrote: >> On Sun, Feb 16, 2020 at 11:41:07AM +0100, Christophe Leroy wrote: >>> >>> >>> Le 16/02/2020 à 09:18, Mike Rapoport a écrit : >>>> From: Mike Rapoport <rppt@linux.ibm.com> >>>> >>>> Implement primitives necessary for the 4th level folding, add walks of p4d >>>> level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. >>> >>> I don't think it is worth adding all these additional walks of p4d, this >>> patch could be limited to changes like: >>> >>> - pud = pud_offset(pgd, gpa); >>> + pud = pud_offset(p4d_offset(pgd, gpa), gpa); >>> >>> The additional walks should be added through another patch the day powerpc >>> needs them. >> >> Ok, I'll update the patch to reduce walking the p4d. > > Here's what I have with more direct accesses from pgd to pud. I went through it quickly. This looks promising. Do we need the walk_p4d() in arch/powerpc/mm/ptdump/hashpagetable.c ? Can't we just do @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) addr = KERN_VIRT_START + i * PGDIR_SIZE; if (!pgd_none(*pgd)) /* pgd exists */ - walk_pud(st, pgd, addr); + walk_pud(st, p4d_offset(pgd, addr), addr); } } Also, I think the removal of get_pteptr() should be a patch by itself. You could include my patches in your series. See https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=152102 Christophe > > From 6c59a86ce8394fb6100e9b6ced2e346981fb0ce9 Mon Sep 17 00:00:00 2001 > From: Mike Rapoport <rppt@linux.ibm.com> > Date: Sun, 24 Nov 2019 15:38:00 +0200 > Subject: [PATCH v3] powerpc: add support for folded p4d page tables > > Implement primitives necessary for the 4th level folding, add walks of p4d > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h.
> > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx > --- > v3: > * reduce amount of added p4d walks > * kill pgtable_32::get_pteptr and traverse page table in > pgtable_32::__change_page_attr_noflush > > > arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - > arch/powerpc/include/asm/book3s/64/hash.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- > arch/powerpc/include/asm/book3s/64/pgtable.h | 60 ++++++++++--------- > arch/powerpc/include/asm/book3s/64/radix.h | 6 +- > arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - > arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- > .../include/asm/nohash/64/pgtable-4k.h | 32 +++++----- > arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- > arch/powerpc/include/asm/pgtable.h | 6 +- > arch/powerpc/kvm/book3s_64_mmu_radix.c | 30 ++++++---- > arch/powerpc/lib/code-patching.c | 7 ++- > arch/powerpc/mm/book3s32/mmu.c | 2 +- > arch/powerpc/mm/book3s32/tlb.c | 4 +- > arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- > arch/powerpc/mm/book3s64/radix_pgtable.c | 26 +++++--- > arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- > arch/powerpc/mm/hugetlbpage.c | 28 +++++---- > arch/powerpc/mm/kasan/kasan_init_32.c | 8 +-- > arch/powerpc/mm/mem.c | 4 +- > arch/powerpc/mm/nohash/40x.c | 4 +- > arch/powerpc/mm/nohash/book3e_pgtable.c | 15 ++--- > arch/powerpc/mm/pgtable.c | 30 ++++++---- > arch/powerpc/mm/pgtable_32.c | 45 +++----------- > arch/powerpc/mm/pgtable_64.c | 10 ++-- > arch/powerpc/mm/ptdump/hashpagetable.c | 20 ++++++- > arch/powerpc/mm/ptdump/ptdump.c | 14 +++-- > arch/powerpc/xmon/xmon.c | 18 +++--- > 28 files changed, 213 insertions(+), 184 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h > index 5b39c11e884a..39ec11371be0 100644 > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > @@ -2,7 +2,6 @@ > #ifndef 
_ASM_POWERPC_BOOK3S_32_PGTABLE_H > #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H > > -#define __ARCH_USE_5LEVEL_HACK > #include <asm-generic/pgtable-nopmd.h> > > #include <asm/book3s/32/hash.h> > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > index 2781ebf6add4..876d1528c2cf 100644 > --- a/arch/powerpc/include/asm/book3s/64/hash.h > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) > > #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) > #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) > -static inline int hash__pgd_bad(pgd_t pgd) > +static inline int hash__p4d_bad(p4d_t p4d) > { > - return (pgd_val(pgd) == 0); > + return (p4d_val(p4d) == 0); > } > #ifdef CONFIG_STRICT_KERNEL_RWX > extern void hash__mark_rodata_ro(void); > diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h > index a41e91bd0580..69c5b051734f 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h > +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h > @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); > } > > -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > +static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) > { > - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > } > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > index 201a69e6a355..fa60e8594b9f 100644 > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > @@ -2,7 +2,7 @@ > #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > 
-#include <asm-generic/5level-fixup.h> > +#include <asm-generic/pgtable-nop4d.h> > > #ifndef __ASSEMBLY__ > #include <linux/mmdebug.h> > @@ -251,7 +251,7 @@ extern unsigned long __pmd_frag_size_shift; > /* Bits to mask out from a PUD to get to the PMD page */ > #define PUD_MASKED_BITS 0xc0000000000000ffUL > /* Bits to mask out from a PGD to get to the PUD page */ > -#define PGD_MASKED_BITS 0xc0000000000000ffUL > +#define P4D_MASKED_BITS 0xc0000000000000ffUL > > /* > * Used as an indicator for rcu callback functions > @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) > return pte_access_permitted(pud_pte(pud), write); > } > > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) > +static inline __be64 p4d_raw(p4d_t x) > +{ > + return pgd_raw(x.pgd); > +} > + > +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) > > -static inline void pgd_clear(pgd_t *pgdp) > +static inline void p4d_clear(p4d_t *p4dp) > { > - *pgdp = __pgd(0); > + *p4dp = __p4d(0); > } > > -static inline int pgd_none(pgd_t pgd) > +static inline int p4d_none(p4d_t p4d) > { > - return !pgd_raw(pgd); > + return !p4d_raw(p4d); > } > > -static inline int pgd_present(pgd_t pgd) > +static inline int p4d_present(p4d_t p4d) > { > - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); > } > > -static inline pte_t pgd_pte(pgd_t pgd) > +static inline pte_t p4d_pte(p4d_t p4d) > { > - return __pte_raw(pgd_raw(pgd)); > + return __pte_raw(p4d_raw(p4d)); > } > > -static inline pgd_t pte_pgd(pte_t pte) > +static inline p4d_t pte_p4d(pte_t pte) > { > - return __pgd_raw(pte_raw(pte)); > + return __p4d_raw(pte_raw(pte)); > } > > -static inline int pgd_bad(pgd_t pgd) > +static inline int p4d_bad(p4d_t p4d) > { > if (radix_enabled()) > - return radix__pgd_bad(pgd); > - return hash__pgd_bad(pgd); > + return radix__p4d_bad(p4d); > + return hash__p4d_bad(p4d); > } > > -#define 
pgd_access_permitted pgd_access_permitted > -static inline bool pgd_access_permitted(pgd_t pgd, bool write) > +#define p4d_access_permitted p4d_access_permitted > +static inline bool p4d_access_permitted(p4d_t p4d, bool write) > { > - return pte_access_permitted(pgd_pte(pgd), write); > + return pte_access_permitted(p4d_pte(p4d), write); > } > > -extern struct page *pgd_page(pgd_t pgd); > +extern struct page *p4d_page(p4d_t p4d); > > /* Pointers in the page table tree are physical addresses */ > #define __pgtable_ptr_val(ptr) __pa(ptr) > > #define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) > #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) > -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) > +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) > > #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) > #define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) > @@ -1010,8 +1016,8 @@ extern struct page *pgd_page(pgd_t pgd); > > #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) > > -#define pud_offset(pgdp, addr) \ > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > +#define pud_offset(p4dp, addr) \ > + (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) > #define pmd_offset(pudp,addr) \ > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > #define pte_offset_kernel(dir,addr) \ > @@ -1368,11 +1374,11 @@ static inline bool pud_is_leaf(pud_t pud) > return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); > } > > -#define pgd_is_leaf pgd_is_leaf > -#define pgd_leaf pgd_is_leaf > -static inline bool pgd_is_leaf(pgd_t pgd) > +#define p4d_is_leaf p4d_is_leaf > +#define p4d_leaf p4d_is_leaf > +static inline bool p4d_is_leaf(p4d_t p4d) > { > - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); > } > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
b/arch/powerpc/include/asm/book3s/64/radix.h > index d97db3ad9aae..9bca2ac64220 100644 > --- a/arch/powerpc/include/asm/book3s/64/radix.h > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > @@ -30,7 +30,7 @@ > /* Don't have anything in the reserved bits and leaf bits */ > #define RADIX_PMD_BAD_BITS 0x60000000000000e0UL > #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL > -#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL > +#define RADIX_P4D_BAD_BITS 0x60000000000000e0UL > > #define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) > #define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) > @@ -227,9 +227,9 @@ static inline int radix__pud_bad(pud_t pud) > } > > > -static inline int radix__pgd_bad(pgd_t pgd) > +static inline int radix__p4d_bad(p4d_t p4d) > { > - return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); > + return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS); > } > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h > index 60c4d829152e..d4c2c4259fa3 100644 > --- a/arch/powerpc/include/asm/nohash/32/pgtable.h > +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h > @@ -2,7 +2,6 @@ > #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H > #define _ASM_POWERPC_NOHASH_32_PGTABLE_H > > -#define __ARCH_USE_5LEVEL_HACK > #include <asm-generic/pgtable-nopmd.h> > > #ifndef __ASSEMBLY__ > diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h > index b9534a793293..668aee6017e7 100644 > --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h > +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h > @@ -15,7 +15,7 @@ struct vmemmap_backing { > }; > extern struct vmemmap_backing *vmemmap_list; > > -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) > +#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > { > diff --git 
a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > index c40ec32b8194..81b1c54e3cf1 100644 > --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > @@ -2,7 +2,7 @@ > #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H > #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H > > -#include <asm-generic/5level-fixup.h> > +#include <asm-generic/pgtable-nop4d.h> > > /* > * Entries per page directory level. The PTE level must use a 64b record > @@ -45,41 +45,41 @@ > #define PMD_MASKED_BITS 0 > /* Bits to mask out from a PUD to get to the PMD page */ > #define PUD_MASKED_BITS 0 > -/* Bits to mask out from a PGD to get to the PUD page */ > -#define PGD_MASKED_BITS 0 > +/* Bits to mask out from a P4D to get to the PUD page */ > +#define P4D_MASKED_BITS 0 > > > /* > * 4-level page tables related bits > */ > > -#define pgd_none(pgd) (!pgd_val(pgd)) > -#define pgd_bad(pgd) (pgd_val(pgd) == 0) > -#define pgd_present(pgd) (pgd_val(pgd) != 0) > -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) > +#define p4d_none(p4d) (!p4d_val(p4d)) > +#define p4d_bad(p4d) (p4d_val(p4d) == 0) > +#define p4d_present(p4d) (p4d_val(p4d) != 0) > +#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) > > #ifndef __ASSEMBLY__ > > -static inline void pgd_clear(pgd_t *pgdp) > +static inline void p4d_clear(p4d_t *p4dp) > { > - *pgdp = __pgd(0); > + *p4dp = __p4d(0); > } > > -static inline pte_t pgd_pte(pgd_t pgd) > +static inline pte_t p4d_pte(p4d_t p4d) > { > - return __pte(pgd_val(pgd)); > + return __pte(p4d_val(p4d)); > } > > -static inline pgd_t pte_pgd(pte_t pte) > +static inline p4d_t pte_p4d(pte_t pte) > { > - return __pgd(pte_val(pte)); > + return __p4d(pte_val(pte)); > } > -extern struct page *pgd_page(pgd_t pgd); > +extern struct page *p4d_page(p4d_t p4d); > > #endif /* !__ASSEMBLY__ */ > > -#define pud_offset(pgdp, addr) \ > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ > +#define 
pud_offset(p4dp, addr) \ > + (((pud_t *) p4d_page_vaddr(*(p4dp))) + \ > (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) > > #define pud_ERROR(e) \ > diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h > index 9a33b8bd842d..b360f262b9c6 100644 > --- a/arch/powerpc/include/asm/nohash/64/pgtable.h > +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h > @@ -175,11 +175,11 @@ static inline pud_t pte_pud(pte_t pte) > return __pud(pte_val(pte)); > } > #define pud_write(pud) pte_write(pud_pte(pud)) > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > +#define p4d_write(pgd) pte_write(p4d_pte(p4d)) > > -static inline void pgd_set(pgd_t *pgdp, unsigned long val) > +static inline void p4d_set(p4d_t *p4dp, unsigned long val) > { > - *pgdp = __pgd(val); > + *p4dp = __p4d(val); > } > > /* > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > index 8cc543ed114c..05205d7a7b4a 100644 > --- a/arch/powerpc/include/asm/pgtable.h > +++ b/arch/powerpc/include/asm/pgtable.h > @@ -139,9 +139,9 @@ static inline bool pud_is_leaf(pud_t pud) > } > #endif > > -#ifndef pgd_is_leaf > -#define pgd_is_leaf pgd_is_leaf > -static inline bool pgd_is_leaf(pgd_t pgd) > +#ifndef p4d_is_leaf > +#define p4d_is_leaf p4d_is_leaf > +static inline bool p4d_is_leaf(p4d_t p4d) > { > return false; > } > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c > index 803940d79b73..beb694285100 100644 > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > @@ -499,13 +499,14 @@ void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) > unsigned long ig; > > for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { > + p4d_t *p4d = p4d_offset(pgd, 0); > pud_t *pud; > > - if (!pgd_present(*pgd)) > + if (!p4d_present(*p4d)) > continue; > - pud = pud_offset(pgd, 0); > + pud = pud_offset(p4d, 0); > kvmppc_unmap_free_pud(kvm, pud, lpid); > - 
pgd_clear(pgd); > + p4d_clear(p4d); > } > } > > @@ -566,6 +567,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > unsigned long *rmapp, struct rmap_nested **n_rmap) > { > pgd_t *pgd; > + p4d_t *p4d; > pud_t *pud, *new_pud = NULL; > pmd_t *pmd, *new_pmd = NULL; > pte_t *ptep, *new_ptep = NULL; > @@ -573,9 +575,11 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Traverse the guest's 2nd-level tree, allocate new levels needed */ > pgd = pgtable + pgd_index(gpa); > + p4d = p4d_offset(pgd, gpa); > + > pud = NULL; > - if (pgd_present(*pgd)) > - pud = pud_offset(pgd, gpa); > + if (p4d_present(*p4d)) > + pud = pud_offset(p4d, gpa); > else > new_pud = pud_alloc_one(kvm->mm, gpa); > > @@ -596,13 +600,13 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Now traverse again under the lock and change the tree */ > ret = -ENOMEM; > - if (pgd_none(*pgd)) { > + if (p4d_none(*p4d)) { > if (!new_pud) > goto out_unlock; > - pgd_populate(kvm->mm, pgd, new_pud); > + p4d_populate(kvm->mm, p4d, new_pud); > new_pud = NULL; > } > - pud = pud_offset(pgd, gpa); > + pud = pud_offset(p4d, gpa); > if (pud_is_leaf(*pud)) { > unsigned long hgpa = gpa & PUD_MASK; > > @@ -1220,6 +1224,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > pgd_t *pgt; > struct kvm_nested_guest *nested; > pgd_t pgd, *pgdp; > + p4d_t p4d, *p4dp; > pud_t pud, *pudp; > pmd_t pmd, *pmdp; > pte_t *ptep; > @@ -1292,13 +1297,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > } > > pgdp = pgt + pgd_index(gpa); > - pgd = READ_ONCE(*pgdp); > - if (!(pgd_val(pgd) & _PAGE_PRESENT)) { > - gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE; > + p4dp = p4d_offset(pgdp, gpa); > + p4d = READ_ONCE(*p4dp); > + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { > + gpa = (gpa & P4D_MASK) + P4D_SIZE; > continue; > } > > - pudp = pud_offset(&pgd, gpa); > + pudp = pud_offset(&p4d, gpa); > pud = READ_ONCE(*pudp); > if (!(pud_val(pud) & 
_PAGE_PRESENT)) { > gpa = (gpa & PUD_MASK) + PUD_SIZE; > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > index 3345f039a876..7a59f6863cec 100644 > --- a/arch/powerpc/lib/code-patching.c > +++ b/arch/powerpc/lib/code-patching.c > @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) > pte_t *ptep; > pmd_t *pmdp; > pud_t *pudp; > + p4d_t *p4dp; > pgd_t *pgdp; > > pgdp = pgd_offset_k(addr); > if (unlikely(!pgdp)) > return -EINVAL; > > - pudp = pud_offset(pgdp, addr); > + p4dp = p4d_offset(pgdp, addr); > + if (unlikely(!p4dp)) > + return -EINVAL; > + > + pudp = pud_offset(p4dp, addr); > if (unlikely(!pudp)) > return -EINVAL; > > diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c > index f888cbb109b9..edef17c97206 100644 > --- a/arch/powerpc/mm/book3s32/mmu.c > +++ b/arch/powerpc/mm/book3s32/mmu.c > @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) > > if (!Hash) > return; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); > if (!pmd_none(*pmd)) > add_hash_page(mm->context.id, ea, pmd_val(*pmd)); > } > diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c > index 2fcd321040ff..175bc33b41b7 100644 > --- a/arch/powerpc/mm/book3s32/tlb.c > +++ b/arch/powerpc/mm/book3s32/tlb.c > @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, > if (start >= end) > return; > end = (end - 1) | ~PAGE_MASK; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); > for (;;) { > pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; > if (pmd_end > end) > @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) > return; > } > mm = (vmaddr < TASK_SIZE)? 
vma->vm_mm: &init_mm; > - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); > if (!pmd_none(*pmd)) > flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); > } > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c > index 64733b9cb20a..9cd15937e88a 100644 > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, > int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > { > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); > if (slab_is_available()) { > pgdp = pgd_offset_k(ea); > - pudp = pud_alloc(&init_mm, pgdp, ea); > + p4dp = p4d_offset(pgdp, ea); > + pudp = pud_alloc(&init_mm, p4dp, ea); > if (!pudp) > return -ENOMEM; > pmdp = pmd_alloc(&init_mm, pudp, ea); > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c > index dd1bea45325c..fc3d0b0460b0 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -64,17 +64,19 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, > { > unsigned long pfn = pa >> PAGE_SHIFT; > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > > pgdp = pgd_offset_k(ea); > - if (pgd_none(*pgdp)) { > + p4dp = p4d_offset(pgdp, ea); > + if (p4d_none(*p4dp)) { > pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, > region_start, region_end); > - pgd_populate(&init_mm, pgdp, pudp); > + p4d_populate(&init_mm, p4dp, pudp); > } > - pudp = pud_offset(pgdp, ea); > + pudp = pud_offset(p4dp, ea); > if (map_page_size == PUD_SIZE) { > ptep = (pte_t 
*)pudp; > goto set_the_pte; > @@ -114,6 +116,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, > { > unsigned long pfn = pa >> PAGE_SHIFT; > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -136,7 +139,8 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, > * boot. > */ > pgdp = pgd_offset_k(ea); > - pudp = pud_alloc(&init_mm, pgdp, ea); > + p4dp = p4d_offset(pgdp, ea); > + pudp = pud_alloc(&init_mm, p4dp, ea); > if (!pudp) > return -ENOMEM; > if (map_page_size == PUD_SIZE) { > @@ -173,6 +177,7 @@ void radix__change_memory_range(unsigned long start, unsigned long end, > { > unsigned long idx; > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -185,7 +190,8 @@ void radix__change_memory_range(unsigned long start, unsigned long end, > > for (idx = start; idx < end; idx += PAGE_SIZE) { > pgdp = pgd_offset_k(idx); > - pudp = pud_alloc(&init_mm, pgdp, idx); > + p4dp = p4d_offset(pgdp, idx); > + pudp = pud_alloc(&init_mm, p4dp, idx); > if (!pudp) > continue; > if (pud_is_leaf(*pudp)) { > @@ -847,6 +853,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) > unsigned long addr, next; > pud_t *pud_base; > pgd_t *pgd; > + p4d_t *p4d; > > spin_lock(&init_mm.page_table_lock); > > @@ -854,15 +861,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) > next = pgd_addr_end(addr, end); > > pgd = pgd_offset_k(addr); > - if (!pgd_present(*pgd)) > + p4d = p4d_offset(pgd, addr); > + if (!p4d_present(*p4d)) > continue; > > - if (pgd_is_leaf(*pgd)) { > - split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); > + if (p4d_is_leaf(*p4d)) { > + split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d); > continue; > } > > - pud_base = (pud_t *)pgd_page_vaddr(*pgd); > + pud_base = (pud_t *)p4d_page_vaddr(*p4d); > remove_pud_table(pud_base, addr, next); > } > > diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c 
b/arch/powerpc/mm/book3s64/subpage_prot.c > index 2ef24a53f4c9..25a0c044bd93 100644 > --- a/arch/powerpc/mm/book3s64/subpage_prot.c > +++ b/arch/powerpc/mm/book3s64/subpage_prot.c > @@ -54,15 +54,17 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, > int npages) > { > pgd_t *pgd; > + p4d_t *p4d; > pud_t *pud; > pmd_t *pmd; > pte_t *pte; > spinlock_t *ptl; > > pgd = pgd_offset(mm, addr); > - if (pgd_none(*pgd)) > + p4d = p4d_offset(pgd, addr); > + if (p4d_none(*p4d)) > return; > - pud = pud_offset(pgd, addr); > + pud = pud_offset(p4d, addr); > if (pud_none(*pud)) > return; > pmd = pmd_offset(pud, addr); > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c > index 33b3461d91e8..54f5994d4cbb 100644 > --- a/arch/powerpc/mm/hugetlbpage.c > +++ b/arch/powerpc/mm/hugetlbpage.c > @@ -119,6 +119,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, > pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) > { > pgd_t *pg; > + p4d_t *p4; > pud_t *pu; > pmd_t *pm; > hugepd_t *hpdp = NULL; > @@ -128,20 +129,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz > > addr &= ~(sz-1); > pg = pgd_offset(mm, addr); > + p4 = p4d_offset(pg, addr); > > #ifdef CONFIG_PPC_BOOK3S_64 > if (pshift == PGDIR_SHIFT) > /* 16GB huge page */ > - return (pte_t *) pg; > + return (pte_t *) p4; > else if (pshift > PUD_SHIFT) { > /* > * We need to use hugepd table > */ > ptl = &mm->page_table_lock; > - hpdp = (hugepd_t *)pg; > + hpdp = (hugepd_t *)p4; > } else { > pdshift = PUD_SHIFT; > - pu = pud_alloc(mm, pg, addr); > + pu = pud_alloc(mm, p4, addr); > if (!pu) > return NULL; > if (pshift == PUD_SHIFT) > @@ -166,10 +168,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz > #else > if (pshift >= PGDIR_SHIFT) { > ptl = &mm->page_table_lock; > - hpdp = (hugepd_t *)pg; > + hpdp = (hugepd_t *)p4; > } else { > pdshift = PUD_SHIFT; > - pu = 
pud_alloc(mm, pg, addr); > + pu = pud_alloc(mm, p4, addr); > if (!pu) > return NULL; > if (pshift >= PUD_SHIFT) { > @@ -390,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, > mm_dec_nr_pmds(tlb->mm); > } > > -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, > unsigned long addr, unsigned long end, > unsigned long floor, unsigned long ceiling) > { > @@ -400,7 +402,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > > start = addr; > do { > - pud = pud_offset(pgd, addr); > + pud = pud_offset(p4d, addr); > next = pud_addr_end(addr, end); > if (!is_hugepd(__hugepd(pud_val(*pud)))) { > if (pud_none_or_clear_bad(pud)) > @@ -435,8 +437,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > if (end - 1 > ceiling - 1) > return; > > - pud = pud_offset(pgd, start); > - pgd_clear(pgd); > + pud = pud_offset(p4d, start); > + p4d_clear(p4d); > pud_free_tlb(tlb, pud, start); > mm_dec_nr_puds(tlb->mm); > } > @@ -449,6 +451,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > unsigned long floor, unsigned long ceiling) > { > pgd_t *pgd; > + p4d_t *p4d; > unsigned long next; > > /* > @@ -471,10 +474,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > do { > next = pgd_addr_end(addr, end); > pgd = pgd_offset(tlb->mm, addr); > + p4d = p4d_offset(pgd, addr); > if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { > - if (pgd_none_or_clear_bad(pgd)) > + if (p4d_none_or_clear_bad(p4d)) > continue; > - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); > + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); > } else { > unsigned long more; > /* > @@ -487,7 +491,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > if (more > next) > next = more; > > - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, > + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, > addr, next, floor, 
ceiling); > } > } while (addr = next, addr != end); > diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c > index db5664dde5ff..88e2e16380b5 100644 > --- a/arch/powerpc/mm/kasan/kasan_init_32.c > +++ b/arch/powerpc/mm/kasan/kasan_init_32.c > @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned > unsigned long k_cur, k_next; > pte_t *new = NULL; > > - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_start), k_start), k_start), k_start); > > for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { > k_next = pgd_addr_end(k_cur, k_end); > @@ -78,7 +78,7 @@ static int __init kasan_init_region(void *start, size_t size) > block = memblock_alloc(k_end - k_start, PAGE_SIZE); > > for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); > void *va = block + k_cur - k_start; > pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); > > @@ -102,7 +102,7 @@ static void __init kasan_remap_early_shadow_ro(void) > kasan_populate_pte(kasan_early_shadow_pte, prot); > > for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); > pte_t *ptep = pte_offset_kernel(pmd, k_cur); > > if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) > @@ -201,7 +201,7 @@ void __init kasan_early_init(void) > unsigned long addr = KASAN_SHADOW_START; > unsigned long end = KASAN_SHADOW_END; > unsigned long next; > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(addr), addr), addr), addr); > > 
BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index ef7b1119b2e2..8262b384dcf3 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -69,8 +69,8 @@ EXPORT_SYMBOL(kmap_prot); > > static inline pte_t *virt_to_kpte(unsigned long vaddr) > { > - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), > - vaddr), vaddr), vaddr); > + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), > + vaddr), vaddr), vaddr), vaddr); > } > #endif > > diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c > index f348104eb461..7aaf7155e350 100644 > --- a/arch/powerpc/mm/nohash/40x.c > +++ b/arch/powerpc/mm/nohash/40x.c > @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) > pmd_t *pmdp; > unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; > > - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); > + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); > *pmdp++ = __pmd(val); > *pmdp++ = __pmd(val); > *pmdp++ = __pmd(val); > @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) > pmd_t *pmdp; > unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; > > - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); > + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); > *pmdp = __pmd(val); > > v += LARGE_PAGE_SIZE_4M; > diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c > index 4637fdd469cf..77884e24281d 100644 > --- a/arch/powerpc/mm/nohash/book3e_pgtable.c > +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c > @@ -73,6 +73,7 @@ static void __init *early_alloc_pgtable(unsigned long size) > int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > { > pgd_t *pgdp; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -80,7 +81,8 @@ int 
__ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); > if (slab_is_available()) { > pgdp = pgd_offset_k(ea); > - pudp = pud_alloc(&init_mm, pgdp, ea); > + p4dp = p4d_offset(pgdp, ea); > + pudp = pud_alloc(&init_mm, p4dp, ea); > if (!pudp) > return -ENOMEM; > pmdp = pmd_alloc(&init_mm, pudp, ea); > @@ -91,13 +93,12 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > return -ENOMEM; > } else { > pgdp = pgd_offset_k(ea); > -#ifndef __PAGETABLE_PUD_FOLDED > - if (pgd_none(*pgdp)) { > - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); > - pgd_populate(&init_mm, pgdp, pudp); > + p4dp = p4d_offset(pgdp, ea); > + if (p4d_none(*p4dp)) { > + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); > + p4d_populate(&init_mm, p4dp, pmdp); > } > -#endif /* !__PAGETABLE_PUD_FOLDED */ > - pudp = pud_offset(pgdp, ea); > + pudp = pud_offset(p4dp, ea); > if (pud_none(*pudp)) { > pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); > pud_populate(&init_mm, pudp, pmdp); > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > index e3759b69f81b..c2499271f6c1 100644 > --- a/arch/powerpc/mm/pgtable.c > +++ b/arch/powerpc/mm/pgtable.c > @@ -265,6 +265,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, > void assert_pte_locked(struct mm_struct *mm, unsigned long addr) > { > pgd_t *pgd; > + p4d_t *p4d; > pud_t *pud; > pmd_t *pmd; > > @@ -272,7 +273,9 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) > return; > pgd = mm->pgd + pgd_index(addr); > BUG_ON(pgd_none(*pgd)); > - pud = pud_offset(pgd, addr); > + p4d = p4d_offset(pgd, addr); > + BUG_ON(p4d_none(*p4d)); > + pud = pud_offset(p4d, addr); > BUG_ON(pud_none(*pud)); > pmd = pmd_offset(pud, addr); > /* > @@ -312,12 +315,13 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys); > pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > bool *is_thp, unsigned *hpage_shift) > { > - pgd_t pgd, *pgdp; > + pgd_t *pgdp; > + p4d_t p4d, *p4dp; > 
pud_t pud, *pudp; > pmd_t pmd, *pmdp; > pte_t *ret_pte; > hugepd_t *hpdp = NULL; > - unsigned pdshift = PGDIR_SHIFT; > + unsigned pdshift; > > if (hpage_shift) > *hpage_shift = 0; > @@ -325,24 +329,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > if (is_thp) > *is_thp = false; > > - pgdp = pgdir + pgd_index(ea); > - pgd = READ_ONCE(*pgdp); > /* > * Always operate on the local stack value. This make sure the > * value don't get updated by a parallel THP split/collapse, > * page fault or a page unmap. The return pte_t * is still not > * stable. So should be checked there for above conditions. > + * Top level is an exception because it is folded into p4d. > */ > - if (pgd_none(pgd)) > + pgdp = pgdir + pgd_index(ea); > + p4dp = p4d_offset(pgdp, ea); > + p4d = READ_ONCE(*p4dp); > + pdshift = P4D_SHIFT; > + > + if (p4d_none(p4d)) > return NULL; > > - if (pgd_is_leaf(pgd)) { > - ret_pte = (pte_t *)pgdp; > + if (p4d_is_leaf(p4d)) { > + ret_pte = (pte_t *)p4dp; > goto out; > } > > - if (is_hugepd(__hugepd(pgd_val(pgd)))) { > - hpdp = (hugepd_t *)&pgd; > + if (is_hugepd(__hugepd(p4d_val(p4d)))) { > + hpdp = (hugepd_t *)&p4d; > goto out_huge; > } > > @@ -352,7 +360,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > * irq disabled > */ > pdshift = PUD_SHIFT; > - pudp = pud_offset(&pgd, ea); > + pudp = pud_offset(&p4d, ea); > pud = READ_ONCE(*pudp); > > if (pud_none(pud)) > diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c > index 5fb90edd865e..5774d4bc94d0 100644 > --- a/arch/powerpc/mm/pgtable_32.c > +++ b/arch/powerpc/mm/pgtable_32.c > @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) > int err = -ENOMEM; > > /* Use upper 10 bits of VA to index the first level map */ > - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); > + pd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); > /* Use middle 10 bits of VA to index the second-level map */ > if 
(likely(slab_is_available())) > pg = pte_alloc_kernel(pd, va); > @@ -121,53 +121,24 @@ void __init mapin_ram(void) > } > } > > -/* Scan the real Linux page tables and return a PTE pointer for > - * a virtual address in a context. > - * Returns true (1) if PTE was found, zero otherwise. The pointer to > - * the PTE pointer is unmodified if PTE is not found. > - */ > -static int > -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) > -{ > - pgd_t *pgd; > - pud_t *pud; > - pmd_t *pmd; > - pte_t *pte; > - int retval = 0; > - > - pgd = pgd_offset(mm, addr & PAGE_MASK); > - if (pgd) { > - pud = pud_offset(pgd, addr & PAGE_MASK); > - if (pud && pud_present(*pud)) { > - pmd = pmd_offset(pud, addr & PAGE_MASK); > - if (pmd_present(*pmd)) { > - pte = pte_offset_map(pmd, addr & PAGE_MASK); > - if (pte) { > - retval = 1; > - *ptep = pte; > - if (pmdp) > - *pmdp = pmd; > - /* XXX caller needs to do pte_unmap, yuck */ > - } > - } > - } > - } > - return(retval); > -} > - > static int __change_page_attr_noflush(struct page *page, pgprot_t prot) > { > pte_t *kpte; > pmd_t *kpmd; > - unsigned long address; > + unsigned long address, va; > > BUG_ON(PageHighMem(page)); > address = (unsigned long)page_address(page); > + va = address & PAGE_MASK; > > if (v_block_mapped(address)) > return 0; > - if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) > + > + kpmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); > + if (!pmd_present(*kpmd)) > return -EINVAL; > + > + kpte = pte_offset_map(kpmd, va); > __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); > pte_unmap(kpte); > > diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c > index e78832dce7bb..1f86a88fd4bb 100644 > --- a/arch/powerpc/mm/pgtable_64.c > +++ b/arch/powerpc/mm/pgtable_64.c > @@ -101,13 +101,13 @@ EXPORT_SYMBOL(__pte_frag_size_shift); > > #ifndef __PAGETABLE_PUD_FOLDED > /* 4 level page table */ > -struct page *pgd_page(pgd_t pgd) > +struct page 
*p4d_page(p4d_t p4d) > { > - if (pgd_is_leaf(pgd)) { > - VM_WARN_ON(!pgd_huge(pgd)); > - return pte_page(pgd_pte(pgd)); > + if (p4d_is_leaf(p4d)) { > + VM_WARN_ON(!p4d_huge(p4d)); > + return pte_page(p4d_pte(p4d)); > } > - return virt_to_page(pgd_page_vaddr(pgd)); > + return virt_to_page(p4d_page_vaddr(p4d)); > } > #endif > > diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c > index a07278027c6f..ac360ad865a8 100644 > --- a/arch/powerpc/mm/ptdump/hashpagetable.c > +++ b/arch/powerpc/mm/ptdump/hashpagetable.c > @@ -417,9 +417,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) > } > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) > { > - pud_t *pud = pud_offset(pgd, 0); > + pud_t *pud = pud_offset(p4d, 0); > unsigned long addr; > unsigned int i; > > @@ -431,6 +431,20 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > } > } > > +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) > +{ > + p4d_t *p4d = p4d_offset(pgd, 0); > + unsigned long addr; > + unsigned int i; > + > + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { > + addr = start + i * P4D_SIZE; > + if (!p4d_none(*p4d)) > + /* p4d exists */ > + walk_pud(st, p4d, addr); > + } > +} > + > static void walk_pagetables(struct pg_state *st) > { > pgd_t *pgd = pgd_offset_k(0UL); > @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) > addr = KERN_VIRT_START + i * PGDIR_SIZE; > if (!pgd_none(*pgd)) > /* pgd exists */ > - walk_pud(st, pgd, addr); > + walk_p4d(st, pgd, addr); > } > } > > diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c > index 206156255247..9d6256b61df3 100644 > --- a/arch/powerpc/mm/ptdump/ptdump.c > +++ b/arch/powerpc/mm/ptdump/ptdump.c > @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 
> } > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) > { > - pud_t *pud = pud_offset(pgd, 0); > + pud_t *pud = pud_offset(p4d, 0); > unsigned long addr; > unsigned int i; > > @@ -304,11 +304,13 @@ static void walk_pagetables(struct pg_state *st) > * the hash pagetable. > */ > for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { > - if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) > - /* pgd exists */ > - walk_pud(st, pgd, addr); > + p4d_t *p4d = p4d_offset(pgd, 0); > + > + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) > + /* p4d exists */ > + walk_pud(st, p4d, addr); > else > - note_page(st, addr, 1, pgd_val(*pgd)); > + note_page(st, addr, 1, p4d_val(*p4d)); > } > } > > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > index 0ec9640335bb..3e29128c58cc 100644 > --- a/arch/powerpc/xmon/xmon.c > +++ b/arch/powerpc/xmon/xmon.c > @@ -3130,6 +3130,7 @@ static void show_pte(unsigned long addr) > struct task_struct *tsk = NULL; > struct mm_struct *mm; > pgd_t *pgdp, *pgdir; > + p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > pte_t *ptep; > @@ -3161,20 +3162,21 @@ static void show_pte(unsigned long addr) > pgdir = pgd_offset(mm, 0); > } > > - if (pgd_none(*pgdp)) { > - printf("no linux page table for address\n"); > + p4dp = p4d_offset(pgdp, addr); > + > + if (p4d_none(*p4dp)) { > + printf("No valid P4D\n"); > return; > } > > - printf("pgd @ 0x%px\n", pgdir); > - > - if (pgd_is_leaf(*pgdp)) { > - format_pte(pgdp, pgd_val(*pgdp)); > + if (p4d_is_leaf(*p4dp)) { > + format_pte(p4dp, p4d_val(*p4dp)); > return; > } > - printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp)); > > - pudp = pud_offset(pgdp, addr); > + printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp)); > + > + pudp = pud_offset(p4dp, addr); > > if (pud_none(*pudp)) { > printf("No valid PUD\n"); >
On Wed, Feb 26, 2020 at 10:46:13AM +0100, Christophe Leroy wrote: > > > Le 26/02/2020 à 10:13, Mike Rapoport a écrit : > > On Tue, Feb 18, 2020 at 12:54:40PM +0200, Mike Rapoport wrote: > > > On Sun, Feb 16, 2020 at 11:41:07AM +0100, Christophe Leroy wrote: > > > > > > > > > > > > Le 16/02/2020 à 09:18, Mike Rapoport a écrit : > > > > > From: Mike Rapoport <rppt@linux.ibm.com> > > > > > > > > > > Implement primitives necessary for the 4th level folding, add walks of p4d > > > > > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. > > > > > > > > I don't think it is worth adding all these additional walks of p4d, this > > > > patch could be limited to changes like: > > > > > > > > - pud = pud_offset(pgd, gpa); > > > > + pud = pud_offset(p4d_offset(pgd, gpa), gpa); > > > > > > > > The additional walks should be added through another patch the day powerpc > > > > needs them. > > > > > > Ok, I'll update the patch to reduce walking the p4d. > > > > Here's what I have with more direct accesses from pgd to pud. > > I went quickly through. This looks promising. > > Do we need the walk_p4d() in arch/powerpc/mm/ptdump/hashpagetable.c ? > Can't we just do > > @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) > addr = KERN_VIRT_START + i * PGDIR_SIZE; > if (!pgd_none(*pgd)) > /* pgd exists */ > - walk_pud(st, pgd, addr); > + walk_pud(st, p4d_offset(pgd, addr), addr); We can do addr = KERN_VIRT_START + i * PGDIR_SIZE; p4d = p4d_offset(pgd, addr); if (!p4d_none(*p4d)) walk_pud() But I don't think this is really essential. Again, we are trading off code consistency vs line count. I don't think line count is that important. > } > } > > > > Also, I think the removal of get_pteptr() should be a patch by itself. > > You could include my patches in your series. 
See > https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=152102 > > Christophe > > > > > > > From 6c59a86ce8394fb6100e9b6ced2e346981fb0ce9 Mon Sep 17 00:00:00 2001 > > From: Mike Rapoport <rppt@linux.ibm.com> > > Date: Sun, 24 Nov 2019 15:38:00 +0200 > > Subject: [PATCH v3] powerpc: add support for folded p4d page tables > > > > Implement primitives necessary for the 4th level folding, add walks of p4d > > level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. > > > > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > > Tested-by: Christophe Leroy <christophe.leroy@c-s.fr> # 8xx and 83xx > > --- > > v3: > > * reduce amount of added p4d walks > > * kill pgtable_32::get_pteptr and traverse page table in > > pgtable_32::__change_page_attr_noflush > > > > > > arch/powerpc/include/asm/book3s/32/pgtable.h | 1 - > > arch/powerpc/include/asm/book3s/64/hash.h | 4 +- > > arch/powerpc/include/asm/book3s/64/pgalloc.h | 4 +- > > arch/powerpc/include/asm/book3s/64/pgtable.h | 60 ++++++++++--------- > > arch/powerpc/include/asm/book3s/64/radix.h | 6 +- > > arch/powerpc/include/asm/nohash/32/pgtable.h | 1 - > > arch/powerpc/include/asm/nohash/64/pgalloc.h | 2 +- > > .../include/asm/nohash/64/pgtable-4k.h | 32 +++++----- > > arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +- > > arch/powerpc/include/asm/pgtable.h | 6 +- > > arch/powerpc/kvm/book3s_64_mmu_radix.c | 30 ++++++---- > > arch/powerpc/lib/code-patching.c | 7 ++- > > arch/powerpc/mm/book3s32/mmu.c | 2 +- > > arch/powerpc/mm/book3s32/tlb.c | 4 +- > > arch/powerpc/mm/book3s64/hash_pgtable.c | 4 +- > > arch/powerpc/mm/book3s64/radix_pgtable.c | 26 +++++--- > > arch/powerpc/mm/book3s64/subpage_prot.c | 6 +- > > arch/powerpc/mm/hugetlbpage.c | 28 +++++---- > > arch/powerpc/mm/kasan/kasan_init_32.c | 8 +-- > > arch/powerpc/mm/mem.c | 4 +- > > arch/powerpc/mm/nohash/40x.c | 4 +- > > arch/powerpc/mm/nohash/book3e_pgtable.c | 15 ++--- > > arch/powerpc/mm/pgtable.c | 30 ++++++---- > > 
arch/powerpc/mm/pgtable_32.c | 45 +++----------- > > arch/powerpc/mm/pgtable_64.c | 10 ++-- > > arch/powerpc/mm/ptdump/hashpagetable.c | 20 ++++++- > > arch/powerpc/mm/ptdump/ptdump.c | 14 +++-- > > arch/powerpc/xmon/xmon.c | 18 +++--- > > 28 files changed, 213 insertions(+), 184 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h > > index 5b39c11e884a..39ec11371be0 100644 > > --- a/arch/powerpc/include/asm/book3s/32/pgtable.h > > +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h > > @@ -2,7 +2,6 @@ > > #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H > > #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H > > -#define __ARCH_USE_5LEVEL_HACK > > #include <asm-generic/pgtable-nopmd.h> > > #include <asm/book3s/32/hash.h> > > diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h > > index 2781ebf6add4..876d1528c2cf 100644 > > --- a/arch/powerpc/include/asm/book3s/64/hash.h > > +++ b/arch/powerpc/include/asm/book3s/64/hash.h > > @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) > > #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) > > #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) > > -static inline int hash__pgd_bad(pgd_t pgd) > > +static inline int hash__p4d_bad(p4d_t p4d) > > { > > - return (pgd_val(pgd) == 0); > > + return (p4d_val(p4d) == 0); > > } > > #ifdef CONFIG_STRICT_KERNEL_RWX > > extern void hash__mark_rodata_ro(void); > > diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h > > index a41e91bd0580..69c5b051734f 100644 > > --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h > > +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h > > @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) > > kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); > > } > > -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > > +static 
inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) > > { > > - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > > + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); > > } > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > > diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h > > index 201a69e6a355..fa60e8594b9f 100644 > > --- a/arch/powerpc/include/asm/book3s/64/pgtable.h > > +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h > > @@ -2,7 +2,7 @@ > > #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ > > -#include <asm-generic/5level-fixup.h> > > +#include <asm-generic/pgtable-nop4d.h> > > #ifndef __ASSEMBLY__ > > #include <linux/mmdebug.h> > > @@ -251,7 +251,7 @@ extern unsigned long __pmd_frag_size_shift; > > /* Bits to mask out from a PUD to get to the PMD page */ > > #define PUD_MASKED_BITS 0xc0000000000000ffUL > > /* Bits to mask out from a PGD to get to the PUD page */ > > -#define PGD_MASKED_BITS 0xc0000000000000ffUL > > +#define P4D_MASKED_BITS 0xc0000000000000ffUL > > /* > > * Used as an indicator for rcu callback functions > > @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) > > return pte_access_permitted(pud_pte(pud), write); > > } > > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > > +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) > > +static inline __be64 p4d_raw(p4d_t x) > > +{ > > + return pgd_raw(x.pgd); > > +} > > + > > +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) > > -static inline void pgd_clear(pgd_t *pgdp) > > +static inline void p4d_clear(p4d_t *p4dp) > > { > > - *pgdp = __pgd(0); > > + *p4dp = __p4d(0); > > } > > -static inline int pgd_none(pgd_t pgd) > > +static inline int p4d_none(p4d_t p4d) > > { > > - return !pgd_raw(pgd); > > + return !p4d_raw(p4d); > > } > > -static inline int pgd_present(pgd_t pgd) > > +static inline int p4d_present(p4d_t p4d) > 
> { > > - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); > > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); > > } > > -static inline pte_t pgd_pte(pgd_t pgd) > > +static inline pte_t p4d_pte(p4d_t p4d) > > { > > - return __pte_raw(pgd_raw(pgd)); > > + return __pte_raw(p4d_raw(p4d)); > > } > > -static inline pgd_t pte_pgd(pte_t pte) > > +static inline p4d_t pte_p4d(pte_t pte) > > { > > - return __pgd_raw(pte_raw(pte)); > > + return __p4d_raw(pte_raw(pte)); > > } > > -static inline int pgd_bad(pgd_t pgd) > > +static inline int p4d_bad(p4d_t p4d) > > { > > if (radix_enabled()) > > - return radix__pgd_bad(pgd); > > - return hash__pgd_bad(pgd); > > + return radix__p4d_bad(p4d); > > + return hash__p4d_bad(p4d); > > } > > -#define pgd_access_permitted pgd_access_permitted > > -static inline bool pgd_access_permitted(pgd_t pgd, bool write) > > +#define p4d_access_permitted p4d_access_permitted > > +static inline bool p4d_access_permitted(p4d_t p4d, bool write) > > { > > - return pte_access_permitted(pgd_pte(pgd), write); > > + return pte_access_permitted(p4d_pte(p4d), write); > > } > > -extern struct page *pgd_page(pgd_t pgd); > > +extern struct page *p4d_page(p4d_t p4d); > > /* Pointers in the page table tree are physical addresses */ > > #define __pgtable_ptr_val(ptr) __pa(ptr) > > #define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) > > #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) > > -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) > > +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) > > #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) > > #define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) > > @@ -1010,8 +1016,8 @@ extern struct page *pgd_page(pgd_t pgd); > > #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) > > -#define pud_offset(pgdp, addr) \ > > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) > > 
+#define pud_offset(p4dp, addr) \ > > + (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) > > #define pmd_offset(pudp,addr) \ > > (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) > > #define pte_offset_kernel(dir,addr) \ > > @@ -1368,11 +1374,11 @@ static inline bool pud_is_leaf(pud_t pud) > > return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); > > } > > -#define pgd_is_leaf pgd_is_leaf > > -#define pgd_leaf pgd_is_leaf > > -static inline bool pgd_is_leaf(pgd_t pgd) > > +#define p4d_is_leaf p4d_is_leaf > > +#define p4d_leaf p4d_is_leaf > > +static inline bool p4d_is_leaf(p4d_t p4d) > > { > > - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); > > + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); > > } > > #endif /* __ASSEMBLY__ */ > > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h > > index d97db3ad9aae..9bca2ac64220 100644 > > --- a/arch/powerpc/include/asm/book3s/64/radix.h > > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > > @@ -30,7 +30,7 @@ > > /* Don't have anything in the reserved bits and leaf bits */ > > #define RADIX_PMD_BAD_BITS 0x60000000000000e0UL > > #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL > > -#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL > > +#define RADIX_P4D_BAD_BITS 0x60000000000000e0UL > > #define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) > > #define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) > > @@ -227,9 +227,9 @@ static inline int radix__pud_bad(pud_t pud) > > } > > -static inline int radix__pgd_bad(pgd_t pgd) > > +static inline int radix__p4d_bad(p4d_t p4d) > > { > > - return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); > > + return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS); > > } > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > > diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h > > index 60c4d829152e..d4c2c4259fa3 100644 > > --- a/arch/powerpc/include/asm/nohash/32/pgtable.h > > +++ 
b/arch/powerpc/include/asm/nohash/32/pgtable.h > > @@ -2,7 +2,6 @@ > > #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H > > #define _ASM_POWERPC_NOHASH_32_PGTABLE_H > > -#define __ARCH_USE_5LEVEL_HACK > > #include <asm-generic/pgtable-nopmd.h> > > #ifndef __ASSEMBLY__ > > diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h > > index b9534a793293..668aee6017e7 100644 > > --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h > > +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h > > @@ -15,7 +15,7 @@ struct vmemmap_backing { > > }; > > extern struct vmemmap_backing *vmemmap_list; > > -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) > > +#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > > { > > diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > > index c40ec32b8194..81b1c54e3cf1 100644 > > --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > > +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h > > @@ -2,7 +2,7 @@ > > #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H > > #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H > > -#include <asm-generic/5level-fixup.h> > > +#include <asm-generic/pgtable-nop4d.h> > > /* > > * Entries per page directory level. 
The PTE level must use a 64b record > > @@ -45,41 +45,41 @@ > > #define PMD_MASKED_BITS 0 > > /* Bits to mask out from a PUD to get to the PMD page */ > > #define PUD_MASKED_BITS 0 > > -/* Bits to mask out from a PGD to get to the PUD page */ > > -#define PGD_MASKED_BITS 0 > > +/* Bits to mask out from a P4D to get to the PUD page */ > > +#define P4D_MASKED_BITS 0 > > /* > > * 4-level page tables related bits > > */ > > -#define pgd_none(pgd) (!pgd_val(pgd)) > > -#define pgd_bad(pgd) (pgd_val(pgd) == 0) > > -#define pgd_present(pgd) (pgd_val(pgd) != 0) > > -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) > > +#define p4d_none(p4d) (!p4d_val(p4d)) > > +#define p4d_bad(p4d) (p4d_val(p4d) == 0) > > +#define p4d_present(p4d) (p4d_val(p4d) != 0) > > +#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) > > #ifndef __ASSEMBLY__ > > -static inline void pgd_clear(pgd_t *pgdp) > > +static inline void p4d_clear(p4d_t *p4dp) > > { > > - *pgdp = __pgd(0); > > + *p4dp = __p4d(0); > > } > > -static inline pte_t pgd_pte(pgd_t pgd) > > +static inline pte_t p4d_pte(p4d_t p4d) > > { > > - return __pte(pgd_val(pgd)); > > + return __pte(p4d_val(p4d)); > > } > > -static inline pgd_t pte_pgd(pte_t pte) > > +static inline p4d_t pte_p4d(pte_t pte) > > { > > - return __pgd(pte_val(pte)); > > + return __p4d(pte_val(pte)); > > } > > -extern struct page *pgd_page(pgd_t pgd); > > +extern struct page *p4d_page(p4d_t p4d); > > #endif /* !__ASSEMBLY__ */ > > -#define pud_offset(pgdp, addr) \ > > - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ > > +#define pud_offset(p4dp, addr) \ > > + (((pud_t *) p4d_page_vaddr(*(p4dp))) + \ > > (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) > > #define pud_ERROR(e) \ > > diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h > > index 9a33b8bd842d..b360f262b9c6 100644 > > --- a/arch/powerpc/include/asm/nohash/64/pgtable.h > > +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h > > @@ -175,11 
+175,11 @@ static inline pud_t pte_pud(pte_t pte) > > return __pud(pte_val(pte)); > > } > > #define pud_write(pud) pte_write(pud_pte(pud)) > > -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) > > +#define p4d_write(pgd) pte_write(p4d_pte(p4d)) > > -static inline void pgd_set(pgd_t *pgdp, unsigned long val) > > +static inline void p4d_set(p4d_t *p4dp, unsigned long val) > > { > > - *pgdp = __pgd(val); > > + *p4dp = __p4d(val); > > } > > /* > > diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h > > index 8cc543ed114c..05205d7a7b4a 100644 > > --- a/arch/powerpc/include/asm/pgtable.h > > +++ b/arch/powerpc/include/asm/pgtable.h > > @@ -139,9 +139,9 @@ static inline bool pud_is_leaf(pud_t pud) > > } > > #endif > > -#ifndef pgd_is_leaf > > -#define pgd_is_leaf pgd_is_leaf > > -static inline bool pgd_is_leaf(pgd_t pgd) > > +#ifndef p4d_is_leaf > > +#define p4d_is_leaf p4d_is_leaf > > +static inline bool p4d_is_leaf(p4d_t p4d) > > { > > return false; > > } > > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c > > index 803940d79b73..beb694285100 100644 > > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c > > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c > > @@ -499,13 +499,14 @@ void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) > > unsigned long ig; > > for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { > > + p4d_t *p4d = p4d_offset(pgd, 0); > > pud_t *pud; > > - if (!pgd_present(*pgd)) > > + if (!p4d_present(*p4d)) > > continue; > > - pud = pud_offset(pgd, 0); > > + pud = pud_offset(p4d, 0); > > kvmppc_unmap_free_pud(kvm, pud, lpid); > > - pgd_clear(pgd); > > + p4d_clear(p4d); > > } > > } > > @@ -566,6 +567,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > unsigned long *rmapp, struct rmap_nested **n_rmap) > > { > > pgd_t *pgd; > > + p4d_t *p4d; > > pud_t *pud, *new_pud = NULL; > > pmd_t *pmd, *new_pmd = NULL; > > pte_t *ptep, *new_ptep = NULL; > > @@ 
-573,9 +575,11 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Traverse the guest's 2nd-level tree, allocate new levels needed */ > > pgd = pgtable + pgd_index(gpa); > > + p4d = p4d_offset(pgd, gpa); > > + > > pud = NULL; > > - if (pgd_present(*pgd)) > > - pud = pud_offset(pgd, gpa); > > + if (p4d_present(*p4d)) > > + pud = pud_offset(p4d, gpa); > > else > > new_pud = pud_alloc_one(kvm->mm, gpa); > > @@ -596,13 +600,13 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, > > /* Now traverse again under the lock and change the tree */ > > ret = -ENOMEM; > > - if (pgd_none(*pgd)) { > > + if (p4d_none(*p4d)) { > > if (!new_pud) > > goto out_unlock; > > - pgd_populate(kvm->mm, pgd, new_pud); > > + p4d_populate(kvm->mm, p4d, new_pud); > > new_pud = NULL; > > } > > - pud = pud_offset(pgd, gpa); > > + pud = pud_offset(p4d, gpa); > > if (pud_is_leaf(*pud)) { > > unsigned long hgpa = gpa & PUD_MASK; > > @@ -1220,6 +1224,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > > pgd_t *pgt; > > struct kvm_nested_guest *nested; > > pgd_t pgd, *pgdp; > > + p4d_t p4d, *p4dp; > > pud_t pud, *pudp; > > pmd_t pmd, *pmdp; > > pte_t *ptep; > > @@ -1292,13 +1297,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, > > } > > pgdp = pgt + pgd_index(gpa); > > - pgd = READ_ONCE(*pgdp); > > - if (!(pgd_val(pgd) & _PAGE_PRESENT)) { > > - gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE; > > + p4dp = p4d_offset(pgdp, gpa); > > + p4d = READ_ONCE(*p4dp); > > + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { > > + gpa = (gpa & P4D_MASK) + P4D_SIZE; > > continue; > > } > > - pudp = pud_offset(&pgd, gpa); > > + pudp = pud_offset(&p4d, gpa); > > pud = READ_ONCE(*pudp); > > if (!(pud_val(pud) & _PAGE_PRESENT)) { > > gpa = (gpa & PUD_MASK) + PUD_SIZE; > > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c > > index 3345f039a876..7a59f6863cec 100644 > > --- a/arch/powerpc/lib/code-patching.c > > +++ 
b/arch/powerpc/lib/code-patching.c > > @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) > > pte_t *ptep; > > pmd_t *pmdp; > > pud_t *pudp; > > + p4d_t *p4dp; > > pgd_t *pgdp; > > pgdp = pgd_offset_k(addr); > > if (unlikely(!pgdp)) > > return -EINVAL; > > - pudp = pud_offset(pgdp, addr); > > + p4dp = p4d_offset(pgdp, addr); > > + if (unlikely(!p4dp)) > > + return -EINVAL; > > + > > + pudp = pud_offset(p4dp, addr); > > if (unlikely(!pudp)) > > return -EINVAL; > > diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c > > index f888cbb109b9..edef17c97206 100644 > > --- a/arch/powerpc/mm/book3s32/mmu.c > > +++ b/arch/powerpc/mm/book3s32/mmu.c > > @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) > > if (!Hash) > > return; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); > > if (!pmd_none(*pmd)) > > add_hash_page(mm->context.id, ea, pmd_val(*pmd)); > > } > > diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c > > index 2fcd321040ff..175bc33b41b7 100644 > > --- a/arch/powerpc/mm/book3s32/tlb.c > > +++ b/arch/powerpc/mm/book3s32/tlb.c > > @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, > > if (start >= end) > > return; > > end = (end - 1) | ~PAGE_MASK; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); > > for (;;) { > > pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; > > if (pmd_end > end) > > @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) > > return; > > } > > mm = (vmaddr < TASK_SIZE)? 
vma->vm_mm: &init_mm; > > - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); > > if (!pmd_none(*pmd)) > > flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); > > } > > diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c > > index 64733b9cb20a..9cd15937e88a 100644 > > --- a/arch/powerpc/mm/book3s64/hash_pgtable.c > > +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c > > @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, > > int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > { > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); > > if (slab_is_available()) { > > pgdp = pgd_offset_k(ea); > > - pudp = pud_alloc(&init_mm, pgdp, ea); > > + p4dp = p4d_offset(pgdp, ea); > > + pudp = pud_alloc(&init_mm, p4dp, ea); > > if (!pudp) > > return -ENOMEM; > > pmdp = pmd_alloc(&init_mm, pudp, ea); > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c > > index dd1bea45325c..fc3d0b0460b0 100644 > > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > > @@ -64,17 +64,19 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, > > { > > unsigned long pfn = pa >> PAGE_SHIFT; > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > pgdp = pgd_offset_k(ea); > > - if (pgd_none(*pgdp)) { > > + p4dp = p4d_offset(pgdp, ea); > > + if (p4d_none(*p4dp)) { > > pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, > > region_start, region_end); > > - pgd_populate(&init_mm, pgdp, pudp); > > + p4d_populate(&init_mm, p4dp, pudp); > > } > > - pudp = pud_offset(pgdp, 
ea); > > + pudp = pud_offset(p4dp, ea); > > if (map_page_size == PUD_SIZE) { > > ptep = (pte_t *)pudp; > > goto set_the_pte; > > @@ -114,6 +116,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, > > { > > unsigned long pfn = pa >> PAGE_SHIFT; > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -136,7 +139,8 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, > > * boot. > > */ > > pgdp = pgd_offset_k(ea); > > - pudp = pud_alloc(&init_mm, pgdp, ea); > > + p4dp = p4d_offset(pgdp, ea); > > + pudp = pud_alloc(&init_mm, p4dp, ea); > > if (!pudp) > > return -ENOMEM; > > if (map_page_size == PUD_SIZE) { > > @@ -173,6 +177,7 @@ void radix__change_memory_range(unsigned long start, unsigned long end, > > { > > unsigned long idx; > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -185,7 +190,8 @@ void radix__change_memory_range(unsigned long start, unsigned long end, > > for (idx = start; idx < end; idx += PAGE_SIZE) { > > pgdp = pgd_offset_k(idx); > > - pudp = pud_alloc(&init_mm, pgdp, idx); > > + p4dp = p4d_offset(pgdp, idx); > > + pudp = pud_alloc(&init_mm, p4dp, idx); > > if (!pudp) > > continue; > > if (pud_is_leaf(*pudp)) { > > @@ -847,6 +853,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) > > unsigned long addr, next; > > pud_t *pud_base; > > pgd_t *pgd; > > + p4d_t *p4d; > > spin_lock(&init_mm.page_table_lock); > > @@ -854,15 +861,16 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) > > next = pgd_addr_end(addr, end); > > pgd = pgd_offset_k(addr); > > - if (!pgd_present(*pgd)) > > + p4d = p4d_offset(pgd, addr); > > + if (!p4d_present(*p4d)) > > continue; > > - if (pgd_is_leaf(*pgd)) { > > - split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); > > + if (p4d_is_leaf(*p4d)) { > > + split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d); > > continue; > > } > > - pud_base = (pud_t 
*)pgd_page_vaddr(*pgd); > > + pud_base = (pud_t *)p4d_page_vaddr(*p4d); > > remove_pud_table(pud_base, addr, next); > > } > > diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c > > index 2ef24a53f4c9..25a0c044bd93 100644 > > --- a/arch/powerpc/mm/book3s64/subpage_prot.c > > +++ b/arch/powerpc/mm/book3s64/subpage_prot.c > > @@ -54,15 +54,17 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, > > int npages) > > { > > pgd_t *pgd; > > + p4d_t *p4d; > > pud_t *pud; > > pmd_t *pmd; > > pte_t *pte; > > spinlock_t *ptl; > > pgd = pgd_offset(mm, addr); > > - if (pgd_none(*pgd)) > > + p4d = p4d_offset(pgd, addr); > > + if (p4d_none(*p4d)) > > return; > > - pud = pud_offset(pgd, addr); > > + pud = pud_offset(p4d, addr); > > if (pud_none(*pud)) > > return; > > pmd = pmd_offset(pud, addr); > > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c > > index 33b3461d91e8..54f5994d4cbb 100644 > > --- a/arch/powerpc/mm/hugetlbpage.c > > +++ b/arch/powerpc/mm/hugetlbpage.c > > @@ -119,6 +119,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, > > pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) > > { > > pgd_t *pg; > > + p4d_t *p4; > > pud_t *pu; > > pmd_t *pm; > > hugepd_t *hpdp = NULL; > > @@ -128,20 +129,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz > > addr &= ~(sz-1); > > pg = pgd_offset(mm, addr); > > + p4 = p4d_offset(pg, addr); > > #ifdef CONFIG_PPC_BOOK3S_64 > > if (pshift == PGDIR_SHIFT) > > /* 16GB huge page */ > > - return (pte_t *) pg; > > + return (pte_t *) p4; > > else if (pshift > PUD_SHIFT) { > > /* > > * We need to use hugepd table > > */ > > ptl = &mm->page_table_lock; > > - hpdp = (hugepd_t *)pg; > > + hpdp = (hugepd_t *)p4; > > } else { > > pdshift = PUD_SHIFT; > > - pu = pud_alloc(mm, pg, addr); > > + pu = pud_alloc(mm, p4, addr); > > if (!pu) > > return NULL; > > if (pshift == 
PUD_SHIFT) > > @@ -166,10 +168,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz > > #else > > if (pshift >= PGDIR_SHIFT) { > > ptl = &mm->page_table_lock; > > - hpdp = (hugepd_t *)pg; > > + hpdp = (hugepd_t *)p4; > > } else { > > pdshift = PUD_SHIFT; > > - pu = pud_alloc(mm, pg, addr); > > + pu = pud_alloc(mm, p4, addr); > > if (!pu) > > return NULL; > > if (pshift >= PUD_SHIFT) { > > @@ -390,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, > > mm_dec_nr_pmds(tlb->mm); > > } > > -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > > +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, > > unsigned long addr, unsigned long end, > > unsigned long floor, unsigned long ceiling) > > { > > @@ -400,7 +402,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > > start = addr; > > do { > > - pud = pud_offset(pgd, addr); > > + pud = pud_offset(p4d, addr); > > next = pud_addr_end(addr, end); > > if (!is_hugepd(__hugepd(pud_val(*pud)))) { > > if (pud_none_or_clear_bad(pud)) > > @@ -435,8 +437,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, > > if (end - 1 > ceiling - 1) > > return; > > - pud = pud_offset(pgd, start); > > - pgd_clear(pgd); > > + pud = pud_offset(p4d, start); > > + p4d_clear(p4d); > > pud_free_tlb(tlb, pud, start); > > mm_dec_nr_puds(tlb->mm); > > } > > @@ -449,6 +451,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > > unsigned long floor, unsigned long ceiling) > > { > > pgd_t *pgd; > > + p4d_t *p4d; > > unsigned long next; > > /* > > @@ -471,10 +474,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > > do { > > next = pgd_addr_end(addr, end); > > pgd = pgd_offset(tlb->mm, addr); > > + p4d = p4d_offset(pgd, addr); > > if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { > > - if (pgd_none_or_clear_bad(pgd)) > > + if (p4d_none_or_clear_bad(p4d)) > > continue; > > - hugetlb_free_pud_range(tlb, 
pgd, addr, next, floor, ceiling); > > + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); > > } else { > > unsigned long more; > > /* > > @@ -487,7 +491,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, > > if (more > next) > > next = more; > > - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, > > + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, > > addr, next, floor, ceiling); > > } > > } while (addr = next, addr != end); > > diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c > > index db5664dde5ff..88e2e16380b5 100644 > > --- a/arch/powerpc/mm/kasan/kasan_init_32.c > > +++ b/arch/powerpc/mm/kasan/kasan_init_32.c > > @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned > > unsigned long k_cur, k_next; > > pte_t *new = NULL; > > - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); > > + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_start), k_start), k_start), k_start); > > for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { > > k_next = pgd_addr_end(k_cur, k_end); > > @@ -78,7 +78,7 @@ static int __init kasan_init_region(void *start, size_t size) > > block = memblock_alloc(k_end - k_start, PAGE_SIZE); > > for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { > > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); > > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); > > void *va = block + k_cur - k_start; > > pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); > > @@ -102,7 +102,7 @@ static void __init kasan_remap_early_shadow_ro(void) > > kasan_populate_pte(kasan_early_shadow_pte, prot); > > for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { > > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); > > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), 
k_cur); > > pte_t *ptep = pte_offset_kernel(pmd, k_cur); > > if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) > > @@ -201,7 +201,7 @@ void __init kasan_early_init(void) > > unsigned long addr = KASAN_SHADOW_START; > > unsigned long end = KASAN_SHADOW_END; > > unsigned long next; > > - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); > > + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(addr), addr), addr), addr); > > BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > > index ef7b1119b2e2..8262b384dcf3 100644 > > --- a/arch/powerpc/mm/mem.c > > +++ b/arch/powerpc/mm/mem.c > > @@ -69,8 +69,8 @@ EXPORT_SYMBOL(kmap_prot); > > static inline pte_t *virt_to_kpte(unsigned long vaddr) > > { > > - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), > > - vaddr), vaddr), vaddr); > > + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), > > + vaddr), vaddr), vaddr), vaddr); > > } > > #endif > > diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c > > index f348104eb461..7aaf7155e350 100644 > > --- a/arch/powerpc/mm/nohash/40x.c > > +++ b/arch/powerpc/mm/nohash/40x.c > > @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) > > pmd_t *pmdp; > > unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; > > - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); > > + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); > > *pmdp++ = __pmd(val); > > *pmdp++ = __pmd(val); > > *pmdp++ = __pmd(val); > > @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) > > pmd_t *pmdp; > > unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; > > - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); > > + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); > > *pmdp = __pmd(val); > > v += LARGE_PAGE_SIZE_4M; > > 
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c > > index 4637fdd469cf..77884e24281d 100644 > > --- a/arch/powerpc/mm/nohash/book3e_pgtable.c > > +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c > > @@ -73,6 +73,7 @@ static void __init *early_alloc_pgtable(unsigned long size) > > int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > { > > pgd_t *pgdp; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -80,7 +81,8 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); > > if (slab_is_available()) { > > pgdp = pgd_offset_k(ea); > > - pudp = pud_alloc(&init_mm, pgdp, ea); > > + p4dp = p4d_offset(pgdp, ea); > > + pudp = pud_alloc(&init_mm, p4dp, ea); > > if (!pudp) > > return -ENOMEM; > > pmdp = pmd_alloc(&init_mm, pudp, ea); > > @@ -91,13 +93,12 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) > > return -ENOMEM; > > } else { > > pgdp = pgd_offset_k(ea); > > -#ifndef __PAGETABLE_PUD_FOLDED > > - if (pgd_none(*pgdp)) { > > - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); > > - pgd_populate(&init_mm, pgdp, pudp); > > + p4dp = p4d_offset(pgdp, ea); > > + if (p4d_none(*p4dp)) { > > + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); > > + p4d_populate(&init_mm, p4dp, pmdp); > > } > > -#endif /* !__PAGETABLE_PUD_FOLDED */ > > - pudp = pud_offset(pgdp, ea); > > + pudp = pud_offset(p4dp, ea); > > if (pud_none(*pudp)) { > > pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); > > pud_populate(&init_mm, pudp, pmdp); > > diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c > > index e3759b69f81b..c2499271f6c1 100644 > > --- a/arch/powerpc/mm/pgtable.c > > +++ b/arch/powerpc/mm/pgtable.c > > @@ -265,6 +265,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, > > void assert_pte_locked(struct mm_struct *mm, unsigned long addr) > > { > > pgd_t *pgd; > > + p4d_t *p4d; > > 
pud_t *pud; > > pmd_t *pmd; > > @@ -272,7 +273,9 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) > > return; > > pgd = mm->pgd + pgd_index(addr); > > BUG_ON(pgd_none(*pgd)); > > - pud = pud_offset(pgd, addr); > > + p4d = p4d_offset(pgd, addr); > > + BUG_ON(p4d_none(*p4d)); > > + pud = pud_offset(p4d, addr); > > BUG_ON(pud_none(*pud)); > > pmd = pmd_offset(pud, addr); > > /* > > @@ -312,12 +315,13 @@ EXPORT_SYMBOL_GPL(vmalloc_to_phys); > > pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > > bool *is_thp, unsigned *hpage_shift) > > { > > - pgd_t pgd, *pgdp; > > + pgd_t *pgdp; > > + p4d_t p4d, *p4dp; > > pud_t pud, *pudp; > > pmd_t pmd, *pmdp; > > pte_t *ret_pte; > > hugepd_t *hpdp = NULL; > > - unsigned pdshift = PGDIR_SHIFT; > > + unsigned pdshift; > > if (hpage_shift) > > *hpage_shift = 0; > > @@ -325,24 +329,28 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > > if (is_thp) > > *is_thp = false; > > - pgdp = pgdir + pgd_index(ea); > > - pgd = READ_ONCE(*pgdp); > > /* > > * Always operate on the local stack value. This make sure the > > * value don't get updated by a parallel THP split/collapse, > > * page fault or a page unmap. The return pte_t * is still not > > * stable. So should be checked there for above conditions. > > + * Top level is an exception because it is folded into p4d. 
> > */ > > - if (pgd_none(pgd)) > > + pgdp = pgdir + pgd_index(ea); > > + p4dp = p4d_offset(pgdp, ea); > > + p4d = READ_ONCE(*p4dp); > > + pdshift = P4D_SHIFT; > > + > > + if (p4d_none(p4d)) > > return NULL; > > - if (pgd_is_leaf(pgd)) { > > - ret_pte = (pte_t *)pgdp; > > + if (p4d_is_leaf(p4d)) { > > + ret_pte = (pte_t *)p4dp; > > goto out; > > } > > - if (is_hugepd(__hugepd(pgd_val(pgd)))) { > > - hpdp = (hugepd_t *)&pgd; > > + if (is_hugepd(__hugepd(p4d_val(p4d)))) { > > + hpdp = (hugepd_t *)&p4d; > > goto out_huge; > > } > > @@ -352,7 +360,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, > > * irq disabled > > */ > > pdshift = PUD_SHIFT; > > - pudp = pud_offset(&pgd, ea); > > + pudp = pud_offset(&p4d, ea); > > pud = READ_ONCE(*pudp); > > if (pud_none(pud)) > > diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c > > index 5fb90edd865e..5774d4bc94d0 100644 > > --- a/arch/powerpc/mm/pgtable_32.c > > +++ b/arch/powerpc/mm/pgtable_32.c > > @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) > > int err = -ENOMEM; > > /* Use upper 10 bits of VA to index the first level map */ > > - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); > > + pd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); > > /* Use middle 10 bits of VA to index the second-level map */ > > if (likely(slab_is_available())) > > pg = pte_alloc_kernel(pd, va); > > @@ -121,53 +121,24 @@ void __init mapin_ram(void) > > } > > } > > -/* Scan the real Linux page tables and return a PTE pointer for > > - * a virtual address in a context. > > - * Returns true (1) if PTE was found, zero otherwise. The pointer to > > - * the PTE pointer is unmodified if PTE is not found. 
> > - */ > > -static int > > -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) > > -{ > > - pgd_t *pgd; > > - pud_t *pud; > > - pmd_t *pmd; > > - pte_t *pte; > > - int retval = 0; > > - > > - pgd = pgd_offset(mm, addr & PAGE_MASK); > > - if (pgd) { > > - pud = pud_offset(pgd, addr & PAGE_MASK); > > - if (pud && pud_present(*pud)) { > > - pmd = pmd_offset(pud, addr & PAGE_MASK); > > - if (pmd_present(*pmd)) { > > - pte = pte_offset_map(pmd, addr & PAGE_MASK); > > - if (pte) { > > - retval = 1; > > - *ptep = pte; > > - if (pmdp) > > - *pmdp = pmd; > > - /* XXX caller needs to do pte_unmap, yuck */ > > - } > > - } > > - } > > - } > > - return(retval); > > -} > > - > > static int __change_page_attr_noflush(struct page *page, pgprot_t prot) > > { > > pte_t *kpte; > > pmd_t *kpmd; > > - unsigned long address; > > + unsigned long address, va; > > BUG_ON(PageHighMem(page)); > > address = (unsigned long)page_address(page); > > + va = address & PAGE_MASK; > > if (v_block_mapped(address)) > > return 0; > > - if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) > > + > > + kpmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); > > + if (!pmd_present(*kpmd)) > > return -EINVAL; > > + > > + kpte = pte_offset_map(kpmd, va); > > __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); > > pte_unmap(kpte); > > diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c > > index e78832dce7bb..1f86a88fd4bb 100644 > > --- a/arch/powerpc/mm/pgtable_64.c > > +++ b/arch/powerpc/mm/pgtable_64.c > > @@ -101,13 +101,13 @@ EXPORT_SYMBOL(__pte_frag_size_shift); > > #ifndef __PAGETABLE_PUD_FOLDED > > /* 4 level page table */ > > -struct page *pgd_page(pgd_t pgd) > > +struct page *p4d_page(p4d_t p4d) > > { > > - if (pgd_is_leaf(pgd)) { > > - VM_WARN_ON(!pgd_huge(pgd)); > > - return pte_page(pgd_pte(pgd)); > > + if (p4d_is_leaf(p4d)) { > > + VM_WARN_ON(!p4d_huge(p4d)); > > + return pte_page(p4d_pte(p4d)); > > } > > - return 
virt_to_page(pgd_page_vaddr(pgd)); > > + return virt_to_page(p4d_page_vaddr(p4d)); > > } > > #endif > > diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c > > index a07278027c6f..ac360ad865a8 100644 > > --- a/arch/powerpc/mm/ptdump/hashpagetable.c > > +++ b/arch/powerpc/mm/ptdump/hashpagetable.c > > @@ -417,9 +417,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) > > } > > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > > +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) > > { > > - pud_t *pud = pud_offset(pgd, 0); > > + pud_t *pud = pud_offset(p4d, 0); > > unsigned long addr; > > unsigned int i; > > @@ -431,6 +431,20 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > > } > > } > > +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) > > +{ > > + p4d_t *p4d = p4d_offset(pgd, 0); > > + unsigned long addr; > > + unsigned int i; > > + > > + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { > > + addr = start + i * P4D_SIZE; > > + if (!p4d_none(*p4d)) > > + /* p4d exists */ > > + walk_pud(st, p4d, addr); > > + } > > +} > > + > > static void walk_pagetables(struct pg_state *st) > > { > > pgd_t *pgd = pgd_offset_k(0UL); > > @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) > > addr = KERN_VIRT_START + i * PGDIR_SIZE; > > if (!pgd_none(*pgd)) > > /* pgd exists */ > > - walk_pud(st, pgd, addr); > > + walk_p4d(st, pgd, addr); > > } > > } > > diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c > > index 206156255247..9d6256b61df3 100644 > > --- a/arch/powerpc/mm/ptdump/ptdump.c > > +++ b/arch/powerpc/mm/ptdump/ptdump.c > > @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) > > } > > } > > -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) > > +static void walk_pud(struct pg_state *st, 
p4d_t *p4d, unsigned long start) > > { > > - pud_t *pud = pud_offset(pgd, 0); > > + pud_t *pud = pud_offset(p4d, 0); > > unsigned long addr; > > unsigned int i; > > @@ -304,11 +304,13 @@ static void walk_pagetables(struct pg_state *st) > > * the hash pagetable. > > */ > > for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { > > - if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) > > - /* pgd exists */ > > - walk_pud(st, pgd, addr); > > + p4d_t *p4d = p4d_offset(pgd, 0); > > + > > + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) > > + /* p4d exists */ > > + walk_pud(st, p4d, addr); > > else > > - note_page(st, addr, 1, pgd_val(*pgd)); > > + note_page(st, addr, 1, p4d_val(*p4d)); > > } > > } > > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > > index 0ec9640335bb..3e29128c58cc 100644 > > --- a/arch/powerpc/xmon/xmon.c > > +++ b/arch/powerpc/xmon/xmon.c > > @@ -3130,6 +3130,7 @@ static void show_pte(unsigned long addr) > > struct task_struct *tsk = NULL; > > struct mm_struct *mm; > > pgd_t *pgdp, *pgdir; > > + p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > pte_t *ptep; > > @@ -3161,20 +3162,21 @@ static void show_pte(unsigned long addr) > > pgdir = pgd_offset(mm, 0); > > } > > - if (pgd_none(*pgdp)) { > > - printf("no linux page table for address\n"); > > + p4dp = p4d_offset(pgdp, addr); > > + > > + if (p4d_none(*p4dp)) { > > + printf("No valid P4D\n"); > > return; > > } > > - printf("pgd @ 0x%px\n", pgdir); > > - > > - if (pgd_is_leaf(*pgdp)) { > > - format_pte(pgdp, pgd_val(*pgdp)); > > + if (p4d_is_leaf(*p4dp)) { > > + format_pte(p4dp, p4d_val(*p4dp)); > > return; > > } > > - printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp)); > > - pudp = pud_offset(pgdp, addr); > > + printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp)); > > + > > + pudp = pud_offset(p4dp, addr); > > if (pud_none(*pudp)) { > > printf("No valid PUD\n"); > > > >
Le 26/02/2020 à 11:56, Mike Rapoport a écrit : > On Wed, Feb 26, 2020 at 10:46:13AM +0100, Christophe Leroy wrote: >> >> >> Le 26/02/2020 à 10:13, Mike Rapoport a écrit : >>> On Tue, Feb 18, 2020 at 12:54:40PM +0200, Mike Rapoport wrote: >>>> On Sun, Feb 16, 2020 at 11:41:07AM +0100, Christophe Leroy wrote: >>>>> >>>>> >>>>> Le 16/02/2020 à 09:18, Mike Rapoport a écrit : >>>>>> From: Mike Rapoport <rppt@linux.ibm.com> >>>>>> >>>>>> Implement primitives necessary for the 4th level folding, add walks of p4d >>>>>> level where appropriate and replace 5level-fixup.h with pgtable-nop4d.h. >>>>> >>>>> I don't think it is worth adding all these additional walks of p4d, this >>>>> patch could be limited to changes like: >>>>> >>>>> - pud = pud_offset(pgd, gpa); >>>>> + pud = pud_offset(p4d_offset(pgd, gpa), gpa); >>>>> >>>>> The additional walks should be added through another patch the day powerpc >>>>> needs them. >>>> >>>> Ok, I'll update the patch to reduce walking the p4d. >>> >>> Here's what I have with more direct accesses from pgd to pud. >> >> I went quickly through. This looks promising. >> >> Do we need the walk_p4d() in arch/powerpc/mm/ptdump/hashpagetable.c ? >> Can't we just do >> >> @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) >> addr = KERN_VIRT_START + i * PGDIR_SIZE; >> if (!pgd_none(*pgd)) >> /* pgd exists */ >> - walk_pud(st, pgd, addr); >> + walk_pud(st, p4d_offset(pgd, addr), addr); > > We can do > > addr = KERN_VIRT_START + i * PGDIR_SIZE; > p4d = p4d_offset(pgd, addr); > if (!p4d_none(*p4d)) > walk_pud() > > But I don't think this is really essential. Again, we are trading off code > consistency vs line count. I don't think line count is that important. Ok. Christophe
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 5b39c11e884a..39ec11371be0 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -2,7 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #include <asm/book3s/32/hash.h> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 2781ebf6add4..876d1528c2cf 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -134,9 +134,9 @@ static inline int get_region_id(unsigned long ea) #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) -static inline int hash__pgd_bad(pgd_t pgd) +static inline int hash__p4d_bad(p4d_t p4d) { - return (pgd_val(pgd) == 0); + return (p4d_val(p4d) == 0); } #ifdef CONFIG_STRICT_KERNEL_RWX extern void hash__mark_rodata_ro(void); diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index a41e91bd0580..69c5b051734f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -85,9 +85,9 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); } -static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud) { - *pgd = __pgd(__pgtable_ptr_val(pud) | PGD_VAL_BITS); + *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 201a69e6a355..ddddbafff0ab 100644 --- 
a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -2,7 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> #ifndef __ASSEMBLY__ #include <linux/mmdebug.h> @@ -251,7 +251,7 @@ extern unsigned long __pmd_frag_size_shift; /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0xc0000000000000ffUL /* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0xc0000000000000ffUL +#define P4D_MASKED_BITS 0xc0000000000000ffUL /* * Used as an indicator for rcu callback functions @@ -949,54 +949,60 @@ static inline bool pud_access_permitted(pud_t pud, bool write) return pte_access_permitted(pud_pte(pud), write); } -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) }) +static inline __be64 p4d_raw(p4d_t x) +{ + return pgd_raw(x.pgd); +} + +#define p4d_write(p4d) pte_write(p4d_pte(p4d)) -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline int pgd_none(pgd_t pgd) +static inline int p4d_none(p4d_t p4d) { - return !pgd_raw(pgd); + return !p4d_raw(p4d); } -static inline int pgd_present(pgd_t pgd) +static inline int p4d_present(p4d_t p4d) { - return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT)); + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PRESENT)); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte_raw(pgd_raw(pgd)); + return __pte_raw(p4d_raw(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd_raw(pte_raw(pte)); + return __p4d_raw(pte_raw(pte)); } -static inline int pgd_bad(pgd_t pgd) +static inline int p4d_bad(p4d_t p4d) { if (radix_enabled()) - return radix__pgd_bad(pgd); - return hash__pgd_bad(pgd); + return radix__p4d_bad(p4d); 
+ return hash__p4d_bad(p4d); } -#define pgd_access_permitted pgd_access_permitted -static inline bool pgd_access_permitted(pgd_t pgd, bool write) +#define p4d_access_permitted p4d_access_permitted +static inline bool p4d_access_permitted(p4d_t p4d, bool write) { - return pte_access_permitted(pgd_pte(pgd), write); + return pte_access_permitted(p4d_pte(p4d), write); } -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); /* Pointers in the page table tree are physical addresses */ #define __pgtable_ptr_val(ptr) __pa(ptr) #define pmd_page_vaddr(pmd) __va(pmd_val(pmd) & ~PMD_MASKED_BITS) #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) -#define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) +#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) #define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) @@ -1010,8 +1016,8 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr)) +#define pud_offset(p4dp, addr) \ + (((pud_t *) p4d_page_vaddr(*(p4dp))) + pud_index(addr)) #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr)) #define pte_offset_kernel(dir,addr) \ @@ -1368,6 +1374,12 @@ static inline bool pud_is_leaf(pud_t pud) return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } +#define p4d_is_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) +{ + return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE)); +} + #define pgd_is_leaf pgd_is_leaf #define pgd_leaf pgd_is_leaf static inline bool pgd_is_leaf(pgd_t pgd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae..9bca2ac64220 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ 
b/arch/powerpc/include/asm/book3s/64/radix.h @@ -30,7 +30,7 @@ /* Don't have anything in the reserved bits and leaf bits */ #define RADIX_PMD_BAD_BITS 0x60000000000000e0UL #define RADIX_PUD_BAD_BITS 0x60000000000000e0UL -#define RADIX_PGD_BAD_BITS 0x60000000000000e0UL +#define RADIX_P4D_BAD_BITS 0x60000000000000e0UL #define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE) #define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE) @@ -227,9 +227,9 @@ static inline int radix__pud_bad(pud_t pud) } -static inline int radix__pgd_bad(pgd_t pgd) +static inline int radix__p4d_bad(p4d_t p4d) { - return !!(pgd_val(pgd) & RADIX_PGD_BAD_BITS); + return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 60c4d829152e..d4c2c4259fa3 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -2,7 +2,6 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H -#define __ARCH_USE_5LEVEL_HACK #include <asm-generic/pgtable-nopmd.h> #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index b9534a793293..668aee6017e7 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -15,7 +15,7 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) +#define p4d_populate(MM, P4D, PUD) p4d_set(P4D, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index c40ec32b8194..81b1c54e3cf1 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -2,7 +2,7 
@@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H -#include <asm-generic/5level-fixup.h> +#include <asm-generic/pgtable-nop4d.h> /* * Entries per page directory level. The PTE level must use a 64b record @@ -45,41 +45,41 @@ #define PMD_MASKED_BITS 0 /* Bits to mask out from a PUD to get to the PMD page */ #define PUD_MASKED_BITS 0 -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0 +/* Bits to mask out from a P4D to get to the PUD page */ +#define P4D_MASKED_BITS 0 /* * 4-level page tables related bits */ -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (p4d_val(p4d) == 0) +#define p4d_present(p4d) (p4d_val(p4d) != 0) +#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) #ifndef __ASSEMBLY__ -static inline void pgd_clear(pgd_t *pgdp) +static inline void p4d_clear(p4d_t *p4dp) { - *pgdp = __pgd(0); + *p4dp = __p4d(0); } -static inline pte_t pgd_pte(pgd_t pgd) +static inline pte_t p4d_pte(p4d_t p4d) { - return __pte(pgd_val(pgd)); + return __pte(p4d_val(p4d)); } -static inline pgd_t pte_pgd(pte_t pte) +static inline p4d_t pte_p4d(pte_t pte) { - return __pgd(pte_val(pte)); + return __p4d(pte_val(pte)); } -extern struct page *pgd_page(pgd_t pgd); +extern struct page *p4d_page(p4d_t p4d); #endif /* !__ASSEMBLY__ */ -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page_vaddr(*(pgdp))) + \ +#define pud_offset(p4dp, addr) \ + (((pud_t *) p4d_page_vaddr(*(p4dp))) + \ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9a33b8bd842d..b360f262b9c6 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -175,11 
+175,11 @@ static inline pud_t pte_pud(pte_t pte) return __pud(pte_val(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) -#define pgd_write(pgd) pte_write(pgd_pte(pgd)) +#define p4d_write(pgd) pte_write(p4d_pte(p4d)) -static inline void pgd_set(pgd_t *pgdp, unsigned long val) +static inline void p4d_set(p4d_t *p4dp, unsigned long val) { - *pgdp = __pgd(val); + *p4dp = __p4d(val); } /* diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 8cc543ed114c..0a05fddd7881 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -139,6 +139,14 @@ static inline bool pud_is_leaf(pud_t pud) } #endif +#ifndef p4d_is_leaf +#define p4d_is_leaf p4d_is_leaf +static inline bool p4d_is_leaf(p4d_t p4d) +{ + return false; +} +#endif + #ifndef pgd_is_leaf #define pgd_is_leaf pgd_is_leaf static inline bool pgd_is_leaf(pgd_t pgd) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 803940d79b73..5aacfa0b27ef 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -494,17 +494,39 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, pud_free(kvm->mm, pud); } +static void kvmppc_unmap_free_p4d(struct kvm *kvm, p4d_t *p4d, + unsigned int lpid) +{ + unsigned long iu; + p4d_t *p = p4d; + + for (iu = 0; iu < PTRS_PER_P4D; ++iu, ++p) { + if (!p4d_present(*p)) + continue; + if (p4d_is_leaf(*p)) { + p4d_clear(p); + } else { + pud_t *pud; + + pud = pud_offset(p, 0); + kvmppc_unmap_free_pud(kvm, pud, lpid); + p4d_clear(p); + } + } + p4d_free(kvm->mm, p4d); +} + void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid) { unsigned long ig; for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) { - pud_t *pud; + p4d_t *p4d; if (!pgd_present(*pgd)) continue; - pud = pud_offset(pgd, 0); - kvmppc_unmap_free_pud(kvm, pud, lpid); + p4d = p4d_offset(pgd, 0); + kvmppc_unmap_free_p4d(kvm, p4d, lpid); pgd_clear(pgd); } } @@ 
-566,6 +588,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long *rmapp, struct rmap_nested **n_rmap) { pgd_t *pgd; + p4d_t *p4d, *new_p4d = NULL; pud_t *pud, *new_pud = NULL; pmd_t *pmd, *new_pmd = NULL; pte_t *ptep, *new_ptep = NULL; @@ -573,9 +596,15 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Traverse the guest's 2nd-level tree, allocate new levels needed */ pgd = pgtable + pgd_index(gpa); - pud = NULL; + p4d = NULL; if (pgd_present(*pgd)) - pud = pud_offset(pgd, gpa); + p4d = p4d_offset(pgd, gpa); + else + new_p4d = p4d_alloc_one(kvm->mm, gpa); + + pud = NULL; + if (p4d_present(*p4d)) + pud = pud_offset(p4d, gpa); else new_pud = pud_alloc_one(kvm->mm, gpa); @@ -597,12 +626,18 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, /* Now traverse again under the lock and change the tree */ ret = -ENOMEM; if (pgd_none(*pgd)) { + if (!new_p4d) + goto out_unlock; + pgd_populate(kvm->mm, pgd, new_p4d); + new_p4d = NULL; + } + if (p4d_none(*p4d)) { if (!new_pud) goto out_unlock; - pgd_populate(kvm->mm, pgd, new_pud); + p4d_populate(kvm->mm, p4d, new_pud); new_pud = NULL; } - pud = pud_offset(pgd, gpa); + pud = pud_offset(p4d, gpa); if (pud_is_leaf(*pud)) { unsigned long hgpa = gpa & PUD_MASK; @@ -1220,6 +1255,7 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, pgd_t *pgt; struct kvm_nested_guest *nested; pgd_t pgd, *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ptep; @@ -1298,7 +1334,14 @@ static ssize_t debugfs_radix_read(struct file *file, char __user *buf, continue; } - pudp = pud_offset(&pgd, gpa); + p4dp = p4d_offset(&pgd, gpa); + p4d = READ_ONCE(*p4dp); + if (!(p4d_val(p4d) & _PAGE_PRESENT)) { + gpa = (gpa & P4D_MASK) + P4D_SIZE; + continue; + } + + pudp = pud_offset(&p4d, gpa); pud = READ_ONCE(*pudp); if (!(pud_val(pud) & _PAGE_PRESENT)) { gpa = (gpa & PUD_MASK) + PUD_SIZE; diff --git a/arch/powerpc/lib/code-patching.c 
b/arch/powerpc/lib/code-patching.c index 3345f039a876..7a59f6863cec 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -107,13 +107,18 @@ static inline int unmap_patch_area(unsigned long addr) pte_t *ptep; pmd_t *pmdp; pud_t *pudp; + p4d_t *p4dp; pgd_t *pgdp; pgdp = pgd_offset_k(addr); if (unlikely(!pgdp)) return -EINVAL; - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + if (unlikely(!p4dp)) + return -EINVAL; + + pudp = pud_offset(p4dp, addr); if (unlikely(!pudp)) return -EINVAL; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 0a1c65a2c565..b2fc3e71165c 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -312,7 +312,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea) if (!Hash) return; - pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, ea), ea), ea), ea); if (!pmd_none(*pmd)) add_hash_page(mm->context.id, ea, pmd_val(*pmd)); } diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c index 2fcd321040ff..175bc33b41b7 100644 --- a/arch/powerpc/mm/book3s32/tlb.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -87,7 +87,7 @@ static void flush_range(struct mm_struct *mm, unsigned long start, if (start >= end) return; end = (end - 1) | ~PAGE_MASK; - pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, start), start), start), start); for (;;) { pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; if (pmd_end > end) @@ -145,7 +145,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) return; } mm = (vmaddr < TASK_SIZE)? 
vma->vm_mm: &init_mm; - pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr), vmaddr); if (!pmd_none(*pmd)) flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); } diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index 64733b9cb20a..9cd15937e88a 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -148,6 +148,7 @@ void hash__vmemmap_remove_mapping(unsigned long start, int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -155,7 +156,8 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, pudp, ea); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index dd1bea45325c..11762556fe4d 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -64,17 +64,24 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; pgdp = pgd_offset_k(ea); if (pgd_none(*pgdp)) { + p4dp = early_alloc_pgtable(PGD_TABLE_SIZE, nid, + region_start, region_end); + pgd_populate(&init_mm, pgdp, p4dp); + } + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid, region_start, region_end); - pgd_populate(&init_mm, pgdp, pudp); + p4d_populate(&init_mm, p4dp, pudp); } - pudp = pud_offset(pgdp, ea); + pudp = pud_offset(p4dp, ea); if (map_page_size == PUD_SIZE) { ptep = 
(pte_t *)pudp; goto set_the_pte; @@ -114,6 +121,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, { unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -136,7 +144,8 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, * boot. */ pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; if (map_page_size == PUD_SIZE) { @@ -173,6 +182,7 @@ void radix__change_memory_range(unsigned long start, unsigned long end, { unsigned long idx; pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -185,7 +195,8 @@ void radix__change_memory_range(unsigned long start, unsigned long end, for (idx = start; idx < end; idx += PAGE_SIZE) { pgdp = pgd_offset_k(idx); - pudp = pud_alloc(&init_mm, pgdp, idx); + p4dp = p4d_offset(pgdp, idx); + pudp = pud_alloc(&init_mm, p4dp, idx); if (!pudp) continue; if (pud_is_leaf(*pudp)) { diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 2ef24a53f4c9..27daeed1a141 100644 --- a/arch/powerpc/mm/book3s64/subpage_prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -54,6 +54,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, int npages) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -62,7 +63,10 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) return; - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + return; + pud = pud_offset(p4d, addr); if (pud_none(*pud)) return; pmd = pmd_offset(pud, addr); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f8..43d463f20fc3 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -112,6 +112,7 @@ static int __hugepte_alloc(struct mm_struct 
*mm, hugepd_t *hpdp, pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { pgd_t *pg; + p4d_t *p4; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; @@ -121,20 +122,21 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz addr &= ~(sz-1); pg = pgd_offset(mm, addr); + p4 = p4d_offset(pg, addr); #ifdef CONFIG_PPC_BOOK3S_64 if (pshift == PGDIR_SHIFT) /* 16GB huge page */ - return (pte_t *) pg; + return (pte_t *) p4; else if (pshift > PUD_SHIFT) { /* * We need to use hugepd table */ ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift == PUD_SHIFT) @@ -159,10 +161,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz #else if (pshift >= PGDIR_SHIFT) { ptl = &mm->page_table_lock; - hpdp = (hugepd_t *)pg; + hpdp = (hugepd_t *)p4; } else { pdshift = PUD_SHIFT; - pu = pud_alloc(mm, pg, addr); + pu = pud_alloc(mm, p4, addr); if (!pu) return NULL; if (pshift >= PUD_SHIFT) { @@ -384,7 +386,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, mm_dec_nr_pmds(tlb->mm); } -static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, +static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -394,7 +396,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, start = addr; do { - pud = pud_offset(pgd, addr); + pud = pud_offset(p4d, addr); next = pud_addr_end(addr, end); if (!is_hugepd(__hugepd(pud_val(*pud)))) { if (pud_none_or_clear_bad(pud)) @@ -429,8 +431,8 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, if (end - 1 > ceiling - 1) return; - pud = pud_offset(pgd, start); - pgd_clear(pgd); + pud = pud_offset(p4d, start); + p4d_clear(p4d); pud_free_tlb(tlb, pud, start); 
mm_dec_nr_puds(tlb->mm); } @@ -443,6 +445,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long floor, unsigned long ceiling) { pgd_t *pgd; + p4d_t *p4d; unsigned long next; /* @@ -465,10 +468,11 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, do { next = pgd_addr_end(addr, end); pgd = pgd_offset(tlb->mm, addr); + p4d = p4d_offset(pgd, addr); if (!is_hugepd(__hugepd(pgd_val(*pgd)))) { - if (pgd_none_or_clear_bad(pgd)) + if (p4d_none_or_clear_bad(p4d)) continue; - hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling); } else { unsigned long more; /* @@ -481,7 +485,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, if (more > next) next = more; - free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, + free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT, addr, next, floor, ceiling); } } while (addr = next, addr != end); diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 16dd95bd0749..eed3f1ae3b90 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -36,7 +36,7 @@ static int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned unsigned long k_cur, k_next; pte_t *new = NULL; - pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_start), k_start), k_start), k_start); for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { k_next = pgd_addr_end(k_cur, k_end); @@ -78,7 +78,7 @@ static int __init kasan_init_region(void *start, size_t size) block = memblock_alloc(k_end - k_start, PAGE_SIZE); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); void *va = block + k_cur - k_start; pte_t pte = 
pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); @@ -102,7 +102,7 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(k_cur), k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) @@ -202,7 +202,7 @@ void __init kasan_early_init(void) unsigned long addr = KASAN_SHADOW_START; unsigned long end = KASAN_SHADOW_END; unsigned long next; - pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); + pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(addr), addr), addr), addr); BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ef7b1119b2e2..8262b384dcf3 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -69,8 +69,8 @@ EXPORT_SYMBOL(kmap_prot); static inline pte_t *virt_to_kpte(unsigned long vaddr) { - return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), - vaddr), vaddr), vaddr); + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(vaddr), + vaddr), vaddr), vaddr), vaddr); } #endif diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c index f348104eb461..7aaf7155e350 100644 --- a/arch/powerpc/mm/nohash/40x.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -104,7 +104,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); *pmdp++ = __pmd(val); @@ -119,7 +119,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) pmd_t *pmdp; unsigned 
long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); + pmdp = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(v), v), v), v); *pmdp = __pmd(val); v += LARGE_PAGE_SIZE_4M; diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 4637fdd469cf..a62d59a928be 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -73,6 +73,7 @@ static void __init *early_alloc_pgtable(unsigned long size) int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -80,7 +81,10 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); + p4dp = p4d_alloc(&init_mm, pgdp, ea); + if (!p4dp) + return -ENOMEM; + pudp = pud_alloc(&init_mm, p4dp, ea); if (!pudp) return -ENOMEM; pmdp = pmd_alloc(&init_mm, pudp, ea); @@ -91,13 +95,16 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) return -ENOMEM; } else { pgdp = pgd_offset_k(ea); -#ifndef __PAGETABLE_PUD_FOLDED if (pgd_none(*pgdp)) { pudp = early_alloc_pgtable(PUD_TABLE_SIZE); pgd_populate(&init_mm, pgdp, pudp); } -#endif /* !__PAGETABLE_PUD_FOLDED */ - pudp = pud_offset(pgdp, ea); + p4dp = p4d_offset(pgdp, ea); + if (p4d_none(*p4dp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); + p4d_populate(&init_mm, p4dp, pmdp); + } + pudp = pud_offset(p4dp, ea); if (pud_none(*pudp)) { pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); pud_populate(&init_mm, pudp, pmdp); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index e3759b69f81b..dca6a72da26a 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -265,6 +265,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, void 
assert_pte_locked(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; @@ -272,7 +273,9 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) return; pgd = mm->pgd + pgd_index(addr); BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, addr); + p4d = p4d_offset(pgd, addr); + BUG_ON(p4d_none(*p4d)); + pud = pud_offset(p4d, addr); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, addr); /* @@ -313,6 +316,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) { pgd_t pgd, *pgdp; + p4d_t p4d, *p4dp; pud_t pud, *pudp; pmd_t pmd, *pmdp; pte_t *ret_pte; @@ -346,13 +350,30 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, goto out_huge; } + pdshift = P4D_SHIFT; + p4dp = p4d_offset(&pgd, ea); + p4d = READ_ONCE(*p4dp); + + if (p4d_none(p4d)) + return NULL; + + if (p4d_is_leaf(p4d)) { + ret_pte = (pte_t *)p4dp; + goto out; + } + + if (is_hugepd(__hugepd(p4d_val(p4d)))) { + hpdp = (hugepd_t *)&p4d; + goto out_huge; + } + /* * Even if we end up with an unmap, the pgtable will not * be freed, because we do an rcu free and here we are * irq disabled */ pdshift = PUD_SHIFT; - pudp = pud_offset(&pgd, ea); + pudp = pud_offset(&p4d, ea); pud = READ_ONCE(*pudp); if (pud_none(pud)) diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5fb90edd865e..ad217e5e039f 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -63,7 +63,7 @@ int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) int err = -ENOMEM; /* Use upper 10 bits of VA to index the first level map */ - pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); + pd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); /* Use middle 10 bits of VA to index the second-level map */ if (likely(slab_is_available())) pg = pte_alloc_kernel(pd, va); @@ -130,6 +130,7 @@ static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) 
{ pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -137,17 +138,20 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) pgd = pgd_offset(mm, addr & PAGE_MASK); if (pgd) { - pud = pud_offset(pgd, addr & PAGE_MASK); - if (pud && pud_present(*pud)) { - pmd = pmd_offset(pud, addr & PAGE_MASK); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd, addr & PAGE_MASK); - if (pte) { - retval = 1; - *ptep = pte; - if (pmdp) - *pmdp = pmd; - /* XXX caller needs to do pte_unmap, yuck */ + p4d = p4d_offset(pgd, addr & PAGE_MASK); + if (p4d && p4d_present(*p4d)) { + pud = pud_offset(p4d, addr & PAGE_MASK); + if (pud && pud_present(*pud)) { + pmd = pmd_offset(pud, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + if (pmdp) + *pmdp = pmd; + /* XXX caller needs to do pte_unmap, yuck */ + } } } } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e78832dce7bb..1f86a88fd4bb 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -101,13 +101,13 @@ EXPORT_SYMBOL(__pte_frag_size_shift); #ifndef __PAGETABLE_PUD_FOLDED /* 4 level page table */ -struct page *pgd_page(pgd_t pgd) +struct page *p4d_page(p4d_t p4d) { - if (pgd_is_leaf(pgd)) { - VM_WARN_ON(!pgd_huge(pgd)); - return pte_page(pgd_pte(pgd)); + if (p4d_is_leaf(p4d)) { + VM_WARN_ON(!p4d_huge(p4d)); + return pte_page(p4d_pte(p4d)); } - return virt_to_page(pgd_page_vaddr(pgd)); + return virt_to_page(p4d_page_vaddr(p4d)); } #endif diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a07278027c6f..ac360ad865a8 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -417,9 +417,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct 
pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -431,6 +431,20 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + addr = start + i * P4D_SIZE; + if (!p4d_none(*p4d)) + /* p4d exists */ + walk_pud(st, p4d, addr); + } +} + static void walk_pagetables(struct pg_state *st) { pgd_t *pgd = pgd_offset_k(0UL); @@ -445,7 +459,7 @@ static void walk_pagetables(struct pg_state *st) addr = KERN_VIRT_START + i * PGDIR_SIZE; if (!pgd_none(*pgd)) /* pgd exists */ - walk_pud(st, pgd, addr); + walk_p4d(st, pgd, addr); } } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 206156255247..7bd4b81d5b5d 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -277,9 +277,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) } } -static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(p4d, 0); unsigned long addr; unsigned int i; @@ -293,6 +293,22 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + unsigned long addr; + unsigned int i; + + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + addr = start + i * P4D_SIZE; + if (!p4d_none(*p4d) && !p4d_is_leaf(*p4d)) + /* p4d exists */ + walk_pud(st, p4d, addr); + else + note_page(st, addr, 2, p4d_val(*p4d)); + } +} + static void walk_pagetables(struct pg_state *st) { unsigned int i; @@ -306,7 +322,7 @@ static void 
walk_pagetables(struct pg_state *st) for (i = pgd_index(addr); i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) /* pgd exists */ - walk_pud(st, pgd, addr); + walk_p4d(st, pgd, addr); else note_page(st, addr, 1, pgd_val(*pgd)); } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e8c84d265602..c7bd1145b268 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3130,6 +3130,7 @@ static void show_pte(unsigned long addr) struct task_struct *tsk = NULL; struct mm_struct *mm; pgd_t *pgdp, *pgdir; + p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; @@ -3174,7 +3175,21 @@ static void show_pte(unsigned long addr) } printf("pgdp @ 0x%px = 0x%016lx\n", pgdp, pgd_val(*pgdp)); - pudp = pud_offset(pgdp, addr); + p4dp = p4d_offset(pgdp, addr); + + if (p4d_none(*p4dp)) { + printf("No valid P4D\n"); + return; + } + + if (p4d_is_leaf(*p4dp)) { + format_pte(p4dp, p4d_val(*p4dp)); + return; + } + + printf("p4dp @ 0x%px = 0x%016lx\n", p4dp, p4d_val(*p4dp)); + + pudp = pud_offset(p4dp, addr); if (pud_none(*pudp)) { printf("No valid PUD\n");