Message ID | 20240503144604.151095-3-ryan.roberts@arm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arm64/mm: Enable userfaultfd write-protect | expand |
On 03.05.24 16:46, Ryan Roberts wrote: > Currently the PTE_PRESENT_INVALID and PTE_PROT_NONE functionality > explicitly occupy 2 bits in the PTE when PTE_VALID/PMD_SECT_VALID is > clear. This has 2 significant consequences: > > - PTE_PROT_NONE consumes a precious SW PTE bit that could be used for > other things. > - The swap pte layout must reserve those same 2 bits and ensure they > are both always zero for a swap pte. It would be nice to reclaim at > least one of those bits. > > But PTE_PRESENT_INVALID, which since the previous patch, applies > uniformly to page/block descriptors at any level when PTE_VALID is > clear, can already give us most of what PTE_PROT_NONE requires: If it is > set, then the pte is still considered present; pte_present() returns > true and all the fields in the pte follow the HW interpretation (e.g. SW > can safely call pte_pfn(), etc). But crucially, the HW treats the pte as > invalid and will fault if it hits. > > So let's remove PTE_PROT_NONE entirely and instead represent PROT_NONE > as a present but invalid pte (PTE_VALID=0, PTE_PRESENT_INVALID=1) with > PTE_USER=0 and PTE_UXN=1. This is a unique combination that is not used > anywhere else. > > The net result is a clearer, simpler, more generic encoding scheme that > applies uniformly to all levels. Additionally we free up a PTE SW bit > and a swap pte bit (bit 58 in both cases). > > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Reviewed-by: David Hildenbrand <david@redhat.com>
On 5/3/24 20:16, Ryan Roberts wrote: > Currently the PTE_PRESENT_INVALID and PTE_PROT_NONE functionality > explicitly occupy 2 bits in the PTE when PTE_VALID/PMD_SECT_VALID is > clear. This has 2 significant consequences: > > - PTE_PROT_NONE consumes a precious SW PTE bit that could be used for > other things. > - The swap pte layout must reserve those same 2 bits and ensure they > are both always zero for a swap pte. It would be nice to reclaim at > least one of those bits. > > But PTE_PRESENT_INVALID, which since the previous patch, applies > uniformly to page/block descriptors at any level when PTE_VALID is > clear, can already give us most of what PTE_PROT_NONE requires: If it is > set, then the pte is still considered present; pte_present() returns > true and all the fields in the pte follow the HW interpretation (e.g. SW > can safely call pte_pfn(), etc). But crucially, the HW treats the pte as > invalid and will fault if it hits. > > So let's remove PTE_PROT_NONE entirely and instead represent PROT_NONE > as a present but invalid pte (PTE_VALID=0, PTE_PRESENT_INVALID=1) with > PTE_USER=0 and PTE_UXN=1. This is a unique combination that is not used > anywhere else. > > The net result is a clearer, simpler, more generic encoding scheme that > applies uniformly to all levels. Additionally we free up a PTE SW bit > and a swap pte bit (bit 58 in both cases). > > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com> > --- > arch/arm64/include/asm/pgtable-prot.h | 3 +-- > arch/arm64/include/asm/pgtable.h | 31 +++++++++++++++------------ > 2 files changed, 18 insertions(+), 16 deletions(-) > > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h > index cdbf51eef7a6..81f07b44f7b8 100644 > --- a/arch/arm64/include/asm/pgtable-prot.h > +++ b/arch/arm64/include/asm/pgtable-prot.h > @@ -18,7 +18,6 @@ > #define PTE_DIRTY (_AT(pteval_t, 1) << 55) > #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) > #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) > -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ > > /* > * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be > @@ -103,7 +102,7 @@ static inline bool __pure lpa2_is_enabled(void) > __val; \ > }) > > -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) > +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) > /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ > #define PAGE_SHARED __pgprot(_PAGE_SHARED) > #define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index 7156c940ac4f..c0f4471423db 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) > /* > * The following only work if pte_present(). Undefined behaviour otherwise. > */ > -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) > +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) > #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) > #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) > #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) > @@ -478,7 +478,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) > */ > static inline int pte_protnone(pte_t pte) > { > - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; > + /* > + * pte_present_invalid() tells us that the pte is invalid from HW > + * perspective but present from SW perspective, so the fields are to be > + * interpretted as per the HW layout. The second 2 checks are the unique > + * encoding that we use for PROT_NONE. It is insufficient to only use > + * the first check because we share the same encoding scheme with pmds > + * which support pmd_mkinvalid(), so can be present-invalid without > + * being PROT_NONE. > + */ > + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); > } > > static inline int pmd_protnone(pmd_t pmd) > @@ -487,12 +496,7 @@ static inline int pmd_protnone(pmd_t pmd) > } > #endif > > -#define pmd_present_invalid(pmd) pte_present_invalid(pmd_pte(pmd)) > - > -static inline int pmd_present(pmd_t pmd) > -{ > - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); > -} > +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) > > /* > * THP definitions. > @@ -1029,8 +1033,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) > * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. > */ > const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | > - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | > - PTE_ATTRINDX_MASK; > + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | > + PTE_GP | PTE_ATTRINDX_MASK; > /* preserve the hardware dirty information */ > if (pte_hw_dirty(pte)) > pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); > @@ -1078,17 +1082,17 @@ static inline int pgd_devmap(pgd_t pgd) > #ifdef CONFIG_PAGE_TABLE_CHECK > static inline bool pte_user_accessible_page(pte_t pte) > { > - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); > + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); > } > > static inline bool pmd_user_accessible_page(pmd_t pmd) > { > - return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); > + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); > } > > static inline bool pud_user_accessible_page(pud_t pud) > { > - return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); > + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); > } > #endif > > @@ -1252,7 +1256,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, > * bits 2: remember PG_anon_exclusive > * bits 3-7: swap type > * bits 8-57: swap offset > - * bit 58: PTE_PROT_NONE (must be zero) > * bit 59: PTE_PRESENT_INVALID (must be zero) > */ > #define __SWP_TYPE_SHIFT 3
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index cdbf51eef7a6..81f07b44f7b8 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -18,7 +18,6 @@ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define PTE_DEVMAP (_AT(pteval_t, 1) << 57) -#define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be @@ -103,7 +102,7 @@ static inline bool __pure lpa2_is_enabled(void) __val; \ }) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PRESENT_INVALID | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_SHARED) #define PAGE_SHARED_EXEC __pgprot(_PAGE_SHARED_EXEC) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7156c940ac4f..c0f4471423db 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -105,7 +105,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_present(pte) (pte_valid(pte) || pte_present_invalid(pte)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) @@ -478,7 +478,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) == PTE_PROT_NONE; + /* + * pte_present_invalid() tells us that the pte is invalid from HW + * perspective but present from SW perspective, so the fields are to be + * interpretted as per the HW layout. The second 2 checks are the unique + * encoding that we use for PROT_NONE. It is insufficient to only use + * the first check because we share the same encoding scheme with pmds + * which support pmd_mkinvalid(), so can be present-invalid without + * being PROT_NONE. + */ + return pte_present_invalid(pte) && !pte_user(pte) && !pte_user_exec(pte); } static inline int pmd_protnone(pmd_t pmd) @@ -487,12 +496,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -#define pmd_present_invalid(pmd) pte_present_invalid(pmd_pte(pmd)) - -static inline int pmd_present(pmd_t pmd) -{ - return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd); -} +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) /* * THP definitions. @@ -1029,8 +1033,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK. */ const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP | - PTE_ATTRINDX_MASK; + PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE | + PTE_GP | PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); @@ -1078,17 +1082,17 @@ static inline int pgd_devmap(pgd_t pgd) #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { - return pte_present(pte) && (pte_user(pte) || pte_user_exec(pte)); + return pte_valid(pte) && (pte_user(pte) || pte_user_exec(pte)); } static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_valid(pmd) && !pmd_table(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); + return pud_valid(pud) && !pud_table(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif @@ -1252,7 +1256,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * bits 2: remember PG_anon_exclusive * bits 3-7: swap type * bits 8-57: swap offset - * bit 58: PTE_PROT_NONE (must be zero) * bit 59: PTE_PRESENT_INVALID (must be zero) */ #define __SWP_TYPE_SHIFT 3