diff mbox series

[v4,1/4] arm64/mm: generalize PMD_PRESENT_INVALID for all levels

Message ID 20240503144604.151095-2-ryan.roberts@arm.com (mailing list archive)
State New
Headers show
Series arm64/mm: Enable userfaultfd write-protect | expand

Commit Message

Ryan Roberts May 3, 2024, 2:45 p.m. UTC
As preparation for the next patch, which frees up the PTE_PROT_NONE
present pte and swap pte bit, generalize PMD_PRESENT_INVALID to
PTE_PRESENT_INVALID. This will then be used to mark PROT_NONE ptes (and
entries at any other level) in the next patch.

While we're at it, fix up the swap pte format comment to include
PTE_PRESENT_INVALID. This is not new, it just wasn't previously
documented.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 arch/arm64/include/asm/pgtable-prot.h |  8 ++++----
 arch/arm64/include/asm/pgtable.h      | 21 ++++++++++++---------
 2 files changed, 16 insertions(+), 13 deletions(-)

Comments

David Hildenbrand May 7, 2024, 11:38 a.m. UTC | #1
On 03.05.24 16:45, Ryan Roberts wrote:
> As preparation for the next patch, which frees up the PTE_PROT_NONE
> present pte and swap pte bit, generalize PMD_PRESENT_INVALID to
> PTE_PRESENT_INVALID. This will then be used to mark PROT_NONE ptes (and
> entries at any other level) in the next patch.
> 
> While we're at it, fix up the swap pte format comment to include
> PTE_PRESENT_INVALID. This is not new, it just wasn't previously
> documented.
> 
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
>   arch/arm64/include/asm/pgtable-prot.h |  8 ++++----
>   arch/arm64/include/asm/pgtable.h      | 21 ++++++++++++---------
>   2 files changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index dd9ee67d1d87..cdbf51eef7a6 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -21,11 +21,11 @@
>   #define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
>   
>   /*
> - * This bit indicates that the entry is present i.e. pmd_page()
> - * still points to a valid huge page in memory even if the pmd
> - * has been invalidated.
> + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
> + * interpreted according to the HW layout by SW but any attempted HW access to
> + * the address will result in a fault. pte_present() returns true.
>    */
> -#define PMD_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
> +#define PTE_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */

Ah, so PTE_VALID == PMD_SECT_VALID. Would that also be a reasonable 
generalization independent of this? (or do we keep it as is because it's 
a HW def?)

Reviewed-by: David Hildenbrand <david@redhat.com>
Ryan Roberts May 7, 2024, 12:34 p.m. UTC | #2
On 07/05/2024 12:38, David Hildenbrand wrote:
> On 03.05.24 16:45, Ryan Roberts wrote:
>> As preparation for the next patch, which frees up the PTE_PROT_NONE
>> present pte and swap pte bit, generalize PMD_PRESENT_INVALID to
>> PTE_PRESENT_INVALID. This will then be used to mark PROT_NONE ptes (and
>> entries at any other level) in the next patch.
>>
>> While we're at it, fix up the swap pte format comment to include
>> PTE_PRESENT_INVALID. This is not new, it just wasn't previously
>> documented.
>>
>> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>   arch/arm64/include/asm/pgtable-prot.h |  8 ++++----
>>   arch/arm64/include/asm/pgtable.h      | 21 ++++++++++++---------
>>   2 files changed, 16 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable-prot.h
>> b/arch/arm64/include/asm/pgtable-prot.h
>> index dd9ee67d1d87..cdbf51eef7a6 100644
>> --- a/arch/arm64/include/asm/pgtable-prot.h
>> +++ b/arch/arm64/include/asm/pgtable-prot.h
>> @@ -21,11 +21,11 @@
>>   #define PTE_PROT_NONE        (_AT(pteval_t, 1) << 58) /* only when
>> !PTE_VALID */
>>     /*
>> - * This bit indicates that the entry is present i.e. pmd_page()
>> - * still points to a valid huge page in memory even if the pmd
>> - * has been invalidated.
>> + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
>> + * interpreted according to the HW layout by SW but any attempted HW access to
>> + * the address will result in a fault. pte_present() returns true.
>>    */
>> -#define PMD_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when
>> !PMD_SECT_VALID */
>> +#define PTE_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when
>> !PTE_VALID */
> 
> Ah, so PTE_VALID == PMD_SECT_VALID. Would that also be a reasonable
> generalization independent of this? (or do we keep it as is because it's a HW def?)

To be honest, I'm not sure of the history, but some things are implemented as
wrappers around pte functions and others are implemented specifically for
pmd/pud/etc.

On arm64, block mappings (all levels except last level) have the same HW format
as page mappings (last level) except that bit 1 must be 0 for block and 1 for
page. And with this series, SW/non-present bits are all matching too. So my vote
would be to harmonise toward a single implementation in future (modulo the bit
1 problem), which would include getting rid of things like PMD_SECT_VALID.

> 
> Reviewed-by: David Hildenbrand <david@redhat.com>

Thanks for all the R-bs!
Catalin Marinas May 7, 2024, 2:08 p.m. UTC | #3
On Tue, May 07, 2024 at 01:34:36PM +0100, Ryan Roberts wrote:
> On 07/05/2024 12:38, David Hildenbrand wrote:
> > On 03.05.24 16:45, Ryan Roberts wrote:
> >> --- a/arch/arm64/include/asm/pgtable-prot.h
> >> +++ b/arch/arm64/include/asm/pgtable-prot.h
> >> @@ -21,11 +21,11 @@
> >>   #define PTE_PROT_NONE        (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
> >>     /*
> >> - * This bit indicates that the entry is present i.e. pmd_page()
> >> - * still points to a valid huge page in memory even if the pmd
> >> - * has been invalidated.
> >> + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
> >> + * interpreted according to the HW layout by SW but any attempted HW access to
> >> + * the address will result in a fault. pte_present() returns true.
> >>    */
> >> -#define PMD_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
> >> +#define PTE_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */
> > 
> > Ah, so PTE_VALID == PMD_SECT_VALID. Would that also be a reasonable
> > generalization independent of this? (or do we keep it as is because it's a HW def?)
> 
> To be honest, I'm not sure of the history, but some things are implemented as
> wrappers around pte functions and others are implemented specifically for
> pmd/pud/etc.

There's also a bit of historical arm32 code moved over to arm64 when we
did the port. On classic arm32 page tables, the ptes and pmds were
pretty different.

> On arm64, block mappings (all levels except last level) have the same HW format
> as page mappings (last level) except that bit 1 must be 0 for block and 1 for
> page. And with this series, SW/non-present bits are all matching too. So my vote
> would be to harmonise toward a single implementation in future (modulus the bit
> 1 problem), which would include getting rid of things like PMD_SECT_VALID.

For PMD_SECT_VALID vs PTE_VALID, it's fine to only use the latter. The
PMD_TABLE_BIT, however, only makes sense for p*d levels. I think we can
get rid of all the PMD_SECT_* macros, just keeping PMD_TYPE_* and the
table bit.

For a bit of architecture history, the reason the pmd block entries have
bit 1 clear while the ptes have it set is to allow recursive mappings
where an entry in the pgd points to the pgd itself. The hardware page
table walk would end on the pmd entry when accessed at the specific VA,
giving quick access to the pte. The downside is wasting a bit of the VA
space.
Ryan Roberts May 7, 2024, 3:06 p.m. UTC | #4
On 07/05/2024 15:08, Catalin Marinas wrote:
> On Tue, May 07, 2024 at 01:34:36PM +0100, Ryan Roberts wrote:
>> On 07/05/2024 12:38, David Hildenbrand wrote:
>>> On 03.05.24 16:45, Ryan Roberts wrote:
>>>> --- a/arch/arm64/include/asm/pgtable-prot.h
>>>> +++ b/arch/arm64/include/asm/pgtable-prot.h
>>>> @@ -21,11 +21,11 @@
>>>>   #define PTE_PROT_NONE        (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
>>>>     /*
>>>> - * This bit indicates that the entry is present i.e. pmd_page()
>>>> - * still points to a valid huge page in memory even if the pmd
>>>> - * has been invalidated.
>>>> + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
>>>> + * interpreted according to the HW layout by SW but any attempted HW access to
>>>> + * the address will result in a fault. pte_present() returns true.
>>>>   */
>>>> -#define PMD_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
>>>> +#define PTE_PRESENT_INVALID    (_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */
>>>
>>> Ah, so PTE_VALID == PMD_SECT_VALID. Would that also be a reasonable
>>> generalization independent of this? (or do we keep it as is because it's a HW def?)
>>
>> To be honest, I'm not sure of the history, but some things are implemented as
>> wrappers around pte functions and others are implemented specifically for
>> pmd/pud/etc.
> 
> There's also a bit of historical arm32 code moved over to arm64 when we
> did the port. On classic arm32 page tables, the ptes and pmds were
> pretty different.
>
>> On arm64, block mappings (all levels except last level) have the same HW format
>> as page mappings (last level) except that bit 1 must be 0 for block and 1 for
>> page. And with this series, SW/non-present bits are all matching too. So my vote
>> would be to harmonise toward a single implementation in future (modulus the bit
>> 1 problem), which would include getting rid of things like PMD_SECT_VALID.
> 
> For PMD_SECT_VALID vs PTE_VALID, it's fine to only use the latter. The
> PMD_TABLE_BIT, however, only makes sense for p*d levels. I think we can
> get rid of all the PMD_SECT_* macros, just keeping PMD_TYPE_* and the
> table bit.

OK, perhaps I'll get around to sending a patch at some point.

> 
> For a bit of architecture history, the reason the pmd block entries have
> bit 1 clear while the ptes have it set is to allow recursive mappings
> where an entry in the pgd points to the pgd itself. The hardware page
> table walk would end on the pmd entry when accessed at the specific VA,
> giving quick access to the pte. The downside is wasting a bit of the VA
> space.

Ahh ok. I had to think about that for a while, but makes sense. Thanks!
Anshuman Khandual May 8, 2024, 9:43 a.m. UTC | #5
On 5/3/24 20:15, Ryan Roberts wrote:
> As preparation for the next patch, which frees up the PTE_PROT_NONE
> present pte and swap pte bit, generalize PMD_PRESENT_INVALID to
> PTE_PRESENT_INVALID. This will then be used to mark PROT_NONE ptes (and
> entries at any other level) in the next patch.
> 
> While we're at it, fix up the swap pte format comment to include
> PTE_PRESENT_INVALID. This is not new, it just wasn't previously
> documented.
> 
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>

> ---
>  arch/arm64/include/asm/pgtable-prot.h |  8 ++++----
>  arch/arm64/include/asm/pgtable.h      | 21 ++++++++++++---------
>  2 files changed, 16 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index dd9ee67d1d87..cdbf51eef7a6 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -21,11 +21,11 @@
>  #define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
>  
>  /*
> - * This bit indicates that the entry is present i.e. pmd_page()
> - * still points to a valid huge page in memory even if the pmd
> - * has been invalidated.
> + * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
> + * interpreted according to the HW layout by SW but any attempted HW access to
> + * the address will result in a fault. pte_present() returns true.
>   */
> -#define PMD_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
> +#define PTE_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */
>  
>  #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
>  #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index afdd56d26ad7..7156c940ac4f 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -132,6 +132,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>  #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
>  
>  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
> +#define pte_present_invalid(pte) \
> +	((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
>  /*
>   * Execute-only user mappings do not have the PTE_USER bit set. All valid
>   * kernel mappings have the PTE_UXN bit set.
> @@ -261,6 +263,13 @@ static inline pte_t pte_mkpresent(pte_t pte)
>  	return set_pte_bit(pte, __pgprot(PTE_VALID));
>  }
>  
> +static inline pte_t pte_mkinvalid(pte_t pte)
> +{
> +	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
> +	pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
> +	return pte;
> +}
> +
>  static inline pmd_t pmd_mkcont(pmd_t pmd)
>  {
>  	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
> @@ -478,7 +487,7 @@ static inline int pmd_protnone(pmd_t pmd)
>  }
>  #endif
>  
> -#define pmd_present_invalid(pmd)     (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
> +#define pmd_present_invalid(pmd)	pte_present_invalid(pmd_pte(pmd))
>  
>  static inline int pmd_present(pmd_t pmd)
>  {
> @@ -508,14 +517,7 @@ static inline int pmd_trans_huge(pmd_t pmd)
>  #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
>  #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
>  #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
> -
> -static inline pmd_t pmd_mkinvalid(pmd_t pmd)
> -{
> -	pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
> -	pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
> -
> -	return pmd;
> -}
> +#define pmd_mkinvalid(pmd)	pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
>  
>  #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
>  
> @@ -1251,6 +1253,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>   *	bits 3-7:	swap type
>   *	bits 8-57:	swap offset
>   *	bit  58:	PTE_PROT_NONE (must be zero)
> + *	bit  59:	PTE_PRESENT_INVALID (must be zero)
>   */
>  #define __SWP_TYPE_SHIFT	3
>  #define __SWP_TYPE_BITS		5
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index dd9ee67d1d87..cdbf51eef7a6 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -21,11 +21,11 @@ 
 #define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
 
 /*
- * This bit indicates that the entry is present i.e. pmd_page()
- * still points to a valid huge page in memory even if the pmd
- * has been invalidated.
+ * PTE_PRESENT_INVALID=1 & PTE_VALID=0 indicates that the pte's fields should be
+ * interpreted according to the HW layout by SW but any attempted HW access to
+ * the address will result in a fault. pte_present() returns true.
  */
-#define PMD_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
+#define PTE_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PTE_VALID */
 
 #define _PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define _PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index afdd56d26ad7..7156c940ac4f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -132,6 +132,8 @@  static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
+#define pte_present_invalid(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_PRESENT_INVALID)) == PTE_PRESENT_INVALID)
 /*
  * Execute-only user mappings do not have the PTE_USER bit set. All valid
  * kernel mappings have the PTE_UXN bit set.
@@ -261,6 +263,13 @@  static inline pte_t pte_mkpresent(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_VALID));
 }
 
+static inline pte_t pte_mkinvalid(pte_t pte)
+{
+	pte = set_pte_bit(pte, __pgprot(PTE_PRESENT_INVALID));
+	pte = clear_pte_bit(pte, __pgprot(PTE_VALID));
+	return pte;
+}
+
 static inline pmd_t pmd_mkcont(pmd_t pmd)
 {
 	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
@@ -478,7 +487,7 @@  static inline int pmd_protnone(pmd_t pmd)
 }
 #endif
 
-#define pmd_present_invalid(pmd)     (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
+#define pmd_present_invalid(pmd)	pte_present_invalid(pmd_pte(pmd))
 
 static inline int pmd_present(pmd_t pmd)
 {
@@ -508,14 +517,7 @@  static inline int pmd_trans_huge(pmd_t pmd)
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-
-static inline pmd_t pmd_mkinvalid(pmd_t pmd)
-{
-	pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
-	pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
-
-	return pmd;
-}
+#define pmd_mkinvalid(pmd)	pte_pmd(pte_mkinvalid(pmd_pte(pmd)))
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 
@@ -1251,6 +1253,7 @@  static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
  *	bits 3-7:	swap type
  *	bits 8-57:	swap offset
  *	bit  58:	PTE_PROT_NONE (must be zero)
+ *	bit  59:	PTE_PRESENT_INVALID (must be zero)
  */
 #define __SWP_TYPE_SHIFT	3
 #define __SWP_TYPE_BITS		5