diff mbox series

[V5,3/5] arch/powerpc/mm: Nest MMU workaround for mprotect RW upgrade.

Message ID 20190116085035.29729-4-aneesh.kumar@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series NestMMU pte upgrade workaround for mprotect | expand

Commit Message

Aneesh Kumar K.V Jan. 16, 2019, 8:50 a.m. UTC
NestMMU requires us to mark the pte invalid and flush the tlb when we do a
RW upgrade of pte. We fixed a variant of this in the fault path in commit
bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang").

Do the same for mprotect upgrades.

Hugetlb is handled in the next patch.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++
 arch/powerpc/include/asm/book3s/64/radix.h   |  4 ++++
 arch/powerpc/mm/pgtable-book3s64.c           | 25 ++++++++++++++++++++
 arch/powerpc/mm/pgtable-radix.c              | 18 ++++++++++++++
 4 files changed, 65 insertions(+)

Comments

Michael Ellerman Jan. 30, 2019, 10:52 a.m. UTC | #1
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> NestMMU requires us to mark the pte invalid and flush the tlb when we do a
> RW upgrade of pte. We fixed a variant of this in the fault path in commit
> Fixes: bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang")

You don't want the "Fixes:" there.

>
> Do the same for mprotect upgrades.
>
> Hugetlb is handled in the next patch.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++
>  arch/powerpc/include/asm/book3s/64/radix.h   |  4 ++++
>  arch/powerpc/mm/pgtable-book3s64.c           | 25 ++++++++++++++++++++
>  arch/powerpc/mm/pgtable-radix.c              | 18 ++++++++++++++
>  4 files changed, 65 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 2e6ada28da64..92eaea164700 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -1314,6 +1314,24 @@ static inline int pud_pfn(pud_t pud)
>  	BUILD_BUG();
>  	return 0;
>  }

Can we get a blank line here?

> +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
> +pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
> +void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
> +			     pte_t *, pte_t, pte_t);

So these are not inline ...

> +/*
> + * Returns true for a R -> RW upgrade of pte
> + */
> +static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
> +{
> +	if (!(old_val & _PAGE_READ))
> +		return false;
> +
> +	if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
> +		return true;
> +
> +	return false;
> +}
>  
>  #endif /* __ASSEMBLY__ */
>  #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
> index f3c31f5e1026..47c742f002ea 100644
> --- a/arch/powerpc/mm/pgtable-book3s64.c
> +++ b/arch/powerpc/mm/pgtable-book3s64.c
> @@ -400,3 +400,28 @@ void arch_report_meminfo(struct seq_file *m)
>  		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
>  }
>  #endif /* CONFIG_PROC_FS */
> +
> +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
> +			     pte_t *ptep)
> +{
> +	unsigned long pte_val;
> +
> +	/*
> +	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
> +	 * possible. Also keep the pte_present true so that we don't take
> +	 * wrong fault.
> +	 */
> +	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
> +
> +	return __pte(pte_val);
> +
> +}
> +
> +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
> +			     pte_t *ptep, pte_t old_pte, pte_t pte)
> +{

Which means we're going to be doing a function call to get to here ...

> +	if (radix_enabled())
> +		return radix__ptep_modify_prot_commit(vma, addr,
> +						      ptep, old_pte, pte);

And then another function call to get to the radix version ...

> +	set_pte_at(vma->vm_mm, addr, ptep, pte);
> +}
> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
> index 931156069a81..dced3cd241c2 100644
> --- a/arch/powerpc/mm/pgtable-radix.c
> +++ b/arch/powerpc/mm/pgtable-radix.c
> @@ -1063,3 +1063,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
>  	}
>  	/* See ptesync comment in radix__set_pte_at */
>  }
> +
> +void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
> +				    unsigned long addr, pte_t *ptep,
> +				    pte_t old_pte, pte_t pte)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +
> +	/*
> +	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
> +	 * we set the new value. We need to do this only for radix, because hash
> +	 * translation does flush when updating the linux pte.
> +	 */
> +	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
> +	    (atomic_read(&mm->context.copros) > 0))
> +		radix__flush_tlb_page(vma, addr);

To finally get here, where we'll realise that 99.99% of processes don't
use copros and so we have nothing to do except set the PTE.

> +
> +	set_pte_at(mm, addr, ptep, pte);
> +}

So can we just make it all inline in the header? Or do we think it's not
a hot enough path to worry about it?

cheers
Aneesh Kumar K.V Jan. 31, 2019, 5:07 a.m. UTC | #2
Michael Ellerman <mpe@ellerman.id.au> writes:

> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> NestMMU requires us to mark the pte invalid and flush the tlb when we do a
>> RW upgrade of pte. We fixed a variant of this in the fault path in commit
>> Fixes: bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to handle nest MMU hang")
>
> You don't want the "Fixes:" there.
>
>>
>> Do the same for mprotect upgrades.
>>
>> Hugetlb is handled in the next patch.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++
>>  arch/powerpc/include/asm/book3s/64/radix.h   |  4 ++++
>>  arch/powerpc/mm/pgtable-book3s64.c           | 25 ++++++++++++++++++++
>>  arch/powerpc/mm/pgtable-radix.c              | 18 ++++++++++++++
>>  4 files changed, 65 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index 2e6ada28da64..92eaea164700 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -1314,6 +1314,24 @@ static inline int pud_pfn(pud_t pud)
>>  	BUILD_BUG();
>>  	return 0;
>>  }
>
> Can we get a blank line here?
>
>> +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
>> +pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
>> +void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
>> +			     pte_t *, pte_t, pte_t);
>
> So these are not inline ...
>
>> +/*
>> + * Returns true for a R -> RW upgrade of pte
>> + */
>> +static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
>> +{
>> +	if (!(old_val & _PAGE_READ))
>> +		return false;
>> +
>> +	if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
>> +		return true;
>> +
>> +	return false;
>> +}
>>  
>>  #endif /* __ASSEMBLY__ */
>>  #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
>> diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
>> index f3c31f5e1026..47c742f002ea 100644
>> --- a/arch/powerpc/mm/pgtable-book3s64.c
>> +++ b/arch/powerpc/mm/pgtable-book3s64.c
>> @@ -400,3 +400,28 @@ void arch_report_meminfo(struct seq_file *m)
>>  		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
>>  }
>>  #endif /* CONFIG_PROC_FS */
>> +
>> +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
>> +			     pte_t *ptep)
>> +{
>> +	unsigned long pte_val;
>> +
>> +	/*
>> +	 * Clear the _PAGE_PRESENT so that no hardware parallel update is
>> +	 * possible. Also keep the pte_present true so that we don't take
>> +	 * wrong fault.
>> +	 */
>> +	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
>> +
>> +	return __pte(pte_val);
>> +
>> +}
>> +
>> +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
>> +			     pte_t *ptep, pte_t old_pte, pte_t pte)
>> +{
>
> Which means we're going to be doing a function call to get to here ...
>
>> +	if (radix_enabled())
>> +		return radix__ptep_modify_prot_commit(vma, addr,
>> +						      ptep, old_pte, pte);
>
> And then another function call to get to the radix version ...
>
>> +	set_pte_at(vma->vm_mm, addr, ptep, pte);
>> +}
>> diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
>> index 931156069a81..dced3cd241c2 100644
>> --- a/arch/powerpc/mm/pgtable-radix.c
>> +++ b/arch/powerpc/mm/pgtable-radix.c
>> @@ -1063,3 +1063,21 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
>>  	}
>>  	/* See ptesync comment in radix__set_pte_at */
>>  }
>> +
>> +void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
>> +				    unsigned long addr, pte_t *ptep,
>> +				    pte_t old_pte, pte_t pte)
>> +{
>> +	struct mm_struct *mm = vma->vm_mm;
>> +
>> +	/*
>> +	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
>> +	 * we set the new value. We need to do this only for radix, because hash
>> +	 * translation does flush when updating the linux pte.
>> +	 */
>> +	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
>> +	    (atomic_read(&mm->context.copros) > 0))
>> +		radix__flush_tlb_page(vma, addr);
>
> To finally get here, where we'll realise that 99.99% of processes don't
> use copros and so we have nothing to do except set the PTE.
>
>> +
>> +	set_pte_at(mm, addr, ptep, pte);
>> +}
>
> So can we just make it all inline in the header? Or do we think it's not
> a hot enough path to worry about it?
>

I did try that earlier, but IIRC that didn't work due to a header
inclusion issue. I can try that again in an add-on patch. That would
require moving things around so that the different struct definitions
are found correctly.

-aneesh
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2e6ada28da64..92eaea164700 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1314,6 +1314,24 @@  static inline int pud_pfn(pud_t pud)
 	BUILD_BUG();
 	return 0;
 }
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			     pte_t *, pte_t, pte_t);
+
+/*
+ * True iff old_val has _PAGE_READ but not _PAGE_WRITE and new_val has _PAGE_WRITE.
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+	if (!(old_val & _PAGE_READ))
+		return false;
+
+	if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+		return true;
+
+	return false;
+}
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 7d1a3d1543fc..5ab134eeed20 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -127,6 +127,10 @@  extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep
 					 pte_t entry, unsigned long address,
 					 int psize);
 
+extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+					   unsigned long addr, pte_t *ptep,
+					   pte_t old_pte, pte_t pte);
+
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index f3c31f5e1026..47c742f002ea 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -400,3 +400,28 @@  void arch_report_meminfo(struct seq_file *m)
 		   atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20);
 }
 #endif /* CONFIG_PROC_FS */
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep)
+{
+	unsigned long pte_val;
+
+	/*
+	 * Clear _PAGE_PRESENT (and set _PAGE_INVALID) so that no parallel
+	 * hardware update of the pte is possible, while keeping pte_present()
+	 * true so that we don't take a wrong fault.
+	 */
+	pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+	return __pte(pte_val);
+
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
+{
+	if (radix_enabled())	/* radix has its own commit helper */
+		radix__ptep_modify_prot_commit(vma, addr, ptep, old_pte, pte);
+	else			/* otherwise just install the new pte */
+		set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 931156069a81..dced3cd241c2 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -1063,3 +1063,21 @@  void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	}
 	/* See ptesync comment in radix__set_pte_at */
 }
+
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+				    unsigned long addr, pte_t *ptep,
+				    pte_t old_pte, pte_t pte)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	/*
+	 * To avoid an NMMU hang while relaxing access (R -> RW upgrade), flush
+	 * the TLB before setting the new pte value when mm->context.copros > 0.
+	 * Only radix needs this, because hash flushes when updating the linux pte.
+	 */
+	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+	    (atomic_read(&mm->context.copros) > 0))
+		radix__flush_tlb_page(vma, addr);
+
+	set_pte_at(mm, addr, ptep, pte);
+}