diff mbox series

[rfc,v2,05/10] powerpc: mm: use try_vma_locked_page_fault()

Message ID 20230821123056.2109942-6-wangkefeng.wang@huawei.com (mailing list archive)
State New
Headers show
Series mm: convert to generic VMA lock-based page fault | expand

Commit Message

Kefeng Wang Aug. 21, 2023, 12:30 p.m. UTC
Use new try_vma_locked_page_fault() helper to simplify code.
No functional change intended.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 34 deletions(-)

Comments

Christophe Leroy Aug. 22, 2023, 9:38 a.m. UTC | #1
Le 21/08/2023 à 14:30, Kefeng Wang a écrit :
> Use new try_vma_locked_page_fault() helper to simplify code.
> No functional change intended.

Does it really simplifies code ? It's 32 insertions versus 34 deletions 
so only removing 2 lines.

I don't like the struct vm_fault you are adding because when it was four 
independant variables it was handled through local registers. Now that 
it is a struct it has to go via the stack, leading to unnecessary memory 
read and writes. And going back and forth between architecture code and 
generic code may also be counter-performant.

Did you make any performance analysis ? Page faults are really a hot 
path when dealling with minor faults.

Thanks
Christophe

> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>   1 file changed, 32 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index b1723094d464..52f9546e020e 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
>   #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
>   #endif
>   
> +#ifdef CONFIG_PER_VMA_LOCK
> +bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +	int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
> +	int is_write = page_fault_is_write(vmf->fault_code);
> +
> +	if (unlikely(access_pkey_error(is_write, is_exec,
> +				(vmf->fault_code & DSISR_KEYFAULT), vma)))
> +		return true;
> +
> +	if (unlikely(access_error(is_write, is_exec, vma)))
> +		return true;
> +	return false;
> +}
> +#endif
> +
>   /*
>    * For 600- and 800-family processors, the error_code parameter is DSISR
>    * for a data fault, SRR1 for an instruction fault.
> @@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   {
>   	struct vm_area_struct * vma;
>   	struct mm_struct *mm = current->mm;
> -	unsigned int flags = FAULT_FLAG_DEFAULT;
>   	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
>   	int is_user = user_mode(regs);
>   	int is_write = page_fault_is_write(error_code);
>   	vm_fault_t fault, major = 0;
>   	bool kprobe_fault = kprobe_page_fault(regs, 11);
> +	struct vm_fault vmf = {
> +		.real_address = address,
> +		.fault_code = error_code,
> +		.regs = regs,
> +		.flags = FAULT_FLAG_DEFAULT,
> +	};
> +
>   
>   	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
>   		return 0;
> @@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * mmap_lock held
>   	 */
>   	if (is_user)
> -		flags |= FAULT_FLAG_USER;
> +		vmf.flags |= FAULT_FLAG_USER;
>   	if (is_write)
> -		flags |= FAULT_FLAG_WRITE;
> +		vmf.flags |= FAULT_FLAG_WRITE;
>   	if (is_exec)
> -		flags |= FAULT_FLAG_INSTRUCTION;
> +		vmf.flags |= FAULT_FLAG_INSTRUCTION;
>   
> -	if (!(flags & FAULT_FLAG_USER))
> -		goto lock_mmap;
> -
> -	vma = lock_vma_under_rcu(mm, address);
> -	if (!vma)
> -		goto lock_mmap;
> -
> -	if (unlikely(access_pkey_error(is_write, is_exec,
> -				       (error_code & DSISR_KEYFAULT), vma))) {
> -		vma_end_read(vma);
> -		goto lock_mmap;
> -	}
> -
> -	if (unlikely(access_error(is_write, is_exec, vma))) {
> -		vma_end_read(vma);
> -		goto lock_mmap;
> -	}
> -
> -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> -		vma_end_read(vma);
> -
> -	if (!(fault & VM_FAULT_RETRY)) {
> -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +	fault = try_vma_locked_page_fault(&vmf);
> +	if (fault == VM_FAULT_NONE)
> +		goto retry;
> +	if (!(fault & VM_FAULT_RETRY))
>   		goto done;
> -	}
> -	count_vm_vma_lock_event(VMA_LOCK_RETRY);
>   
>   	if (fault_signal_pending(fault, regs))
>   		return user_mode(regs) ? 0 : SIGBUS;
>   
> -lock_mmap:
> -
>   	/* When running in the kernel we expect faults to occur only to
>   	 * addresses in user space.  All other faults represent errors in the
>   	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
> @@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * make sure we exit gracefully rather than endlessly redo
>   	 * the fault.
>   	 */
> -	fault = handle_mm_fault(vma, address, flags, regs);
> +	fault = handle_mm_fault(vma, address, vmf.flags, regs);
>   
>   	major |= fault & VM_FAULT_MAJOR;
>   
> @@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>   	 * case.
>   	 */
>   	if (unlikely(fault & VM_FAULT_RETRY)) {
> -		flags |= FAULT_FLAG_TRIED;
> +		vmf.flags |= FAULT_FLAG_TRIED;
>   		goto retry;
>   	}
>
Kefeng Wang Aug. 22, 2023, 12:12 p.m. UTC | #2
On 2023/8/22 17:38, Christophe Leroy wrote:
> 
> 
> Le 21/08/2023 à 14:30, Kefeng Wang a écrit :
>> Use new try_vma_locked_page_fault() helper to simplify code.
>> No functional change intended.
> 
> Does it really simplifies code ? It's 32 insertions versus 34 deletions
> so only removing 2 lines.

Yes,it is unfriendly for powerpc as the arch's vma access check is much
complex than other arch,
> 
> I don't like the struct vm_fault you are adding because when it was four
> independant variables it was handled through local registers. Now that
> it is a struct it has to go via the stack, leading to unnecessary memory
> read and writes. And going back and forth between architecture code and
> generic code may also be counter-performant.

Because different arch has different var to check vma access, so the
easy way to add them into vmf, I don' find a better way.
> 
> Did you make any performance analysis ? Page faults are really a hot
> path when dealling with minor faults.

no, this is only built and rfc to see the feedback about the conversion.

Thanks.

> 
> Thanks
> Christophe
> 
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>    arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>>    1 file changed, 32 insertions(+), 34 deletions(-)
>>
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index b1723094d464..52f9546e020e 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
>>    #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
>>    #endif
>>    
>> +#ifdef CONFIG_PER_VMA_LOCK
>> +bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
>> +{
>> +	int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
>> +	int is_write = page_fault_is_write(vmf->fault_code);
>> +
>> +	if (unlikely(access_pkey_error(is_write, is_exec,
>> +				(vmf->fault_code & DSISR_KEYFAULT), vma)))
>> +		return true;
>> +
>> +	if (unlikely(access_error(is_write, is_exec, vma)))
>> +		return true;
>> +	return false;
>> +}
>> +#endif
>> +
>>    /*
>>     * For 600- and 800-family processors, the error_code parameter is DSISR
>>     * for a data fault, SRR1 for an instruction fault.
>> @@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>>    {
>>    	struct vm_area_struct * vma;
>>    	struct mm_struct *mm = current->mm;
>> -	unsigned int flags = FAULT_FLAG_DEFAULT;
>>    	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
>>    	int is_user = user_mode(regs);
>>    	int is_write = page_fault_is_write(error_code);
>>    	vm_fault_t fault, major = 0;
>>    	bool kprobe_fault = kprobe_page_fault(regs, 11);
>> +	struct vm_fault vmf = {
>> +		.real_address = address,
>> +		.fault_code = error_code,
>> +		.regs = regs,
>> +		.flags = FAULT_FLAG_DEFAULT,
>> +	};
>> +
>>    
>>    	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
>>    		return 0;
>> @@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>>    	 * mmap_lock held
>>    	 */
>>    	if (is_user)
>> -		flags |= FAULT_FLAG_USER;
>> +		vmf.flags |= FAULT_FLAG_USER;
>>    	if (is_write)
>> -		flags |= FAULT_FLAG_WRITE;
>> +		vmf.flags |= FAULT_FLAG_WRITE;
>>    	if (is_exec)
>> -		flags |= FAULT_FLAG_INSTRUCTION;
>> +		vmf.flags |= FAULT_FLAG_INSTRUCTION;
>>    
>> -	if (!(flags & FAULT_FLAG_USER))
>> -		goto lock_mmap;
>> -
>> -	vma = lock_vma_under_rcu(mm, address);
>> -	if (!vma)
>> -		goto lock_mmap;
>> -
>> -	if (unlikely(access_pkey_error(is_write, is_exec,
>> -				       (error_code & DSISR_KEYFAULT), vma))) {
>> -		vma_end_read(vma);
>> -		goto lock_mmap;
>> -	}
>> -
>> -	if (unlikely(access_error(is_write, is_exec, vma))) {
>> -		vma_end_read(vma);
>> -		goto lock_mmap;
>> -	}
>> -
>> -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>> -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>> -		vma_end_read(vma);
>> -
>> -	if (!(fault & VM_FAULT_RETRY)) {
>> -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>> +	fault = try_vma_locked_page_fault(&vmf);
>> +	if (fault == VM_FAULT_NONE)
>> +		goto retry;
>> +	if (!(fault & VM_FAULT_RETRY))
>>    		goto done;
>> -	}
>> -	count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>    
>>    	if (fault_signal_pending(fault, regs))
>>    		return user_mode(regs) ? 0 : SIGBUS;
>>    
>> -lock_mmap:
>> -
>>    	/* When running in the kernel we expect faults to occur only to
>>    	 * addresses in user space.  All other faults represent errors in the
>>    	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
>> @@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>>    	 * make sure we exit gracefully rather than endlessly redo
>>    	 * the fault.
>>    	 */
>> -	fault = handle_mm_fault(vma, address, flags, regs);
>> +	fault = handle_mm_fault(vma, address, vmf.flags, regs);
>>    
>>    	major |= fault & VM_FAULT_MAJOR;
>>    
>> @@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
>>    	 * case.
>>    	 */
>>    	if (unlikely(fault & VM_FAULT_RETRY)) {
>> -		flags |= FAULT_FLAG_TRIED;
>> +		vmf.flags |= FAULT_FLAG_TRIED;
>>    		goto retry;
>>    	}
>>
diff mbox series

Patch

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b1723094d464..52f9546e020e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,22 @@  static int page_fault_is_bad(unsigned long err)
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
 #endif
 
+#ifdef CONFIG_PER_VMA_LOCK
+bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
+	int is_write = page_fault_is_write(vmf->fault_code);
+
+	if (unlikely(access_pkey_error(is_write, is_exec,
+				(vmf->fault_code & DSISR_KEYFAULT), vma)))
+		return true;
+
+	if (unlikely(access_error(is_write, is_exec, vma)))
+		return true;
+	return false;
+}
+#endif
+
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault.
@@ -407,12 +423,18 @@  static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 {
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
-	unsigned int flags = FAULT_FLAG_DEFAULT;
 	int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
 	int is_user = user_mode(regs);
 	int is_write = page_fault_is_write(error_code);
 	vm_fault_t fault, major = 0;
 	bool kprobe_fault = kprobe_page_fault(regs, 11);
+	struct vm_fault vmf = {
+		.real_address = address,
+		.fault_code = error_code,
+		.regs = regs,
+		.flags = FAULT_FLAG_DEFAULT,
+	};
+
 
 	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
 		return 0;
@@ -463,45 +485,21 @@  static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * mmap_lock held
 	 */
 	if (is_user)
-		flags |= FAULT_FLAG_USER;
+		vmf.flags |= FAULT_FLAG_USER;
 	if (is_write)
-		flags |= FAULT_FLAG_WRITE;
+		vmf.flags |= FAULT_FLAG_WRITE;
 	if (is_exec)
-		flags |= FAULT_FLAG_INSTRUCTION;
+		vmf.flags |= FAULT_FLAG_INSTRUCTION;
 
-	if (!(flags & FAULT_FLAG_USER))
-		goto lock_mmap;
-
-	vma = lock_vma_under_rcu(mm, address);
-	if (!vma)
-		goto lock_mmap;
-
-	if (unlikely(access_pkey_error(is_write, is_exec,
-				       (error_code & DSISR_KEYFAULT), vma))) {
-		vma_end_read(vma);
-		goto lock_mmap;
-	}
-
-	if (unlikely(access_error(is_write, is_exec, vma))) {
-		vma_end_read(vma);
-		goto lock_mmap;
-	}
-
-	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
-		vma_end_read(vma);
-
-	if (!(fault & VM_FAULT_RETRY)) {
-		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+	fault = try_vma_locked_page_fault(&vmf);
+	if (fault == VM_FAULT_NONE)
+		goto retry;
+	if (!(fault & VM_FAULT_RETRY))
 		goto done;
-	}
-	count_vm_vma_lock_event(VMA_LOCK_RETRY);
 
 	if (fault_signal_pending(fault, regs))
 		return user_mode(regs) ? 0 : SIGBUS;
 
-lock_mmap:
-
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -528,7 +526,7 @@  static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags, regs);
+	fault = handle_mm_fault(vma, address, vmf.flags, regs);
 
 	major |= fault & VM_FAULT_MAJOR;
 
@@ -544,7 +542,7 @@  static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * case.
 	 */
 	if (unlikely(fault & VM_FAULT_RETRY)) {
-		flags |= FAULT_FLAG_TRIED;
+		vmf.flags |= FAULT_FLAG_TRIED;
 		goto retry;
 	}