diff mbox series

[v4,15/29] arm64: handle PKEY/POE faults

Message ID 20240503130147.1154804-16-joey.gouly@arm.com (mailing list archive)
State New
Headers show
Series arm64: Permission Overlay Extension | expand

Commit Message

Joey Gouly May 3, 2024, 1:01 p.m. UTC
If a memory fault occurs that is due to an overlay/pkey fault, report that to
userspace with a SEGV_PKUERR.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/traps.h |  1 +
 arch/arm64/kernel/traps.c      | 12 ++++++--
 arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
 3 files changed, 64 insertions(+), 5 deletions(-)

Comments

Catalin Marinas June 21, 2024, 4:57 p.m. UTC | #1
On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
>  	struct vm_area_struct *vma;
> +	bool pkey_fault = false;
> +	int pkey = -1;
>  
>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		vma_end_read(vma);
>  		goto lock_mmap;
>  	}
> +
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		vma_end_read(vma);
> +		goto lock_mmap;
> +	}
> +
>  	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>  	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>  		vma_end_read(vma);
> @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		goto done;
>  	}
>  
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		pkey_fault = true;
> +		pkey = vma_pkey(vma);
> +	}

I was wondering if we actually need to test this again. We know the
fault was from a pkey already above but I guess it matches what we do
with the vma->vm_flags check in case it races with some mprotect() call.

> +
>  	fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);

You'll need to rebase this on 6.10-rcX since this function disappeared.

Otherwise the patch looks fine.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Kevin Brodsky July 9, 2024, 1:03 p.m. UTC | #2
On 03/05/2024 15:01, Joey Gouly wrote:
> [...]
>
> +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
> +			unsigned int mm_flags)
> +{
> +	unsigned long iss2 = ESR_ELx_ISS2(esr);
> +
> +	if (!arch_pkeys_enabled())
> +		return false;
> +
> +	if (iss2 & ESR_ELx_Overlay)
> +		return true;
> +
> +	return !arch_vma_access_permitted(vma,
> +			mm_flags & FAULT_FLAG_WRITE,
> +			mm_flags & FAULT_FLAG_INSTRUCTION,
> +			mm_flags & FAULT_FLAG_REMOTE);

This function is only called from do_page_fault(), so the access cannot
be remote. The equivalent x86 function (access_error()) always sets
foreign to false.

> +}
> +
>  static vm_fault_t __do_page_fault(struct mm_struct *mm,
>  				  struct vm_area_struct *vma, unsigned long addr,
>  				  unsigned int mm_flags, unsigned long vm_flags,
> @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
>  	struct vm_area_struct *vma;
> +	bool pkey_fault = false;
> +	int pkey = -1;
>  
>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		vma_end_read(vma);
>  		goto lock_mmap;
>  	}
> +
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		vma_end_read(vma);
> +		goto lock_mmap;
> +	}
> +
>  	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>  	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>  		vma_end_read(vma);
> @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		goto done;
>  	}
>  
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		pkey_fault = true;
> +		pkey = vma_pkey(vma);
> +	}
> +
>  	fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);

We don't actually need to call __do_page_fault()/handle_mm_fault() if
the fault was caused by POE. It still works since it checks
arch_vma_access_permitted() early, but we might as well skip it
altogether (like on x86). On 6.10-rcX, we could handle it like a missing
vm_flags (goto bad_area).

Kevin
Anshuman Khandual July 16, 2024, 10:13 a.m. UTC | #3
A minor nit. The fault is related to POE in terms of the HW rather than PKEY
which it is abstracted out in core MM. Hence it might be better to describe
the fault as POE one rather than PKEY related.

arm64/mm: Handle POE faults

On 5/3/24 18:31, Joey Gouly wrote:
> If a memory fault occurs that is due to an overlay/pkey fault, report that to
> userspace with a SEGV_PKUERR.
> 
> Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>  arch/arm64/include/asm/traps.h |  1 +
>  arch/arm64/kernel/traps.c      | 12 ++++++--
>  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
>  3 files changed, 64 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
> index eefe766d6161..f6f6f2cb7f10 100644
> --- a/arch/arm64/include/asm/traps.h
> +++ b/arch/arm64/include/asm/traps.h
> @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
>  void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
>  void arm64_notify_segfault(unsigned long addr);
>  void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
> +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
>  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
>  void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
>  
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 215e6d7f2df8..1bac6c84d3f5 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
>  	__show_regs(regs);
>  }
>  
> -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> -			   const char *str)
> +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> +			   const char *str, int pkey)
>  {
>  	arm64_show_signal(signo, str);
>  	if (signo == SIGKILL)
>  		force_sig(SIGKILL);
> +	else if (code == SEGV_PKUERR)
> +		force_sig_pkuerr((void __user *)far, pkey);
>  	else
>  		force_sig_fault(signo, code, (void __user *)far);
>  }
>  
> +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> +			   const char *str)
> +{
> +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
> +}
> +

arm64_force_sig_fault_pkey() could not be added as a new stand alone
helper, without refactoring with arm64_force_sig_fault() ? Is there
any benefit ?

>  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
>  			    const char *str)
>  {
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 8251e2fea9c7..585295168918 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -23,6 +23,7 @@
>  #include <linux/sched/debug.h>
>  #include <linux/highmem.h>
>  #include <linux/perf_event.h>
> +#include <linux/pkeys.h>
>  #include <linux/preempt.h>
>  #include <linux/hugetlb.h>
>  
> @@ -489,6 +490,23 @@ static void do_bad_area(unsigned long far, unsigned long esr,
>  #define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
>  #define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
>  
> +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
> +			unsigned int mm_flags)
> +{
> +	unsigned long iss2 = ESR_ELx_ISS2(esr);
> +
> +	if (!arch_pkeys_enabled())
> +		return false;
> +
> +	if (iss2 & ESR_ELx_Overlay)
> +		return true;

Should not there be a spurious POE fault check with an WARN_ONCE()
splash, when ESR_ELx_Overlay is set without arch_pkeys_enabled().

> +
> +	return !arch_vma_access_permitted(vma,
> +			mm_flags & FAULT_FLAG_WRITE,
> +			mm_flags & FAULT_FLAG_INSTRUCTION,
> +			mm_flags & FAULT_FLAG_REMOTE);
> +}

FAULT_FLAG_REMOTE is applicable here ?

> +
>  static vm_fault_t __do_page_fault(struct mm_struct *mm,
>  				  struct vm_area_struct *vma, unsigned long addr,
>  				  unsigned int mm_flags, unsigned long vm_flags,
> @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
>  	unsigned long addr = untagged_addr(far);
>  	struct vm_area_struct *vma;
> +	bool pkey_fault = false;
> +	int pkey = -1;
>  
>  	if (kprobe_page_fault(regs, esr))
>  		return 0;
> @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		vma_end_read(vma);
>  		goto lock_mmap;
>  	}
> +
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		vma_end_read(vma);
> +		goto lock_mmap;
> +	}
> +
>  	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
>  	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>  		vma_end_read(vma);
> @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		goto done;
>  	}
>  
> +	if (fault_from_pkey(esr, vma, mm_flags)) {
> +		pkey_fault = true;
> +		pkey = vma_pkey(vma);
> +	}
> +
>  	fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);
>  
>  	/* Quick path to respond to signals */
> @@ -682,9 +713,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
>  		 * Something tried to access memory that isn't in our memory
>  		 * map.
>  		 */
> -		arm64_force_sig_fault(SIGSEGV,
> -				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
> -				      far, inf->name);
> +		int fault_kind;
> +		/*
> +		 * The pkey value that we return to userspace can be different
> +		 * from the pkey that caused the fault.
> +		 *
> +		 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
> +		 * 2. T1   : set POR_EL0 to deny access to pkey=4, touches, page
> +		 * 3. T1   : faults...
> +		 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
> +		 * 5. T1   : enters fault handler, takes mmap_lock, etc...
> +		 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
> +		 *	     faulted on a pte with its pkey=4.
> +		 */
> +
> +		if (pkey_fault)
> +			fault_kind = SEGV_PKUERR;
> +		else
> +			fault_kind = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR;
> +
> +		arm64_force_sig_fault_pkey(SIGSEGV,
> +				      fault_kind,
> +				      far, inf->name, pkey);
>  	}
>  
>  	return 0;
Dave Martin July 25, 2024, 3:57 p.m. UTC | #4
On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> If a memory fault occurs that is due to an overlay/pkey fault, report that to
> userspace with a SEGV_PKUERR.
> 
> Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>  arch/arm64/include/asm/traps.h |  1 +
>  arch/arm64/kernel/traps.c      | 12 ++++++--
>  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
>  3 files changed, 64 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
> index eefe766d6161..f6f6f2cb7f10 100644
> --- a/arch/arm64/include/asm/traps.h
> +++ b/arch/arm64/include/asm/traps.h
> @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
>  void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
>  void arm64_notify_segfault(unsigned long addr);
>  void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
> +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
>  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
>  void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
>  
> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> index 215e6d7f2df8..1bac6c84d3f5 100644
> --- a/arch/arm64/kernel/traps.c
> +++ b/arch/arm64/kernel/traps.c
> @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
>  	__show_regs(regs);
>  }
>  
> -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> -			   const char *str)
> +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> +			   const char *str, int pkey)
>  {
>  	arm64_show_signal(signo, str);
>  	if (signo == SIGKILL)
>  		force_sig(SIGKILL);
> +	else if (code == SEGV_PKUERR)
> +		force_sig_pkuerr((void __user *)far, pkey);

Is signo definitely SIGSEGV here?  It looks to me like we can get in
here for SIGBUS, SIGTRAP etc.

si_codes are not unique between different signo here, so I'm wondering
whether this should this be:

	else if (signo == SIGSEGV && code == SEGV_PKUERR)

...?


>  	else
>  		force_sig_fault(signo, code, (void __user *)far);
>  }
>  
> +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> +			   const char *str)
> +{
> +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);

Is there a reason not to follow the same convention as elsewhere, where
-1 is passed for "no pkey"?

If we think this should never be called with signo == SIGSEGV &&
code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then
maybe a WARN_ON_ONCE() would be worth it here?

[...]

Cheers
---Dave
Joey Gouly Aug. 1, 2024, 4:01 p.m. UTC | #5
On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote:
> On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> > If a memory fault occurs that is due to an overlay/pkey fault, report that to
> > userspace with a SEGV_PKUERR.
> > 
> > Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > ---
> >  arch/arm64/include/asm/traps.h |  1 +
> >  arch/arm64/kernel/traps.c      | 12 ++++++--
> >  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
> >  3 files changed, 64 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
> > index eefe766d6161..f6f6f2cb7f10 100644
> > --- a/arch/arm64/include/asm/traps.h
> > +++ b/arch/arm64/include/asm/traps.h
> > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
> >  void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
> >  void arm64_notify_segfault(unsigned long addr);
> >  void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
> > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
> >  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
> >  void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
> >  
> > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > index 215e6d7f2df8..1bac6c84d3f5 100644
> > --- a/arch/arm64/kernel/traps.c
> > +++ b/arch/arm64/kernel/traps.c
> > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
> >  	__show_regs(regs);
> >  }
> >  
> > -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > -			   const char *str)
> > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > +			   const char *str, int pkey)
> >  {
> >  	arm64_show_signal(signo, str);
> >  	if (signo == SIGKILL)
> >  		force_sig(SIGKILL);
> > +	else if (code == SEGV_PKUERR)
> > +		force_sig_pkuerr((void __user *)far, pkey);
> 
> Is signo definitely SIGSEGV here?  It looks to me like we can get in
> here for SIGBUS, SIGTRAP etc.
> 
> si_codes are not unique between different signo here, so I'm wondering
> whether this should this be:
> 
> 	else if (signo == SIGSEGV && code == SEGV_PKUERR)
> 
> ...?
> 
> 
> >  	else
> >  		force_sig_fault(signo, code, (void __user *)far);
> >  }
> >  
> > +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > +			   const char *str)
> > +{
> > +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
> 
> Is there a reason not to follow the same convention as elsewhere, where
> -1 is passed for "no pkey"?
> 
> If we think this should never be called with signo == SIGSEGV &&
> code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then
> maybe a WARN_ON_ONCE() would be worth it here?
> 

Anshuman suggested to separate them out, which I did like below, I think that
addresses your comments too?

diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
index 215e6d7f2df8..49bac9ae04c0 100644
--- arch/arm64/kernel/traps.c
+++ arch/arm64/kernel/traps.c
@@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
                force_sig_fault(signo, code, (void __user *)far);
 }
 
+void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
+                          const char *str, int pkey)
+{
+       arm64_show_signal(signo, str);
+       force_sig_pkuerr((void __user *)far, pkey);
+}
+
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
                            const char *str)
 {

diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c
index 451ba7cbd5ad..1ddd46b97f88 100644
--- arch/arm64/mm/fault.c
+++ arch/arm64/mm/fault.c

-               arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
+               if (si_code == SEGV_PKUERR)
+                       arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey);
+               else
+                       arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);


Thanks,
Joey
Dave Martin Aug. 6, 2024, 1:33 p.m. UTC | #6
Hi,

On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote:
> On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote:
> > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> > > If a memory fault occurs that is due to an overlay/pkey fault, report that to
> > > userspace with a SEGV_PKUERR.
> > > 
> > > Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > Cc: Will Deacon <will@kernel.org>
> > > ---
> > >  arch/arm64/include/asm/traps.h |  1 +
> > >  arch/arm64/kernel/traps.c      | 12 ++++++--
> > >  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
> > >  3 files changed, 64 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
> > > index eefe766d6161..f6f6f2cb7f10 100644
> > > --- a/arch/arm64/include/asm/traps.h
> > > +++ b/arch/arm64/include/asm/traps.h
> > > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
> > >  void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
> > >  void arm64_notify_segfault(unsigned long addr);
> > >  void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
> > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
> > >  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
> > >  void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
> > >  
> > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > > index 215e6d7f2df8..1bac6c84d3f5 100644
> > > --- a/arch/arm64/kernel/traps.c
> > > +++ b/arch/arm64/kernel/traps.c
> > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
> > >  	__show_regs(regs);
> > >  }
> > >  
> > > -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > -			   const char *str)
> > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > > +			   const char *str, int pkey)
> > >  {
> > >  	arm64_show_signal(signo, str);
> > >  	if (signo == SIGKILL)
> > >  		force_sig(SIGKILL);
> > > +	else if (code == SEGV_PKUERR)
> > > +		force_sig_pkuerr((void __user *)far, pkey);
> > 
> > Is signo definitely SIGSEGV here?  It looks to me like we can get in
> > here for SIGBUS, SIGTRAP etc.
> > 
> > si_codes are not unique between different signo here, so I'm wondering
> > whether this should this be:
> > 
> > 	else if (signo == SIGSEGV && code == SEGV_PKUERR)
> > 
> > ...?
> > 
> > 
> > >  	else
> > >  		force_sig_fault(signo, code, (void __user *)far);
> > >  }
> > >  
> > > +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > +			   const char *str)
> > > +{
> > > +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
> > 
> > Is there a reason not to follow the same convention as elsewhere, where
> > -1 is passed for "no pkey"?
> > 
> > If we think this should never be called with signo == SIGSEGV &&
> > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then
> > maybe a WARN_ON_ONCE() would be worth it here?
> > 
> 
> Anshuman suggested to separate them out, which I did like below, I think that
> addresses your comments too?
> 
> diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
> index 215e6d7f2df8..49bac9ae04c0 100644
> --- arch/arm64/kernel/traps.c
> +++ arch/arm64/kernel/traps.c
> @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
>                 force_sig_fault(signo, code, (void __user *)far);
>  }
>  
> +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> +                          const char *str, int pkey)
> +{
> +       arm64_show_signal(signo, str);
> +       force_sig_pkuerr((void __user *)far, pkey);
> +}
> +
>  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
>                             const char *str)
>  {
> 
> diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c
> index 451ba7cbd5ad..1ddd46b97f88 100644
> --- arch/arm64/mm/fault.c
> +++ arch/arm64/mm/fault.c

(Guessing where this is means to apply, since there is no hunk header
or context...)

> 
> -               arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
> +               if (si_code == SEGV_PKUERR)
> +                       arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey);

Maybe drop the the signo and si_code argument?  This would mean that
arm64_force_sig_fault_pkey() can't be called with a signo/si_code
combination that makes no sense.

I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right?
Or are there other combinations that can apply for these faults?


> +               else
> +                       arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);

Otherwise yes, I think splitting things this way makes sense.

Cheers
---Dave
Joey Gouly Aug. 6, 2024, 1:43 p.m. UTC | #7
On Tue, Aug 06, 2024 at 02:33:37PM +0100, Dave Martin wrote:
> Hi,
> 
> On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote:
> > On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote:
> > > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> > > > If a memory fault occurs that is due to an overlay/pkey fault, report that to
> > > > userspace with a SEGV_PKUERR.
> > > > 
> > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> > > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > > Cc: Will Deacon <will@kernel.org>
> > > > ---
> > > >  arch/arm64/include/asm/traps.h |  1 +
> > > >  arch/arm64/kernel/traps.c      | 12 ++++++--
> > > >  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
> > > >  3 files changed, 64 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
> > > > index eefe766d6161..f6f6f2cb7f10 100644
> > > > --- a/arch/arm64/include/asm/traps.h
> > > > +++ b/arch/arm64/include/asm/traps.h
> > > > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
> > > >  void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
> > > >  void arm64_notify_segfault(unsigned long addr);
> > > >  void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
> > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
> > > >  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
> > > >  void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
> > > >  
> > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > > > index 215e6d7f2df8..1bac6c84d3f5 100644
> > > > --- a/arch/arm64/kernel/traps.c
> > > > +++ b/arch/arm64/kernel/traps.c
> > > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
> > > >  	__show_regs(regs);
> > > >  }
> > > >  
> > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > > -			   const char *str)
> > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > > > +			   const char *str, int pkey)
> > > >  {
> > > >  	arm64_show_signal(signo, str);
> > > >  	if (signo == SIGKILL)
> > > >  		force_sig(SIGKILL);
> > > > +	else if (code == SEGV_PKUERR)
> > > > +		force_sig_pkuerr((void __user *)far, pkey);
> > > 
> > > Is signo definitely SIGSEGV here?  It looks to me like we can get in
> > > here for SIGBUS, SIGTRAP etc.
> > > 
> > > si_codes are not unique between different signo here, so I'm wondering
> > > whether this should this be:
> > > 
> > > 	else if (signo == SIGSEGV && code == SEGV_PKUERR)
> > > 
> > > ...?
> > > 
> > > 
> > > >  	else
> > > >  		force_sig_fault(signo, code, (void __user *)far);
> > > >  }
> > > >  
> > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > > +			   const char *str)
> > > > +{
> > > > +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
> > > 
> > > Is there a reason not to follow the same convention as elsewhere, where
> > > -1 is passed for "no pkey"?
> > > 
> > > If we think this should never be called with signo == SIGSEGV &&
> > > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then
> > > maybe a WARN_ON_ONCE() would be worth it here?
> > > 
> > 
> > Anshuman suggested to separate them out, which I did like below, I think that
> > addresses your comments too?
> > 
> > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
> > index 215e6d7f2df8..49bac9ae04c0 100644
> > --- arch/arm64/kernel/traps.c
> > +++ arch/arm64/kernel/traps.c
> > @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
> >                 force_sig_fault(signo, code, (void __user *)far);
> >  }
> >  
> > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > +                          const char *str, int pkey)
> > +{
> > +       arm64_show_signal(signo, str);
> > +       force_sig_pkuerr((void __user *)far, pkey);
> > +}
> > +
> >  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
> >                             const char *str)
> >  {
> > 
> > diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c
> > index 451ba7cbd5ad..1ddd46b97f88 100644
> > --- arch/arm64/mm/fault.c
> > +++ arch/arm64/mm/fault.c
> 
> (Guessing where this is means to apply, since there is no hunk header
> or context...)

Sorry I had some other changes and just mashed the bits into a diff-looking-thing.

> 
> > 
> > -               arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
> > +               if (si_code == SEGV_PKUERR)
> > +                       arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey);
> 
> Maybe drop the the signo and si_code argument?  This would mean that
> arm64_force_sig_fault_pkey() can't be called with a signo/si_code
> combination that makes no sense.
> 
> I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right?
> Or are there other combinations that can apply for these faults?

Ah yes, I can simplify it even more, thanks.

diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
index 49bac9ae04c0..d9abb8b390c0 100644
--- arch/arm64/kernel/traps.c
+++ arch/arm64/kernel/traps.c
@@ -273,10 +273,9 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
                force_sig_fault(signo, code, (void __user *)far);
 }
 
-void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
-                          const char *str, int pkey)
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
 {
-       arm64_show_signal(signo, str);
+       arm64_show_signal(SIGSEGV, str);
        force_sig_pkuerr((void __user *)far, pkey);
 }


> 
> 
> > +               else
> > +                       arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
> 
> Otherwise yes, I think splitting things this way makes sense.
> 
> Cheers
> ---Dave
Dave Martin Aug. 6, 2024, 2:38 p.m. UTC | #8
Hi,

On Tue, Aug 06, 2024 at 02:43:57PM +0100, Joey Gouly wrote:
> On Tue, Aug 06, 2024 at 02:33:37PM +0100, Dave Martin wrote:
> > Hi,
> > 
> > On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote:
> > > On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote:
> > > > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote:
> > > > > If a memory fault occurs that is due to an overlay/pkey fault, report that to
> > > > > userspace with a SEGV_PKUERR.
> > > > > 
> > > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> > > > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > > > Cc: Will Deacon <will@kernel.org>
> > > > > ---
> > > > >  arch/arm64/include/asm/traps.h |  1 +
> > > > >  arch/arm64/kernel/traps.c      | 12 ++++++--
> > > > >  arch/arm64/mm/fault.c          | 56 ++++++++++++++++++++++++++++++++--
> > > > >  3 files changed, 64 insertions(+), 5 deletions(-)

[...]

> > > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
> > > > > index 215e6d7f2df8..1bac6c84d3f5 100644
> > > > > --- a/arch/arm64/kernel/traps.c
> > > > > +++ b/arch/arm64/kernel/traps.c
> > > > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str)
> > > > >  	__show_regs(regs);
> > > > >  }
> > > > >  
> > > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > > > -			   const char *str)
> > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > > > > +			   const char *str, int pkey)
> > > > >  {
> > > > >  	arm64_show_signal(signo, str);
> > > > >  	if (signo == SIGKILL)
> > > > >  		force_sig(SIGKILL);
> > > > > +	else if (code == SEGV_PKUERR)
> > > > > +		force_sig_pkuerr((void __user *)far, pkey);
> > > > 
> > > > Is signo definitely SIGSEGV here?  It looks to me like we can get in
> > > > here for SIGBUS, SIGTRAP etc.
> > > > 
> > > > si_codes are not unique between different signo here, so I'm wondering
> > > > whether this should this be:
> > > > 
> > > > 	else if (signo == SIGSEGV && code == SEGV_PKUERR)
> > > > 
> > > > ...?
> > > > 
> > > > 
> > > > >  	else
> > > > >  		force_sig_fault(signo, code, (void __user *)far);
> > > > >  }
> > > > >  
> > > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > > > > +			   const char *str)
> > > > > +{
> > > > > +	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
> > > > 
> > > > Is there a reason not to follow the same convention as elsewhere, where
> > > > -1 is passed for "no pkey"?
> > > > 
> > > > If we think this should never be called with signo == SIGSEGV &&
> > > > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then
> > > > maybe a WARN_ON_ONCE() would be worth it here?
> > > > 
> > > 
> > > Anshuman suggested to separate them out, which I did like below, I think that
> > > addresses your comments too?
> > > 
> > > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
> > > index 215e6d7f2df8..49bac9ae04c0 100644
> > > --- arch/arm64/kernel/traps.c
> > > +++ arch/arm64/kernel/traps.c
> > > @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
> > >                 force_sig_fault(signo, code, (void __user *)far);
> > >  }
> > >  
> > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> > > +                          const char *str, int pkey)
> > > +{
> > > +       arm64_show_signal(signo, str);
> > > +       force_sig_pkuerr((void __user *)far, pkey);
> > > +}
> > > +
> > >  void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
> > >                             const char *str)
> > >  {
> > > 
> > > diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c
> > > index 451ba7cbd5ad..1ddd46b97f88 100644
> > > --- arch/arm64/mm/fault.c
> > > +++ arch/arm64/mm/fault.c
> > 
> > (Guessing where this is means to apply, since there is no hunk header
> > or context...)
> 
> Sorry I had some other changes and just mashed the bits into a diff-looking-thing.

Fair enough.  There are a few similar bits of code, so including more
lines of context would have been helpful.

The change looked reasonable though.

> > > 
> > > -               arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
> > > +               if (si_code == SEGV_PKUERR)
> > > +                       arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey);
> > 
> > Maybe drop the the signo and si_code argument?  This would mean that
> > arm64_force_sig_fault_pkey() can't be called with a signo/si_code
> > combination that makes no sense.
> > 
> > I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right?
> > Or are there other combinations that can apply for these faults?
> 
> Ah yes, I can simplify it even more, thanks.
> 
> diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
> index 49bac9ae04c0..d9abb8b390c0 100644
> --- arch/arm64/kernel/traps.c
> +++ arch/arm64/kernel/traps.c
> @@ -273,10 +273,9 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
>                 force_sig_fault(signo, code, (void __user *)far);
>  }
>  
> -void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
> -                          const char *str, int pkey)
> +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
>  {
> -       arm64_show_signal(signo, str);
> +       arm64_show_signal(SIGSEGV, str);
>         force_sig_pkuerr((void __user *)far, pkey);
>  }

Looks sensible.

I see that force_sig_pkuerr() fills in the signo and si_code itself.

Cheers
---Dave
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index eefe766d6161..f6f6f2cb7f10 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -25,6 +25,7 @@  try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
 void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
 void arm64_notify_segfault(unsigned long addr);
 void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey);
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
 void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 215e6d7f2df8..1bac6c84d3f5 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -263,16 +263,24 @@  static void arm64_show_signal(int signo, const char *str)
 	__show_regs(regs);
 }
 
-void arm64_force_sig_fault(int signo, int code, unsigned long far,
-			   const char *str)
+void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far,
+			   const char *str, int pkey)
 {
 	arm64_show_signal(signo, str);
 	if (signo == SIGKILL)
 		force_sig(SIGKILL);
+	else if (code == SEGV_PKUERR)
+		force_sig_pkuerr((void __user *)far, pkey);
 	else
 		force_sig_fault(signo, code, (void __user *)far);
 }
 
+void arm64_force_sig_fault(int signo, int code, unsigned long far,
+			   const char *str)
+{
+	arm64_force_sig_fault_pkey(signo, code, far, str, 0);
+}
+
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
 			    const char *str)
 {
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 8251e2fea9c7..585295168918 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -23,6 +23,7 @@ 
 #include <linux/sched/debug.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
+#include <linux/pkeys.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
 
@@ -489,6 +490,23 @@  static void do_bad_area(unsigned long far, unsigned long esr,
 #define VM_FAULT_BADMAP		((__force vm_fault_t)0x010000)
 #define VM_FAULT_BADACCESS	((__force vm_fault_t)0x020000)
 
+static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
+			unsigned int mm_flags)
+{
+	unsigned long iss2 = ESR_ELx_ISS2(esr);
+
+	if (!arch_pkeys_enabled())
+		return false;
+
+	if (iss2 & ESR_ELx_Overlay)
+		return true;
+
+	return !arch_vma_access_permitted(vma,
+			mm_flags & FAULT_FLAG_WRITE,
+			mm_flags & FAULT_FLAG_INSTRUCTION,
+			mm_flags & FAULT_FLAG_REMOTE);
+}
+
 static vm_fault_t __do_page_fault(struct mm_struct *mm,
 				  struct vm_area_struct *vma, unsigned long addr,
 				  unsigned int mm_flags, unsigned long vm_flags,
@@ -529,6 +547,8 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 	unsigned long addr = untagged_addr(far);
 	struct vm_area_struct *vma;
+	bool pkey_fault = false;
+	int pkey = -1;
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -590,6 +610,12 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		vma_end_read(vma);
 		goto lock_mmap;
 	}
+
+	if (fault_from_pkey(esr, vma, mm_flags)) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
 	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
 		vma_end_read(vma);
@@ -617,6 +643,11 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		goto done;
 	}
 
+	if (fault_from_pkey(esr, vma, mm_flags)) {
+		pkey_fault = true;
+		pkey = vma_pkey(vma);
+	}
+
 	fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);
 
 	/* Quick path to respond to signals */
@@ -682,9 +713,28 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		 * Something tried to access memory that isn't in our memory
 		 * map.
 		 */
-		arm64_force_sig_fault(SIGSEGV,
-				      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
-				      far, inf->name);
+		int fault_kind;
+		/*
+		 * The pkey value that we return to userspace can be different
+		 * from the pkey that caused the fault.
+		 *
+		 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+		 * 2. T1   : set POR_EL0 to deny access to pkey=4, touches, page
+		 * 3. T1   : faults...
+		 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+		 * 5. T1   : enters fault handler, takes mmap_lock, etc...
+		 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+		 *	     faulted on a pte with its pkey=4.
+		 */
+
+		if (pkey_fault)
+			fault_kind = SEGV_PKUERR;
+		else
+			fault_kind = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR;
+
+		arm64_force_sig_fault_pkey(SIGSEGV,
+				      fault_kind,
+				      far, inf->name, pkey);
 	}
 
 	return 0;