Message ID | 20240503130147.1154804-16-joey.gouly@arm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arm64: Permission Overlay Extension | expand |
On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > unsigned int mm_flags = FAULT_FLAG_DEFAULT; > unsigned long addr = untagged_addr(far); > struct vm_area_struct *vma; > + bool pkey_fault = false; > + int pkey = -1; > > if (kprobe_page_fault(regs, esr)) > return 0; > @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > vma_end_read(vma); > goto lock_mmap; > } > + > + if (fault_from_pkey(esr, vma, mm_flags)) { > + vma_end_read(vma); > + goto lock_mmap; > + } > + > fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); > if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) > vma_end_read(vma); > @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > goto done; > } > > + if (fault_from_pkey(esr, vma, mm_flags)) { > + pkey_fault = true; > + pkey = vma_pkey(vma); > + } I was wondering if we actually need to test this again. We know the fault was from a pkey already above but I guess it matches what we do with the vma->vm_flags check in case it races with some mprotect() call. > + > fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs); You'll need to rebase this on 6.10-rcX since this function disappeared. Otherwise the patch looks fine. Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
On 03/05/2024 15:01, Joey Gouly wrote: > [...] > > +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, > + unsigned int mm_flags) > +{ > + unsigned long iss2 = ESR_ELx_ISS2(esr); > + > + if (!arch_pkeys_enabled()) > + return false; > + > + if (iss2 & ESR_ELx_Overlay) > + return true; > + > + return !arch_vma_access_permitted(vma, > + mm_flags & FAULT_FLAG_WRITE, > + mm_flags & FAULT_FLAG_INSTRUCTION, > + mm_flags & FAULT_FLAG_REMOTE); This function is only called from do_page_fault(), so the access cannot be remote. The equivalent x86 function (access_error()) always sets foreign to false. > +} > + > static vm_fault_t __do_page_fault(struct mm_struct *mm, > struct vm_area_struct *vma, unsigned long addr, > unsigned int mm_flags, unsigned long vm_flags, > @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > unsigned int mm_flags = FAULT_FLAG_DEFAULT; > unsigned long addr = untagged_addr(far); > struct vm_area_struct *vma; > + bool pkey_fault = false; > + int pkey = -1; > > if (kprobe_page_fault(regs, esr)) > return 0; > @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > vma_end_read(vma); > goto lock_mmap; > } > + > + if (fault_from_pkey(esr, vma, mm_flags)) { > + vma_end_read(vma); > + goto lock_mmap; > + } > + > fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); > if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) > vma_end_read(vma); > @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > goto done; > } > > + if (fault_from_pkey(esr, vma, mm_flags)) { > + pkey_fault = true; > + pkey = vma_pkey(vma); > + } > + > fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs); We don't actually need to call __do_page_fault()/handle_mm_fault() if the fault was caused by POE. It still works since it checks arch_vma_access_permitted() early, but we might as well skip it altogether (like on x86). On 6.10-rcX, we could handle it like a missing vm_flags (goto bad_area). Kevin
A minor nit. The fault is related to POE in terms of the HW rather than PKEY which it is abstracted out in core MM. Hence it might be better to describe the fault as POE one rather than PKEY related. arm64/mm: Handle POE faults On 5/3/24 18:31, Joey Gouly wrote: > If a memory fault occurs that is due to an overlay/pkey fault, report that to > userspace with a SEGV_PKUERR. > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > --- > arch/arm64/include/asm/traps.h | 1 + > arch/arm64/kernel/traps.c | 12 ++++++-- > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > 3 files changed, 64 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h > index eefe766d6161..f6f6f2cb7f10 100644 > --- a/arch/arm64/include/asm/traps.h > +++ b/arch/arm64/include/asm/traps.h > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) > void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); > void arm64_notify_segfault(unsigned long addr); > void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); > void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index 215e6d7f2df8..1bac6c84d3f5 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > __show_regs(regs); > } > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > - const char *str) > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > + const char *str, int pkey) > { > arm64_show_signal(signo, str); > if (signo == SIGKILL) > force_sig(SIGKILL); > + else if (code == SEGV_PKUERR) > + force_sig_pkuerr((void __user *)far, pkey); > else > force_sig_fault(signo, code, (void __user *)far); > } > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > + const char *str) > +{ > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); > +} > + arm64_force_sig_fault_pkey() could not be added as a new stand alone helper, without refactoring with arm64_force_sig_fault() ? Is there any benefit ? > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, > const char *str) > { > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index 8251e2fea9c7..585295168918 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -23,6 +23,7 @@ > #include <linux/sched/debug.h> > #include <linux/highmem.h> > #include <linux/perf_event.h> > +#include <linux/pkeys.h> > #include <linux/preempt.h> > #include <linux/hugetlb.h> > > @@ -489,6 +490,23 @@ static void do_bad_area(unsigned long far, unsigned long esr, > #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) > #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) > > +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, > + unsigned int mm_flags) > +{ > + unsigned long iss2 = ESR_ELx_ISS2(esr); > + > + if (!arch_pkeys_enabled()) > + return false; > + > + if (iss2 & ESR_ELx_Overlay) > + return true; Should not there be a spurious POE fault check with an WARN_ONCE() splash, when ESR_ELx_Overlay is set without arch_pkeys_enabled(). > + > + return !arch_vma_access_permitted(vma, > + mm_flags & FAULT_FLAG_WRITE, > + mm_flags & FAULT_FLAG_INSTRUCTION, > + mm_flags & FAULT_FLAG_REMOTE); > +} FAULT_FLAG_REMOTE is applicable here ? > + > static vm_fault_t __do_page_fault(struct mm_struct *mm, > struct vm_area_struct *vma, unsigned long addr, > unsigned int mm_flags, unsigned long vm_flags, > @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > unsigned int mm_flags = FAULT_FLAG_DEFAULT; > unsigned long addr = untagged_addr(far); > struct vm_area_struct *vma; > + bool pkey_fault = false; > + int pkey = -1; > > if (kprobe_page_fault(regs, esr)) > return 0; > @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > vma_end_read(vma); > goto lock_mmap; > } > + > + if (fault_from_pkey(esr, vma, mm_flags)) { > + vma_end_read(vma); > + goto lock_mmap; > + } > + > fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); > if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) > vma_end_read(vma); > @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > goto done; > } > > + if (fault_from_pkey(esr, vma, mm_flags)) { > + pkey_fault = true; > + pkey = vma_pkey(vma); > + } > + > fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs); > > /* Quick path to respond to signals */ > @@ -682,9 +713,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, > * Something tried to access memory that isn't in our memory > * map. > */ > - arm64_force_sig_fault(SIGSEGV, > - fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR, > - far, inf->name); > + int fault_kind; > + /* > + * The pkey value that we return to userspace can be different > + * from the pkey that caused the fault. > + * > + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); > + * 2. T1 : set POR_EL0 to deny access to pkey=4, touches, page > + * 3. T1 : faults... > + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); > + * 5. T1 : enters fault handler, takes mmap_lock, etc... > + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really > + * faulted on a pte with its pkey=4. > + */ > + > + if (pkey_fault) > + fault_kind = SEGV_PKUERR; > + else > + fault_kind = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; > + > + arm64_force_sig_fault_pkey(SIGSEGV, > + fault_kind, > + far, inf->name, pkey); > } > > return 0;
On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > If a memory fault occurs that is due to an overlay/pkey fault, report that to > userspace with a SEGV_PKUERR. > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will@kernel.org> > --- > arch/arm64/include/asm/traps.h | 1 + > arch/arm64/kernel/traps.c | 12 ++++++-- > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > 3 files changed, 64 insertions(+), 5 deletions(-) > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h > index eefe766d6161..f6f6f2cb7f10 100644 > --- a/arch/arm64/include/asm/traps.h > +++ b/arch/arm64/include/asm/traps.h > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) > void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); > void arm64_notify_segfault(unsigned long addr); > void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); > void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index 215e6d7f2df8..1bac6c84d3f5 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > __show_regs(regs); > } > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > - const char *str) > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > + const char *str, int pkey) > { > arm64_show_signal(signo, str); > if (signo == SIGKILL) > force_sig(SIGKILL); > + else if (code == SEGV_PKUERR) > + force_sig_pkuerr((void __user *)far, pkey); Is signo definitely SIGSEGV here? It looks to me like we can get in here for SIGBUS, SIGTRAP etc. si_codes are not unique between different signo here, so I'm wondering whether this should this be: else if (signo == SIGSEGV && code == SEGV_PKUERR) ...? > else > force_sig_fault(signo, code, (void __user *)far); > } > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > + const char *str) > +{ > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); Is there a reason not to follow the same convention as elsewhere, where -1 is passed for "no pkey"? If we think this should never be called with signo == SIGSEGV && code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then maybe a WARN_ON_ONCE() would be worth it here? [...] Cheers ---Dave
On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote: > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > > If a memory fault occurs that is due to an overlay/pkey fault, report that to > > userspace with a SEGV_PKUERR. > > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > > Cc: Catalin Marinas <catalin.marinas@arm.com> > > Cc: Will Deacon <will@kernel.org> > > --- > > arch/arm64/include/asm/traps.h | 1 + > > arch/arm64/kernel/traps.c | 12 ++++++-- > > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > > 3 files changed, 64 insertions(+), 5 deletions(-) > > > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h > > index eefe766d6161..f6f6f2cb7f10 100644 > > --- a/arch/arm64/include/asm/traps.h > > +++ b/arch/arm64/include/asm/traps.h > > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) > > void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); > > void arm64_notify_segfault(unsigned long addr); > > void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); > > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); > > void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); > > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > > index 215e6d7f2df8..1bac6c84d3f5 100644 > > --- a/arch/arm64/kernel/traps.c > > +++ b/arch/arm64/kernel/traps.c > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > > __show_regs(regs); > > } > > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > > - const char *str) > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > + const char *str, int pkey) > > { > > arm64_show_signal(signo, str); > > if (signo == SIGKILL) > > force_sig(SIGKILL); > > + else if (code == SEGV_PKUERR) > > + force_sig_pkuerr((void __user *)far, pkey); > > Is signo definitely SIGSEGV here? It looks to me like we can get in > here for SIGBUS, SIGTRAP etc. > > si_codes are not unique between different signo here, so I'm wondering > whether this should this be: > > else if (signo == SIGSEGV && code == SEGV_PKUERR) > > ...? > > > > else > > force_sig_fault(signo, code, (void __user *)far); > > } > > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > > + const char *str) > > +{ > > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); > > Is there a reason not to follow the same convention as elsewhere, where > -1 is passed for "no pkey"? > > If we think this should never be called with signo == SIGSEGV && > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then > maybe a WARN_ON_ONCE() would be worth it here? > Anshuman suggested to separate them out, which I did like below, I think that addresses your comments too? diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c index 215e6d7f2df8..49bac9ae04c0 100644 --- arch/arm64/kernel/traps.c +++ arch/arm64/kernel/traps.c @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, force_sig_fault(signo, code, (void __user *)far); } +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, + const char *str, int pkey) +{ + arm64_show_signal(signo, str); + force_sig_pkuerr((void __user *)far, pkey); +} + void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str) { diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c index 451ba7cbd5ad..1ddd46b97f88 100644 --- arch/arm64/mm/fault.c +++ arch/arm64/mm/fault.c - arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); + if (si_code == SEGV_PKUERR) + arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey); + else + arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); Thanks, Joey
Hi, On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote: > On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote: > > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > > > If a memory fault occurs that is due to an overlay/pkey fault, report that to > > > userspace with a SEGV_PKUERR. > > > > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > > > Cc: Catalin Marinas <catalin.marinas@arm.com> > > > Cc: Will Deacon <will@kernel.org> > > > --- > > > arch/arm64/include/asm/traps.h | 1 + > > > arch/arm64/kernel/traps.c | 12 ++++++-- > > > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > > > 3 files changed, 64 insertions(+), 5 deletions(-) > > > > > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h > > > index eefe766d6161..f6f6f2cb7f10 100644 > > > --- a/arch/arm64/include/asm/traps.h > > > +++ b/arch/arm64/include/asm/traps.h > > > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) > > > void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); > > > void arm64_notify_segfault(unsigned long addr); > > > void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); > > > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); > > > void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); > > > > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > > > index 215e6d7f2df8..1bac6c84d3f5 100644 > > > --- a/arch/arm64/kernel/traps.c > > > +++ b/arch/arm64/kernel/traps.c > > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > > > __show_regs(regs); > > > } > > > > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > - const char *str) > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > > + const char *str, int pkey) > > > { > > > arm64_show_signal(signo, str); > > > if (signo == SIGKILL) > > > force_sig(SIGKILL); > > > + else if (code == SEGV_PKUERR) > > > + force_sig_pkuerr((void __user *)far, pkey); > > > > Is signo definitely SIGSEGV here? It looks to me like we can get in > > here for SIGBUS, SIGTRAP etc. > > > > si_codes are not unique between different signo here, so I'm wondering > > whether this should this be: > > > > else if (signo == SIGSEGV && code == SEGV_PKUERR) > > > > ...? > > > > > > > else > > > force_sig_fault(signo, code, (void __user *)far); > > > } > > > > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > + const char *str) > > > +{ > > > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); > > > > Is there a reason not to follow the same convention as elsewhere, where > > -1 is passed for "no pkey"? > > > > If we think this should never be called with signo == SIGSEGV && > > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then > > maybe a WARN_ON_ONCE() would be worth it here? > > > > Anshuman suggested to separate them out, which I did like below, I think that > addresses your comments too? > > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c > index 215e6d7f2df8..49bac9ae04c0 100644 > --- arch/arm64/kernel/traps.c > +++ arch/arm64/kernel/traps.c > @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, > force_sig_fault(signo, code, (void __user *)far); > } > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > + const char *str, int pkey) > +{ > + arm64_show_signal(signo, str); > + force_sig_pkuerr((void __user *)far, pkey); > +} > + > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, > const char *str) > { > > diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c > index 451ba7cbd5ad..1ddd46b97f88 100644 > --- arch/arm64/mm/fault.c > +++ arch/arm64/mm/fault.c (Guessing where this is means to apply, since there is no hunk header or context...) > > - arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); > + if (si_code == SEGV_PKUERR) > + arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey); Maybe drop the the signo and si_code argument? This would mean that arm64_force_sig_fault_pkey() can't be called with a signo/si_code combination that makes no sense. I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right? Or are there other combinations that can apply for these faults? > + else > + arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); Otherwise yes, I think splitting things this way makes sense. Cheers ---Dave
On Tue, Aug 06, 2024 at 02:33:37PM +0100, Dave Martin wrote: > Hi, > > On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote: > > On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote: > > > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > > > > If a memory fault occurs that is due to an overlay/pkey fault, report that to > > > > userspace with a SEGV_PKUERR. > > > > > > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > > > > Cc: Catalin Marinas <catalin.marinas@arm.com> > > > > Cc: Will Deacon <will@kernel.org> > > > > --- > > > > arch/arm64/include/asm/traps.h | 1 + > > > > arch/arm64/kernel/traps.c | 12 ++++++-- > > > > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > > > > 3 files changed, 64 insertions(+), 5 deletions(-) > > > > > > > > diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h > > > > index eefe766d6161..f6f6f2cb7f10 100644 > > > > --- a/arch/arm64/include/asm/traps.h > > > > +++ b/arch/arm64/include/asm/traps.h > > > > @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) > > > > void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); > > > > void arm64_notify_segfault(unsigned long addr); > > > > void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); > > > > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); > > > > void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); > > > > > > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > > > > index 215e6d7f2df8..1bac6c84d3f5 100644 > > > > --- a/arch/arm64/kernel/traps.c > > > > +++ b/arch/arm64/kernel/traps.c > > > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > > > > __show_regs(regs); > > > > } > > > > > > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > > - const char *str) > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > > > + const char *str, int pkey) > > > > { > > > > arm64_show_signal(signo, str); > > > > if (signo == SIGKILL) > > > > force_sig(SIGKILL); > > > > + else if (code == SEGV_PKUERR) > > > > + force_sig_pkuerr((void __user *)far, pkey); > > > > > > Is signo definitely SIGSEGV here? It looks to me like we can get in > > > here for SIGBUS, SIGTRAP etc. > > > > > > si_codes are not unique between different signo here, so I'm wondering > > > whether this should this be: > > > > > > else if (signo == SIGSEGV && code == SEGV_PKUERR) > > > > > > ...? > > > > > > > > > > else > > > > force_sig_fault(signo, code, (void __user *)far); > > > > } > > > > > > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > > + const char *str) > > > > +{ > > > > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); > > > > > > Is there a reason not to follow the same convention as elsewhere, where > > > -1 is passed for "no pkey"? > > > > > > If we think this should never be called with signo == SIGSEGV && > > > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then > > > maybe a WARN_ON_ONCE() would be worth it here? > > > > > > > Anshuman suggested to separate them out, which I did like below, I think that > > addresses your comments too? > > > > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c > > index 215e6d7f2df8..49bac9ae04c0 100644 > > --- arch/arm64/kernel/traps.c > > +++ arch/arm64/kernel/traps.c > > @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, > > force_sig_fault(signo, code, (void __user *)far); > > } > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > + const char *str, int pkey) > > +{ > > + arm64_show_signal(signo, str); > > + force_sig_pkuerr((void __user *)far, pkey); > > +} > > + > > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, > > const char *str) > > { > > > > diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c > > index 451ba7cbd5ad..1ddd46b97f88 100644 > > --- arch/arm64/mm/fault.c > > +++ arch/arm64/mm/fault.c > > (Guessing where this is means to apply, since there is no hunk header > or context...) Sorry I had some other changes and just mashed the bits into a diff-looking-thing. > > > > > - arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); > > + if (si_code == SEGV_PKUERR) > > + arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey); > > Maybe drop the the signo and si_code argument? This would mean that > arm64_force_sig_fault_pkey() can't be called with a signo/si_code > combination that makes no sense. > > I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right? > Or are there other combinations that can apply for these faults? Ah yes, I can simplify it even more, thanks. diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c index 49bac9ae04c0..d9abb8b390c0 100644 --- arch/arm64/kernel/traps.c +++ arch/arm64/kernel/traps.c @@ -273,10 +273,9 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, force_sig_fault(signo, code, (void __user *)far); } -void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, - const char *str, int pkey) +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey) { - arm64_show_signal(signo, str); + arm64_show_signal(SIGSEGV, str); force_sig_pkuerr((void __user *)far, pkey); } > > > > + else > > + arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); > > Otherwise yes, I think splitting things this way makes sense. > > Cheers > ---Dave
Hi, On Tue, Aug 06, 2024 at 02:43:57PM +0100, Joey Gouly wrote: > On Tue, Aug 06, 2024 at 02:33:37PM +0100, Dave Martin wrote: > > Hi, > > > > On Thu, Aug 01, 2024 at 05:01:10PM +0100, Joey Gouly wrote: > > > On Thu, Jul 25, 2024 at 04:57:09PM +0100, Dave Martin wrote: > > > > On Fri, May 03, 2024 at 02:01:33PM +0100, Joey Gouly wrote: > > > > > If a memory fault occurs that is due to an overlay/pkey fault, report that to > > > > > userspace with a SEGV_PKUERR. > > > > > > > > > > Signed-off-by: Joey Gouly <joey.gouly@arm.com> > > > > > Cc: Catalin Marinas <catalin.marinas@arm.com> > > > > > Cc: Will Deacon <will@kernel.org> > > > > > --- > > > > > arch/arm64/include/asm/traps.h | 1 + > > > > > arch/arm64/kernel/traps.c | 12 ++++++-- > > > > > arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- > > > > > 3 files changed, 64 insertions(+), 5 deletions(-) [...] > > > > > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > > > > > index 215e6d7f2df8..1bac6c84d3f5 100644 > > > > > --- a/arch/arm64/kernel/traps.c > > > > > +++ b/arch/arm64/kernel/traps.c > > > > > @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) > > > > > __show_regs(regs); > > > > > } > > > > > > > > > > -void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > > > - const char *str) > > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > > > > + const char *str, int pkey) > > > > > { > > > > > arm64_show_signal(signo, str); > > > > > if (signo == SIGKILL) > > > > > force_sig(SIGKILL); > > > > > + else if (code == SEGV_PKUERR) > > > > > + force_sig_pkuerr((void __user *)far, pkey); > > > > > > > > Is signo definitely SIGSEGV here? It looks to me like we can get in > > > > here for SIGBUS, SIGTRAP etc. > > > > > > > > si_codes are not unique between different signo here, so I'm wondering > > > > whether this should this be: > > > > > > > > else if (signo == SIGSEGV && code == SEGV_PKUERR) > > > > > > > > ...? > > > > > > > > > > > > > else > > > > > force_sig_fault(signo, code, (void __user *)far); > > > > > } > > > > > > > > > > +void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > > > + const char *str) > > > > > +{ > > > > > + arm64_force_sig_fault_pkey(signo, code, far, str, 0); > > > > > > > > Is there a reason not to follow the same convention as elsewhere, where > > > > -1 is passed for "no pkey"? > > > > > > > > If we think this should never be called with signo == SIGSEGV && > > > > code == SEGV_PKUERR and no valid pkey but if it's messy to prove, then > > > > maybe a WARN_ON_ONCE() would be worth it here? > > > > > > > > > > Anshuman suggested to separate them out, which I did like below, I think that > > > addresses your comments too? > > > > > > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c > > > index 215e6d7f2df8..49bac9ae04c0 100644 > > > --- arch/arm64/kernel/traps.c > > > +++ arch/arm64/kernel/traps.c > > > @@ -273,6 +273,13 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, > > > force_sig_fault(signo, code, (void __user *)far); > > > } > > > > > > +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > > > + const char *str, int pkey) > > > +{ > > > + arm64_show_signal(signo, str); > > > + force_sig_pkuerr((void __user *)far, pkey); > > > +} > > > + > > > void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, > > > const char *str) > > > { > > > > > > diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c > > > index 451ba7cbd5ad..1ddd46b97f88 100644 > > > --- arch/arm64/mm/fault.c > > > +++ arch/arm64/mm/fault.c > > > > (Guessing where this is means to apply, since there is no hunk header > > or context...) > > Sorry I had some other changes and just mashed the bits into a diff-looking-thing. Fair enough. There are a few similar bits of code, so including more lines of context would have been helpful. The change looked reasonable though. > > > > > > - arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name); > > > + if (si_code == SEGV_PKUERR) > > > + arm64_force_sig_fault_pkey(SIGSEGV, si_code, far, inf->name, pkey); > > > > Maybe drop the the signo and si_code argument? This would mean that > > arm64_force_sig_fault_pkey() can't be called with a signo/si_code > > combination that makes no sense. > > > > I think pkey faults are always going to be SIGSEGV/SEGV_PKUERR, right? > > Or are there other combinations that can apply for these faults? > > Ah yes, I can simplify it even more, thanks. > > diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c > index 49bac9ae04c0..d9abb8b390c0 100644 > --- arch/arm64/kernel/traps.c > +++ arch/arm64/kernel/traps.c > @@ -273,10 +273,9 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far, > force_sig_fault(signo, code, (void __user *)far); > } > > -void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, > - const char *str, int pkey) > +void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey) > { > - arm64_show_signal(signo, str); > + arm64_show_signal(SIGSEGV, str); > force_sig_pkuerr((void __user *)far, pkey); > } Looks sensible. I see that force_sig_pkuerr() fills in the signo and si_code itself. Cheers ---Dave
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index eefe766d6161..f6f6f2cb7f10 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn) void force_signal_inject(int signal, int code, unsigned long address, unsigned long err); void arm64_notify_segfault(unsigned long addr); void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str); +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, const char *str, int pkey); void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str); void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 215e6d7f2df8..1bac6c84d3f5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -263,16 +263,24 @@ static void arm64_show_signal(int signo, const char *str) __show_regs(regs); } -void arm64_force_sig_fault(int signo, int code, unsigned long far, - const char *str) +void arm64_force_sig_fault_pkey(int signo, int code, unsigned long far, + const char *str, int pkey) { arm64_show_signal(signo, str); if (signo == SIGKILL) force_sig(SIGKILL); + else if (code == SEGV_PKUERR) + force_sig_pkuerr((void __user *)far, pkey); else force_sig_fault(signo, code, (void __user *)far); } +void arm64_force_sig_fault(int signo, int code, unsigned long far, + const char *str) +{ + arm64_force_sig_fault_pkey(signo, code, far, str, 0); +} + void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 8251e2fea9c7..585295168918 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -23,6 +23,7 @@ #include <linux/sched/debug.h> #include <linux/highmem.h> #include <linux/perf_event.h> +#include <linux/pkeys.h> #include <linux/preempt.h> #include <linux/hugetlb.h> @@ -489,6 +490,23 @@ static void do_bad_area(unsigned long far, unsigned long esr, #define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) #define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma, + unsigned int mm_flags) +{ + unsigned long iss2 = ESR_ELx_ISS2(esr); + + if (!arch_pkeys_enabled()) + return false; + + if (iss2 & ESR_ELx_Overlay) + return true; + + return !arch_vma_access_permitted(vma, + mm_flags & FAULT_FLAG_WRITE, + mm_flags & FAULT_FLAG_INSTRUCTION, + mm_flags & FAULT_FLAG_REMOTE); +} + static vm_fault_t __do_page_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, @@ -529,6 +547,8 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); struct vm_area_struct *vma; + bool pkey_fault = false; + int pkey = -1; if (kprobe_page_fault(regs, esr)) return 0; @@ -590,6 +610,12 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, vma_end_read(vma); goto lock_mmap; } + + if (fault_from_pkey(esr, vma, mm_flags)) { + vma_end_read(vma); + goto lock_mmap; + } + fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) vma_end_read(vma); @@ -617,6 +643,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto done; } + if (fault_from_pkey(esr, vma, mm_flags)) { + pkey_fault = true; + pkey = vma_pkey(vma); + } + fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs); /* Quick path to respond to signals */ @@ -682,9 +713,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, * Something tried to access memory that isn't in our memory * map. */ - arm64_force_sig_fault(SIGSEGV, - fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR, - far, inf->name); + int fault_kind; + /* + * The pkey value that we return to userspace can be different + * from the pkey that caused the fault. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set POR_EL0 to deny access to pkey=4, touches, page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_lock, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + + if (pkey_fault) + fault_kind = SEGV_PKUERR; + else + fault_kind = fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR; + + arm64_force_sig_fault_pkey(SIGSEGV, + fault_kind, + far, inf->name, pkey); } return 0;
If a memory fault occurs that is due to an overlay/pkey fault, report that to userspace with a SEGV_PKUERR. Signed-off-by: Joey Gouly <joey.gouly@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/traps.h | 1 + arch/arm64/kernel/traps.c | 12 ++++++-- arch/arm64/mm/fault.c | 56 ++++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 5 deletions(-)