Message ID | 20210119160723.116983-2-vladimir.murzin@arm.com (mailing list archive) |
---|---
State | New, archived |
Series | arm64: Support Enhanced PAN
On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
> to be used with Execute-only mappings.
>
> Absence of such support was a reason for 24cecc377463 ("arm64: Revert
> support for execute-only user mappings"). Thus now it can be revisited
> and re-enabled.
>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
> ---
>  arch/arm64/Kconfig                    | 17 +++++++++++++++++
>  arch/arm64/include/asm/cpucaps.h      |  3 ++-
>  arch/arm64/include/asm/pgtable-prot.h |  5 +++--
>  arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
>  arch/arm64/include/asm/sysreg.h       |  3 ++-
>  arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
>  arch/arm64/mm/fault.c                 |  3 +++
>  7 files changed, 52 insertions(+), 5 deletions(-)

(please cc me on arm64 patches)

> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f39568b..e63cc18 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1053,6 +1053,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
>  config ARCH_HAS_CACHE_LINE_SIZE
>  	def_bool y
>
> +config ARCH_HAS_FILTER_PGPROT
> +	def_bool y
> +
>  config ARCH_ENABLE_SPLIT_PMD_PTLOCK
>  	def_bool y if PGTABLE_LEVELS > 2
>
> @@ -1673,6 +1676,20 @@ config ARM64_MTE
>
>  endmenu
>
> +menu "ARMv8.7 architectural features"
> +
> +config ARM64_EPAN
> +	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
> +	default y
> +	depends on ARM64_PAN
> +	help
> +	  Enhanced Privileged Access Never (EPAN) allows Privileged
> +	  Access Never to be used with Execute-only mappings.
> +
> +	  The feature is detected at runtime, and will remain disabled
> +	  if the cpu does not implement the feature.
> +endmenu
> +
>  config ARM64_SVE
>  	bool "ARM Scalable Vector Extension support"
>  	default y
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index b77d997..9e3ec4d 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -66,7 +66,8 @@
>  #define ARM64_WORKAROUND_1508412	58
>  #define ARM64_HAS_LDAPR			59
>  #define ARM64_KVM_PROTECTED_MODE	60
> +#define ARM64_HAS_EPAN			61
>
> -#define ARM64_NCAPS			61
> +#define ARM64_NCAPS			62
>
>  #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be78..f91c2aa 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -88,12 +88,13 @@ extern bool arm64_use_ng_mappings;
>  #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
>  #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
>  #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
> +#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
>
>  #define __P000  PAGE_NONE
>  #define __P001  PAGE_READONLY
>  #define __P010  PAGE_READONLY
>  #define __P011  PAGE_READONLY
> -#define __P100  PAGE_READONLY_EXEC
> +#define __P100  PAGE_EXECONLY
>  #define __P101  PAGE_READONLY_EXEC
>  #define __P110  PAGE_READONLY_EXEC
>  #define __P111  PAGE_READONLY_EXEC
> @@ -102,7 +103,7 @@ extern bool arm64_use_ng_mappings;
>  #define __S001  PAGE_READONLY
>  #define __S010  PAGE_SHARED
>  #define __S011  PAGE_SHARED
> -#define __S100  PAGE_READONLY_EXEC
> +#define __S100  PAGE_EXECONLY
>  #define __S101  PAGE_READONLY_EXEC
>  #define __S110  PAGE_SHARED_EXEC
>  #define __S111  PAGE_SHARED_EXEC
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 5015627..0196849 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -114,7 +114,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
>
>  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))

We used to have a useful comment here describing why we're looking at UXN.

There was also one next to the protection_map[], which I think we should add
back.

>  #define pte_valid_not_user(pte) \
> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
>  #define pte_valid_user(pte) \
>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))

Won't pte_valid_user() go wrong for exec-only mappings now?

>
> @@ -982,6 +982,18 @@ static inline bool arch_faults_on_old_pte(void)
>  }
>  #define arch_faults_on_old_pte arch_faults_on_old_pte
>
> +static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
> +{
> +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> +		return prot;
> +
> +	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
> +		return prot;
> +
> +	return PAGE_READONLY_EXEC;
> +}
> +
> +
>  #endif /* !__ASSEMBLY__ */
>
>  #endif /* __ASM_PGTABLE_H */
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 8b5e7e5..47e9fdc 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -591,6 +591,7 @@
>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
>
>  /* SCTLR_EL1 specific flags. */
> +#define SCTLR_EL1_EPAN		(BIT(57))
>  #define SCTLR_EL1_ATA0		(BIT(42))
>
>  #define SCTLR_EL1_TCF0_SHIFT	38
> @@ -631,7 +632,7 @@
>  	 SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
>  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
>  	 SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
> -	 SCTLR_EL1_RES1)
> +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)

Why is this handled differently to normal PAN, where the SCTLR is written in
cpu_enable_pan()?
>
>  /* MAIR_ELx memory attributes (used by Linux) */
>  #define MAIR_ATTR_DEVICE_nGnRnE	UL(0x00)
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index e99edde..9d85956 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1774,6 +1774,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.cpu_enable = cpu_enable_pan,
>  	},
>  #endif /* CONFIG_ARM64_PAN */
> +#ifdef CONFIG_ARM64_EPAN
> +	{
> +		.desc = "Enhanced Privileged Access Never",
> +		.capability = ARM64_HAS_EPAN,
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.matches = has_cpuid_feature,
> +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> +		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
> +		.sign = FTR_UNSIGNED,
> +		.min_field_value = 3,
> +	},
> +#endif /* CONFIG_ARM64_EPAN */
>  #ifdef CONFIG_ARM64_LSE_ATOMICS
>  	{
>  		.desc = "LSE atomic instructions",
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 3c40da4..c32095f6 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
>  	if (faulthandler_disabled() || !mm)
>  		goto no_context;
>
> +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> +		vm_flags &= ~VM_EXEC;

This needs a comment, and I think it would be cleaner to rework the vm_flags
initialisation along the lines of:

	unsigned long vm_flags;
	unsigned long mm_flags = FAULT_FLAG_DEFAULT;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
		mm_flags |= FAULT_FLAG_INSTRUCTION;
	} else if (is_write_abort(esr)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	} else {
		vm_flags = VM_READ | VM_WRITE;
		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
			vm_flags |= VM_EXEC;
	}

(but again, please add a comment to that last part because I still don't
really follow what you're doing)

Will
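Background for the exchange above: a minimal userspace sketch (not from the
thread) of what an execute-only mapping means once __P100/__S100 map to
PAGE_EXECONLY. The AArch64 "ret" encoding and the GCC cache-maintenance
builtin are assumptions for illustration; on non-EPAN hardware this patch's
arch_filter_pgprot() silently upgrades the request to PAGE_READONLY_EXEC.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* Stage the code in a writable mapping first. */
	unsigned char *buf = mmap(NULL, page, PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;

	/* AArch64 "ret" encoding, assumed here for illustration. */
	unsigned int insn = 0xd65f03c0;
	memcpy(buf, &insn, sizeof(insn));
	__builtin___clear_cache((char *)buf, (char *)buf + sizeof(insn));

	/* Drop to execute-only: no PROT_READ, no PROT_WRITE. */
	if (mprotect(buf, page, PROT_EXEC))
		return 1;

	((void (*)(void))buf)();	/* instruction fetch still works */

	/*
	 * A data access such as "insn = *(unsigned int *)buf;" would
	 * now SIGSEGV: the page is neither readable nor writable.
	 */
	printf("returned from an exec-only page\n");
	return 0;
}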
On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >  #define pte_valid_not_user(pte) \
> > -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> > +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >  #define pte_valid_user(pte) \
> >  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
>
> Won't pte_valid_user() go wrong for exec-only mappings now?

I wondered about the same in November (and reproducing my comment
below):

https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/

pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
!PTE_UXN is also user-accessible but it's only used in gup_pte_range()
via pte_access_permitted(). If "access" in the latter means only
read/write, we should be ok. If we keep it as is, a comment would
be useful.

> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 8b5e7e5..47e9fdc 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -591,6 +591,7 @@
> >  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> >
> >  /* SCTLR_EL1 specific flags. */
> > +#define SCTLR_EL1_EPAN		(BIT(57))
> >  #define SCTLR_EL1_ATA0		(BIT(42))
> >
> >  #define SCTLR_EL1_TCF0_SHIFT	38
> > @@ -631,7 +632,7 @@
> >  	 SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
> >  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
> >  	 SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
> > -	 SCTLR_EL1_RES1)
> > +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
>
> Why is this handled differently to normal PAN, where the SCTLR is written in
> cpu_enable_pan()?

That's how it was done in an early version but I suggested we move it to
the default SCTLR bits to save some lines:

https://lore.kernel.org/linux-arm-kernel/20201202182303.GC21091@gaia/

With classic PAN, we only enable it if all the CPUs support it. For EPAN
that's not necessary since EPAN should depend on PAN being enabled. It's
also cached in the TLB, so it's a bit of a hassle with CnP. See this
past discussion:

https://lore.kernel.org/linux-arm-kernel/X7P+r%2Fl3ewvaf1aV@trantor/

> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index 3c40da4..c32095f6 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
> >  	if (faulthandler_disabled() || !mm)
> >  		goto no_context;
> >
> > +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> > +		vm_flags &= ~VM_EXEC;
>
> This needs a comment, and I think it would be cleaner to rework the vm_flags
> initialisation along the lines of:
>
> 	unsigned long vm_flags;
> 	unsigned long mm_flags = FAULT_FLAG_DEFAULT;
>
> 	if (user_mode(regs))
> 		mm_flags |= FAULT_FLAG_USER;
>
> 	if (is_el0_instruction_abort(esr)) {
> 		vm_flags = VM_EXEC;
> 		mm_flags |= FAULT_FLAG_INSTRUCTION;
> 	} else if (is_write_abort(esr)) {
> 		vm_flags = VM_WRITE;
> 		mm_flags |= FAULT_FLAG_WRITE;
> 	} else {
> 		vm_flags = VM_READ | VM_WRITE;
> 		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
> 			vm_flags |= VM_EXEC;
> 	}
>
> (but again, please add a comment to that last part because I still don't
> really follow what you're doing)

Every time I come across this vm_flags handling I have to spend some
minutes to re-understand what it does. So vm_flags tells us what bits we
must have in vma->vm_flags for the fault to be benign. But here we start
with all rwx flags and clear VM_EXEC for EPAN, which is more confusing
since exec does not imply read.

I think your rewrite is cleaner, maybe with some comment on why we add
VM_EXEC when !EPAN.
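Catalin's description of vm_flags ("what bits we must have in vma->vm_flags
for the fault to be benign") refers to the check further down the fault
path; roughly, abridged from the v5.11-era arm64 __do_page_fault():

	/*
	 * A fault is only benign if the VMA grants at least one of the
	 * access kinds encoded in vm_flags; otherwise it's a bad access.
	 */
	if (!(vma->vm_flags & vm_flags))
		return VM_FAULT_BADACCESS;

	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);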
On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> > On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> > >  #define pte_valid_not_user(pte) \
> > > -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> > > +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> > >  #define pte_valid_user(pte) \
> > >  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> >
> > Won't pte_valid_user() go wrong for exec-only mappings now?
>
> I wondered about the same in November (and reproducing my comment
> below):
>
> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
>
> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> via pte_access_permitted(). If "access" in the latter means only
> read/write, we should be ok. If we keep it as is, a comment would
> be useful.

I think we should ensure that:

	pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))

is always true, but I don't think it is after this patch. It reminds me
of some of the tests that Anshuman wrote.

> > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > > index 8b5e7e5..47e9fdc 100644
> > > --- a/arch/arm64/include/asm/sysreg.h
> > > +++ b/arch/arm64/include/asm/sysreg.h
> > > @@ -591,6 +591,7 @@
> > >  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> > >
> > >  /* SCTLR_EL1 specific flags. */
> > > +#define SCTLR_EL1_EPAN		(BIT(57))
> > >  #define SCTLR_EL1_ATA0		(BIT(42))
> > >
> > >  #define SCTLR_EL1_TCF0_SHIFT	38
> > > @@ -631,7 +632,7 @@
> > >  	 SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
> > >  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
> > >  	 SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
> > > -	 SCTLR_EL1_RES1)
> > > +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
> >
> > Why is this handled differently to normal PAN, where the SCTLR is written in
> > cpu_enable_pan()?
>
> That's how it was done in an early version but I suggested we move it to
> the default SCTLR bits to save some lines:
>
> https://lore.kernel.org/linux-arm-kernel/20201202182303.GC21091@gaia/
>
> With classic PAN, we only enable it if all the CPUs support it. For EPAN
> that's not necessary since EPAN should depend on PAN being enabled.

Ok, let's just hope we don't have any CPU errata requiring us to disable
it.

> It's also cached in the TLB, so it's a bit of a hassle with CnP. See
> this past discussion:
>
> https://lore.kernel.org/linux-arm-kernel/X7P+r%2Fl3ewvaf1aV@trantor/

Thanks. None of this is in the Arm ARM afaict, so I can't figure this
stuff out on my own.

> > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > index 3c40da4..c32095f6 100644
> > > --- a/arch/arm64/mm/fault.c
> > > +++ b/arch/arm64/mm/fault.c
> > > @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
> > >  	if (faulthandler_disabled() || !mm)
> > >  		goto no_context;
> > >
> > > +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> > > +		vm_flags &= ~VM_EXEC;
> >
> > This needs a comment, and I think it would be cleaner to rework the vm_flags
> > initialisation along the lines of:
> >
> > 	unsigned long vm_flags;
> > 	unsigned long mm_flags = FAULT_FLAG_DEFAULT;
> >
> > 	if (user_mode(regs))
> > 		mm_flags |= FAULT_FLAG_USER;
> >
> > 	if (is_el0_instruction_abort(esr)) {
> > 		vm_flags = VM_EXEC;
> > 		mm_flags |= FAULT_FLAG_INSTRUCTION;
> > 	} else if (is_write_abort(esr)) {
> > 		vm_flags = VM_WRITE;
> > 		mm_flags |= FAULT_FLAG_WRITE;
> > 	} else {
> > 		vm_flags = VM_READ | VM_WRITE;
> > 		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
> > 			vm_flags |= VM_EXEC;
> > 	}
> >
> > (but again, please add a comment to that last part because I still don't
> > really follow what you're doing)
>
> Every time I come across this vm_flags handling I have to spend some
> minutes to re-understand what it does. So vm_flags tells us what bits we
> must have in vma->vm_flags for the fault to be benign. But here we start
> with all rwx flags and clear VM_EXEC for EPAN, which is more confusing
> since exec does not imply read.
>
> I think your rewrite is cleaner, maybe with some comment on why we add
> VM_EXEC when !EPAN.

Agreed.

Will
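For contrast with folding SCTLR_EL1_EPAN into the default SCTLR bits,
classic PAN's runtime enable hook looks roughly like this (abridged from
arch/arm64/kernel/cpufeature.c of that era; a sketch, not a definitive
quote):

static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
	/*
	 * We modify PSTATE. This won't work from irq context as the
	 * PSTATE is discarded once we return from the exception.
	 */
	WARN_ON_ONCE(in_interrupt());

	sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0);
	set_pstate_pan(1);
}

EPAN needs no per-CPU hook or PSTATE change: it is a single SCTLR_EL1 bit,
so the patch simply sets it in the SCTLR_EL1_SET reset value from boot.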
On 1/26/21 11:03 AM, Will Deacon wrote:
> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
>> Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
>> to be used with Execute-only mappings.
>>
>> Absence of such support was a reason for 24cecc377463 ("arm64: Revert
>> support for execute-only user mappings"). Thus now it can be revisited
>> and re-enabled.
>>
>> Cc: Kees Cook <keescook@chromium.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
>> ---
>>  arch/arm64/Kconfig                    | 17 +++++++++++++++++
>>  arch/arm64/include/asm/cpucaps.h      |  3 ++-
>>  arch/arm64/include/asm/pgtable-prot.h |  5 +++--
>>  arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
>>  arch/arm64/include/asm/sysreg.h       |  3 ++-
>>  arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
>>  arch/arm64/mm/fault.c                 |  3 +++
>>  7 files changed, 52 insertions(+), 5 deletions(-)
>
> (please cc me on arm64 patches)

Ah, surely I should have done that!

snip...

>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -114,7 +114,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
>>
>>  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
>
> We used to have a useful comment here describing why we're looking at UXN.
>
> There was also one next to the protection_map[], which I think we should add
> back.

Noted. It seems that the rest has been covered by Catalin...

Cheers
Vladimir
On 1/26/21 12:23 PM, Will Deacon wrote:
> On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
>> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
>>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
>>>>  #define pte_valid_not_user(pte) \
>>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
>>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
>>>>  #define pte_valid_user(pte) \
>>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
>>>
>>> Won't pte_valid_user() go wrong for exec-only mappings now?
>>
>> I wondered about the same in November (and reproducing my comment
>> below):
>>
>> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
>>
>> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
>> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
>> via pte_access_permitted(). If "access" in the latter means only
>> read/write, we should be ok. If we keep it as is, a comment would
>> be useful.
>
> I think we should ensure that:
>
> 	pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
>
> is always true, but I don't think it is after this patch. It reminds me
> of some of the tests that Anshuman wrote.

Something like

/*
 * Most user mappings have the PTE_USER bit set; Execute-only mappings
 * have both the PTE_USER and PTE_UXN bits clear.
 */
#define pte_valid_user(pte) \
	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
	 ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

Cheers
Vladimir
On Fri, Mar 12, 2021 at 11:19:02AM +0000, Vladimir Murzin wrote:
> On 1/26/21 12:23 PM, Will Deacon wrote:
> > On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> >> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> >>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >>>>  #define pte_valid_not_user(pte) \
> >>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> >>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >>>>  #define pte_valid_user(pte) \
> >>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> >>>
> >>> Won't pte_valid_user() go wrong for exec-only mappings now?
> >>
> >> I wondered about the same in November (and reproducing my comment
> >> below):
> >>
> >> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
> >>
> >> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> >> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> >> via pte_access_permitted(). If "access" in the latter means only
> >> read/write, we should be ok. If we keep it as is, a comment would
> >> be useful.
> >
> > I think we should ensure that:
> >
> > 	pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
> >
> > is always true, but I don't think it is after this patch. It reminds me
> > of some of the tests that Anshuman wrote.
>
> Something like
>
> /*
>  * Most user mappings have the PTE_USER bit set; Execute-only mappings
>  * have both the PTE_USER and PTE_UXN bits clear.
>  */
> #define pte_valid_user(pte) \
> 	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
> 	 ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

Yeah, assuming the compiler doesn't make a dog's dinner of it.

Will
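The invariant Will asks for can be checked mechanically. A small host-side
harness (not from the thread; the arm64 bit positions VALID=0, USER=6,
UXN=54 are hard-coded here for illustration) walks the four USER/UXN
combinations of a valid pte and confirms that exactly one of
pte_valid_not_user()/pte_valid_user() holds with Vladimir's definition:

#include <stdint.h>
#include <stdio.h>

#define PTE_VALID	(UINT64_C(1) << 0)
#define PTE_USER	(UINT64_C(1) << 6)
#define PTE_UXN		(UINT64_C(1) << 54)

#define pte_valid(pte)	(!!((pte) & PTE_VALID))
#define pte_valid_not_user(pte) \
	(((pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
/* Vladimir's proposed definition */
#define pte_valid_user(pte) \
	((((pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
	 (((pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

int main(void)
{
	/* Walk the four USER/UXN combinations of a valid pte. */
	for (int user = 0; user < 2; user++) {
		for (int uxn = 0; uxn < 2; uxn++) {
			uint64_t pte = PTE_VALID |
				       (user ? PTE_USER : 0) |
				       (uxn ? PTE_UXN : 0);

			printf("user=%d uxn=%d not_user=%d user_ok=%d%s\n",
			       user, uxn,
			       pte_valid_not_user(pte), pte_valid_user(pte),
			       pte_valid_not_user(pte) == pte_valid_user(pte)
					? "  <-- invariant broken" : "");
		}
	}
	return 0;
}

With the proposed macro, the exec-only case (user=0, uxn=0) is classified
as a user mapping and the invariant holds for all four combinations.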
On Fri, Mar 12, 2021 at 11:19:02AM +0000, Vladimir Murzin wrote:
> On 1/26/21 12:23 PM, Will Deacon wrote:
> > On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> >> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> >>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >>>>  #define pte_valid_not_user(pte) \
> >>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> >>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >>>>  #define pte_valid_user(pte) \
> >>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> >>>
> >>> Won't pte_valid_user() go wrong for exec-only mappings now?
> >>
> >> I wondered about the same in November (and reproducing my comment
> >> below):
> >>
> >> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
> >>
> >> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> >> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> >> via pte_access_permitted(). If "access" in the latter means only
> >> read/write, we should be ok. If we keep it as is, a comment would
> >> be useful.
> >
> > I think we should ensure that:
> >
> > 	pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
> >
> > is always true, but I don't think it is after this patch. It reminds me
> > of some of the tests that Anshuman wrote.
>
> Something like
>
> /*
>  * Most user mappings have the PTE_USER bit set; Execute-only mappings
>  * have both the PTE_USER and PTE_UXN bits clear.
>  */
> #define pte_valid_user(pte) \
> 	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
> 	 ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

This is equivalent to (valid && (user || !uxn)), which matches how EPAN
is now specified. However, I think this change breaks
pte_access_permitted(), which would now return true even for execute-only
mappings. Since such pages are neither readable nor writable by user,
get_user_pages_fast() should fail. For example, if we have a futex in
such an execute-only mapping, get_futex_key() succeeds with the above.
Similarly for iov_iter_get_pages().

The slow-path get_user_pages() does the checks on the vma flags and this
can be overridden with FOLL_FORCE (it doesn't use
pte_access_permitted()). I haven't seen get_user_pages_fast() called with
FOLL_FORCE, but even if it is, it looks like it's ignored as the code
relies on pte_access_permitted() only.

I'd say let's scrap pte_valid_user() altogether and move the original
logic to pte_access_permitted(), with a comment that it must return false
for execute-only user mappings.
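One possible shape of Catalin's suggestion (a sketch under the assumptions
above, not necessarily the patch that was eventually applied): drop
pte_valid_user() and open-code its old PTE_VALID|PTE_USER logic in
pte_access_permitted(), so that GUP-fast rejects execute-only mappings:

/*
 * Execute-only user mappings (PTE_USER and PTE_UXN both clear) are
 * valid but must not be readable or writable via
 * get_user_pages_fast(), so report no access permission for them.
 */
#define pte_access_permitted(pte, write) \
	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && \
	 (!(write) || pte_write(pte)))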
Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
to be used with Execute-only mappings.

Absence of such support was a reason for 24cecc377463 ("arm64: Revert
support for execute-only user mappings"). Thus now it can be revisited
and re-enabled.

Cc: Kees Cook <keescook@chromium.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm64/Kconfig                    | 17 +++++++++++++++++
 arch/arm64/include/asm/cpucaps.h      |  3 ++-
 arch/arm64/include/asm/pgtable-prot.h |  5 +++--
 arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
 arch/arm64/include/asm/sysreg.h       |  3 ++-
 arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
 arch/arm64/mm/fault.c                 |  3 +++
 7 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b..e63cc18 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1053,6 +1053,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+	def_bool y
+
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y if PGTABLE_LEVELS > 2
 
@@ -1673,6 +1676,20 @@ config ARM64_MTE
 
 endmenu
 
+menu "ARMv8.7 architectural features"
+
+config ARM64_EPAN
+	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
+	default y
+	depends on ARM64_PAN
+	help
+	  Enhanced Privileged Access Never (EPAN) allows Privileged
+	  Access Never to be used with Execute-only mappings.
+
+	  The feature is detected at runtime, and will remain disabled
+	  if the cpu does not implement the feature.
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b77d997..9e3ec4d 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -66,7 +66,8 @@
 #define ARM64_WORKAROUND_1508412	58
 #define ARM64_HAS_LDAPR			59
 #define ARM64_KVM_PROTECTED_MODE	60
+#define ARM64_HAS_EPAN			61
 
-#define ARM64_NCAPS			61
+#define ARM64_NCAPS			62
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 046be78..f91c2aa 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -88,12 +88,13 @@ extern bool arm64_use_ng_mappings;
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_READONLY
 #define __P011  PAGE_READONLY
-#define __P100  PAGE_READONLY_EXEC
+#define __P100  PAGE_EXECONLY
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_READONLY_EXEC
 #define __P111  PAGE_READONLY_EXEC
@@ -102,7 +103,7 @@ extern bool arm64_use_ng_mappings;
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY_EXEC
+#define __S100  PAGE_EXECONLY
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5015627..0196849 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -114,7 +114,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
 #define pte_valid_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 
@@ -982,6 +982,18 @@ static inline bool arch_faults_on_old_pte(void)
 }
 #define arch_faults_on_old_pte arch_faults_on_old_pte
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+	if (cpus_have_const_cap(ARM64_HAS_EPAN))
+		return prot;
+
+	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
+		return prot;
+
+	return PAGE_READONLY_EXEC;
+}
+
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5..47e9fdc 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -591,6 +591,7 @@
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
 
 /* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_EPAN		(BIT(57))
 #define SCTLR_EL1_ATA0		(BIT(42))
 
 #define SCTLR_EL1_TCF0_SHIFT	38
@@ -631,7 +632,7 @@
 	 SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \
 	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
 	 SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \
-	 SCTLR_EL1_RES1)
+	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE	UL(0x00)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e99edde..9d85956 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1774,6 +1774,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_pan,
 	},
 #endif /* CONFIG_ARM64_PAN */
+#ifdef CONFIG_ARM64_EPAN
+	{
+		.desc = "Enhanced Privileged Access Never",
+		.capability = ARM64_HAS_EPAN,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 3,
+	},
+#endif /* CONFIG_ARM64_EPAN */
 #ifdef CONFIG_ARM64_LSE_ATOMICS
 	{
 		.desc = "LSE atomic instructions",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3c40da4..c32095f6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 	if (faulthandler_disabled() || !mm)
 		goto no_context;
 
+	if (cpus_have_const_cap(ARM64_HAS_EPAN))
+		vm_flags &= ~VM_EXEC;
+
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;
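For context on where the new ARCH_HAS_FILTER_PGPROT hook plugs in: the
generic mmap path applies the arch filter after building the pgprot from
protection_map[], roughly as below (abridged from the mm/mmap.c of that
era; a sketch, not a verbatim quote). This is how a PROT_EXEC-only request
falls back to PAGE_READONLY_EXEC on hardware without EPAN:

/* Generic code applies the arch hook as a final filter. */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
			pgprot_val(arch_vm_get_page_prot(vm_flags)));

	return arch_filter_pgprot(ret);
}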