[v3,1/2] arm64: Support execute-only permissions with Enhanced PAN

Message ID 20210119160723.116983-2-vladimir.murzin@arm.com (mailing list archive)
State New, archived
Series: arm64: Support Enhanced PAN

Commit Message

Vladimir Murzin Jan. 19, 2021, 4:07 p.m. UTC
Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
to be used with Execute-only mappings.

The absence of such support was a reason for commit 24cecc377463 ("arm64:
Revert support for execute-only user mappings"). With EPAN it can now be
revisited and re-enabled.

Cc: Kees Cook <keescook@chromium.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
---
 arch/arm64/Kconfig                    | 17 +++++++++++++++++
 arch/arm64/include/asm/cpucaps.h      |  3 ++-
 arch/arm64/include/asm/pgtable-prot.h |  5 +++--
 arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
 arch/arm64/include/asm/sysreg.h       |  3 ++-
 arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
 arch/arm64/mm/fault.c                 |  3 +++
 7 files changed, 52 insertions(+), 5 deletions(-)
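
For context, the user-visible effect is that PROT_EXEC-only mappings become
possible again on EPAN-capable hardware. A minimal userspace sketch of what
this enables (illustrative only; the behaviour described in the comments
assumes this patch is applied):

	#include <stdio.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* Ask for an execute-only anonymous mapping */
		void *p = mmap(NULL, 4096, PROT_EXEC,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/*
		 * With EPAN the mapping stays execute-only, so the read
		 * below faults (SIGSEGV). Without EPAN the kernel falls
		 * back to PAGE_READONLY_EXEC via arch_filter_pgprot()
		 * and the read succeeds.
		 */
		printf("%d\n", *(volatile int *)p);
		return 0;
	}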

Comments

Will Deacon Jan. 26, 2021, 11:03 a.m. UTC | #1
On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
> to be used with Execute-only mappings.
> 
> The absence of such support was a reason for commit 24cecc377463 ("arm64:
> Revert support for execute-only user mappings"). With EPAN it can now be
> revisited and re-enabled.
> 
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
> ---
>  arch/arm64/Kconfig                    | 17 +++++++++++++++++
>  arch/arm64/include/asm/cpucaps.h      |  3 ++-
>  arch/arm64/include/asm/pgtable-prot.h |  5 +++--
>  arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
>  arch/arm64/include/asm/sysreg.h       |  3 ++-
>  arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
>  arch/arm64/mm/fault.c                 |  3 +++
>  7 files changed, 52 insertions(+), 5 deletions(-)

(please cc me on arm64 patches)

> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f39568b..e63cc18 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1053,6 +1053,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
>  config ARCH_HAS_CACHE_LINE_SIZE
>  	def_bool y
>  
> +config ARCH_HAS_FILTER_PGPROT
> +	def_bool y
> +
>  config ARCH_ENABLE_SPLIT_PMD_PTLOCK
>  	def_bool y if PGTABLE_LEVELS > 2
>  
> @@ -1673,6 +1676,20 @@ config ARM64_MTE
>  
>  endmenu
>  
> +menu "ARMv8.7 architectural features"
> +
> +config ARM64_EPAN
> +	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
> +	default y
> +	depends on ARM64_PAN
> +	help
> +	 Enhanced Privileged Access Never (EPAN) allows Privileged
> +	 Access Never to be used with Execute-only mappings.
> +
> +	 The feature is detected at runtime, and will remain disabled
> +	 if the cpu does not implement the feature.
> +endmenu
> +
>  config ARM64_SVE
>  	bool "ARM Scalable Vector Extension support"
>  	default y
> diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
> index b77d997..9e3ec4d 100644
> --- a/arch/arm64/include/asm/cpucaps.h
> +++ b/arch/arm64/include/asm/cpucaps.h
> @@ -66,7 +66,8 @@
>  #define ARM64_WORKAROUND_1508412		58
>  #define ARM64_HAS_LDAPR				59
>  #define ARM64_KVM_PROTECTED_MODE		60
> +#define ARM64_HAS_EPAN				61
>  
> -#define ARM64_NCAPS				61
> +#define ARM64_NCAPS				62
>  
>  #endif /* __ASM_CPUCAPS_H */
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 046be78..f91c2aa 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -88,12 +88,13 @@ extern bool arm64_use_ng_mappings;
>  #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
>  #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
>  #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
> +#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
>  
>  #define __P000  PAGE_NONE
>  #define __P001  PAGE_READONLY
>  #define __P010  PAGE_READONLY
>  #define __P011  PAGE_READONLY
> -#define __P100  PAGE_READONLY_EXEC
> +#define __P100  PAGE_EXECONLY
>  #define __P101  PAGE_READONLY_EXEC
>  #define __P110  PAGE_READONLY_EXEC
>  #define __P111  PAGE_READONLY_EXEC
> @@ -102,7 +103,7 @@ extern bool arm64_use_ng_mappings;
>  #define __S001  PAGE_READONLY
>  #define __S010  PAGE_SHARED
>  #define __S011  PAGE_SHARED
> -#define __S100  PAGE_READONLY_EXEC
> +#define __S100  PAGE_EXECONLY
>  #define __S101  PAGE_READONLY_EXEC
>  #define __S110  PAGE_SHARED_EXEC
>  #define __S111  PAGE_SHARED_EXEC
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 5015627..0196849 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -114,7 +114,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
>  
>  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))

We used to have a useful comment here describing why we're looking at UXN.

There was also one next to the protection_map[], which I think we should add
back.

>  #define pte_valid_not_user(pte) \
> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
>  #define pte_valid_user(pte) \
>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))

Won't pte_valid_user() go wrong for exec-only mappings now?

>  
> @@ -982,6 +982,18 @@ static inline bool arch_faults_on_old_pte(void)
>  }
>  #define arch_faults_on_old_pte arch_faults_on_old_pte
>  
> +static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
> +{
> +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> +		return prot;
> +
> +	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
> +		return prot;
> +
> +	return PAGE_READONLY_EXEC;
> +}
> +
> +
>  #endif /* !__ASSEMBLY__ */
>  
>  #endif /* __ASM_PGTABLE_H */
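
For context, with ARCH_HAS_FILTER_PGPROT selected, the hook above is called
from the generic vm_get_page_prot(), so it sees the result of the
protection_map[] lookup. Roughly, from mm/mmap.c of this era:

	pgprot_t vm_get_page_prot(unsigned long vm_flags)
	{
		pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
					(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
				pgprot_val(arch_vm_get_page_prot(vm_flags)));

		return arch_filter_pgprot(ret);
	}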
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 8b5e7e5..47e9fdc 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -591,6 +591,7 @@
>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
>  
>  /* SCTLR_EL1 specific flags. */
> +#define SCTLR_EL1_EPAN		(BIT(57))
>  #define SCTLR_EL1_ATA0		(BIT(42))
>  
>  #define SCTLR_EL1_TCF0_SHIFT	38
> @@ -631,7 +632,7 @@
>  	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
>  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
>  	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
> -	 SCTLR_EL1_RES1)
> +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)

Why is this handled differently to normal PAN, where the SCTLR is written in
cpu_enable_pan()?

>  /* MAIR_ELx memory attributes (used by Linux) */
>  #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index e99edde..9d85956 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1774,6 +1774,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
>  		.cpu_enable = cpu_enable_pan,
>  	},
>  #endif /* CONFIG_ARM64_PAN */
> +#ifdef CONFIG_ARM64_EPAN
> +	{
> +		.desc = "Enhanced Privileged Access Never",
> +		.capability = ARM64_HAS_EPAN,
> +		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
> +		.matches = has_cpuid_feature,
> +		.sys_reg = SYS_ID_AA64MMFR1_EL1,
> +		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
> +		.sign = FTR_UNSIGNED,
> +		.min_field_value = 3,
> +	},
> +#endif /* CONFIG_ARM64_EPAN */
>  #ifdef CONFIG_ARM64_LSE_ATOMICS
>  	{
>  		.desc = "LSE atomic instructions",
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 3c40da4..c32095f6 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
>  	if (faulthandler_disabled() || !mm)
>  		goto no_context;
>  
> +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> +		vm_flags &= ~VM_EXEC;

This needs a comment, and I think it would be cleaner to rework the vm_flags
initialisation along the lines of:

	unsigned long vm_flags;
	unsigned long mm_flags = FAULT_FLAG_DEFAULT;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		vm_flags = VM_EXEC;
		mm_flags |= FAULT_FLAG_INSTRUCTION;
	} else if (is_write_abort(esr)) {
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	} else {
		vm_flags = VM_READ | VM_WRITE;
		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
			vm_flags |= VM_EXEC;
	}

(but again, please add a comment to that last part because I still don't
really follow what you're doing)

Will
Catalin Marinas Jan. 26, 2021, 12:05 p.m. UTC | #2
On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >  #define pte_valid_not_user(pte) \
> > -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> > +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >  #define pte_valid_user(pte) \
> >  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> 
> Won't pte_valid_user() go wrong for exec-only mappings now?

I wondered about the same thing in November (reproducing my comment
below):

https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/

pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
!PTE_UXN is also user-accessible but it's only used in gup_pte_range()
via pte_access_permitted(). If "access" in the latter means only
read/write, we should be ok. If we keep it as is, a comment would
be useful.
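
For reference, arm64's pte_access_permitted() is currently defined in terms
of pte_valid_user(), which is why the meaning of "access" matters here
(sketch of the existing definition, not part of this patch):

	#define pte_access_permitted(pte, write) \
		(pte_valid_user(pte) && (!(write) || pte_write(pte)))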

> > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > index 8b5e7e5..47e9fdc 100644
> > --- a/arch/arm64/include/asm/sysreg.h
> > +++ b/arch/arm64/include/asm/sysreg.h
> > @@ -591,6 +591,7 @@
> >  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> >  
> >  /* SCTLR_EL1 specific flags. */
> > +#define SCTLR_EL1_EPAN		(BIT(57))
> >  #define SCTLR_EL1_ATA0		(BIT(42))
> >  
> >  #define SCTLR_EL1_TCF0_SHIFT	38
> > @@ -631,7 +632,7 @@
> >  	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
> >  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
> >  	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
> > -	 SCTLR_EL1_RES1)
> > +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
> 
> Why is this handled differently to normal PAN, where the SCTLR is written in
> cpu_enable_pan()?

That's how it was done in an early version but I suggested we move it to
the default SCTLR bits to save some lines:

https://lore.kernel.org/linux-arm-kernel/20201202182303.GC21091@gaia/

With classic PAN, we only enable it if all the CPUs support it. For EPAN
that's not necessary since EPAN should depend on PAN being enabled.

It's also cached in the TLB, so it's a bit of a hassle with CnP. See
this past discussion:

https://lore.kernel.org/linux-arm-kernel/X7P+r%2Fl3ewvaf1aV@trantor/
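
For comparison, the per-CPU enable hook in those earlier versions looked
something along the lines of the sketch below (a hypothetical
reconstruction modelled on cpu_enable_pan(); cpu_enable_epan() never went
upstream in this form):

	static void cpu_enable_epan(const struct arm64_cpu_capabilities *__unused)
	{
		/*
		 * EPAN is permitted to be cached in the TLB, hence the
		 * CnP complications in the discussion linked above.
		 */
		sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_EPAN);
		local_flush_tlb_all();
	}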

> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index 3c40da4..c32095f6 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
> >  	if (faulthandler_disabled() || !mm)
> >  		goto no_context;
> >  
> > +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> > +		vm_flags &= ~VM_EXEC;
> 
> This needs a comment, and I think it would be cleaner to rework the vm_flags
> initialisation along the lines of:
> 
> 	unsigned long vm_flags;
> 	unsigned long mm_flags = FAULT_FLAG_DEFAULT;
> 
> 	if (user_mode(regs))
> 		mm_flags |= FAULT_FLAG_USER;
> 
> 	if (is_el0_instruction_abort(esr)) {
> 		vm_flags = VM_EXEC;
> 		mm_flags |= FAULT_FLAG_INSTRUCTION;
> 	} else if (is_write_abort(esr)) {
> 		vm_flags = VM_WRITE;
> 		mm_flags |= FAULT_FLAG_WRITE;
> 	} else {
> 		vm_flags = VM_READ | VM_WRITE;
> 		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
> 			vm_flags |= VM_EXEC;
> 	}
> 
> (but again, please add a comment to that last part because I still don't
> really follow what you're doing)

Every time I come across this vm_flags handling I have to spend some
minutes to re-understand what it does. So vm_flags tells us what bits we
must have in vma->vm_flags for the fault to be benign. But we start with
all rwx flags and clear VM_EXEC if EPAN since exec does not imply read,
making it more confusing.

I think your rewrite is cleaner, maybe with some comment why we add
VM_EXEC when !EPAN.
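
With that, the reworked block might read (a sketch folding the suggested
comments into Will's code, not necessarily the final form):

	unsigned long vm_flags;
	unsigned long mm_flags = FAULT_FLAG_DEFAULT;

	if (user_mode(regs))
		mm_flags |= FAULT_FLAG_USER;

	if (is_el0_instruction_abort(esr)) {
		/* It was exec fault */
		vm_flags = VM_EXEC;
		mm_flags |= FAULT_FLAG_INSTRUCTION;
	} else if (is_write_abort(esr)) {
		/* It was write fault */
		vm_flags = VM_WRITE;
		mm_flags |= FAULT_FLAG_WRITE;
	} else {
		/* It was read fault */
		vm_flags = VM_READ;
		/* Write implies read */
		vm_flags |= VM_WRITE;
		/*
		 * Without EPAN, execute-only mappings are not supported, so
		 * any executable mapping is also readable and a read abort
		 * on it may be benign.
		 */
		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
			vm_flags |= VM_EXEC;
	}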
Will Deacon Jan. 26, 2021, 12:23 p.m. UTC | #3
On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> > On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> > >  #define pte_valid_not_user(pte) \
> > > -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> > > +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> > >  #define pte_valid_user(pte) \
> > >  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> > 
> > Won't pte_valid_user() go wrong for exec-only mappings now?
> 
> I wondered about the same in November (and reproducing my comment
> below):
> 
> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
> 
> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> via pte_access_permitted(). If "access" in the latter means only
> read/write, we should be ok. If we keep it as is, a comment would
> be useful.

I think we should ensure that:

  pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))

is always true, but I don't think it is after this patch. It reminds me
of some of the tests that Anshuman wrote.
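
Spelling out the combinations after this patch makes the gap visible
(PXN and other bits omitted; derived from the PAGE_* definitions above):

	mapping            VALID USER UXN   pte_valid_not_user  pte_valid_user
	kernel               1    0    1         true                false
	user read/write      1    1    1         false               true
	user r-x / rwx       1    1    0         false               true
	user exec-only       1    0    0         false               false

A valid exec-only pte satisfies neither helper, so the invariant doesn't
hold.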

> > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> > > index 8b5e7e5..47e9fdc 100644
> > > --- a/arch/arm64/include/asm/sysreg.h
> > > +++ b/arch/arm64/include/asm/sysreg.h
> > > @@ -591,6 +591,7 @@
> > >  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> > >  
> > >  /* SCTLR_EL1 specific flags. */
> > > +#define SCTLR_EL1_EPAN		(BIT(57))
> > >  #define SCTLR_EL1_ATA0		(BIT(42))
> > >  
> > >  #define SCTLR_EL1_TCF0_SHIFT	38
> > > @@ -631,7 +632,7 @@
> > >  	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
> > >  	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
> > >  	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
> > > -	 SCTLR_EL1_RES1)
> > > +	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
> > 
> > Why is this handled differently to normal PAN, where the SCTLR is written in
> > cpu_enable_pan()?
> 
> That's how it was done in an early version but I suggested we move it to
> the default SCTLR bits to save some lines:
> 
> https://lore.kernel.org/linux-arm-kernel/20201202182303.GC21091@gaia/
> 
> With classic PAN, we only enable it if all the CPUs support it. For EPAN
> that's not necessary since EPAN should depend on PAN being enabled.

Ok, let's just hope we don't have any CPU errata requiring us to disable
it.

> It's also cached in the TLB, so it's a bit of a hassle with CnP. See
> this past discussion:
> 
> https://lore.kernel.org/linux-arm-kernel/X7P+r%2Fl3ewvaf1aV@trantor/

Thanks. None of this is in the Arm ARM afaict, so I can't figure this stuff
out on my own.

> > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > index 3c40da4..c32095f6 100644
> > > --- a/arch/arm64/mm/fault.c
> > > +++ b/arch/arm64/mm/fault.c
> > > @@ -529,6 +529,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
> > >  	if (faulthandler_disabled() || !mm)
> > >  		goto no_context;
> > >  
> > > +	if (cpus_have_const_cap(ARM64_HAS_EPAN))
> > > +		vm_flags &= ~VM_EXEC;
> > 
> > This needs a comment, and I think it would be cleaner to rework the vm_flags
> > initialisation along the lines of:
> > 
> > 	unsigned long vm_flags;
> > 	unsigned long mm_flags = FAULT_FLAG_DEFAULT;
> > 
> > 	if (user_mode(regs))
> > 		mm_flags |= FAULT_FLAG_USER;
> > 
> > 	if (is_el0_instruction_abort(esr)) {
> > 		vm_flags = VM_EXEC;
> > 		mm_flags |= FAULT_FLAG_INSTRUCTION;
> > 	} else if (is_write_abort(esr)) {
> > 		vm_flags = VM_WRITE;
> > 		mm_flags |= FAULT_FLAG_WRITE;
> > 	} else {
> > 		vm_flags = VM_READ | VM_WRITE;
> > 		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
> > 			vm_flags |= VM_EXEC;
> > 	}
> > 
> > (but again, please add a comment to that last part because I still don't
> > really follow what you're doing)
> 
> Every time I come across this vm_flags handling I have to spend some
> minutes to re-understand what it does. So vm_flags tells us what bits we
> must have in vma->vm_flags for the fault to be benign. But we start with
> all rwx flags and clear VM_EXEC if EPAN since exec does not imply read,
> making it more confusing.
> 
> I think your rewrite is cleaner, maybe with some comment why we add
> VM_EXEC when !EPAN.

Agreed.

Will
Vladimir Murzin March 12, 2021, 11:17 a.m. UTC | #4
On 1/26/21 11:03 AM, Will Deacon wrote:
> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
>> Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
>> to be used with Execute-only mappings.
>>
>> The absence of such support was a reason for commit 24cecc377463 ("arm64:
>> Revert support for execute-only user mappings"). With EPAN it can now be
>> revisited and re-enabled.
>>
>> Cc: Kees Cook <keescook@chromium.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
>> ---
>>  arch/arm64/Kconfig                    | 17 +++++++++++++++++
>>  arch/arm64/include/asm/cpucaps.h      |  3 ++-
>>  arch/arm64/include/asm/pgtable-prot.h |  5 +++--
>>  arch/arm64/include/asm/pgtable.h      | 14 +++++++++++++-
>>  arch/arm64/include/asm/sysreg.h       |  3 ++-
>>  arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
>>  arch/arm64/mm/fault.c                 |  3 +++
>>  7 files changed, 52 insertions(+), 5 deletions(-)
> 
> (please cc me on arm64 patches)

Ah, surely I should have done that!

snip...

>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -114,7 +114,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
>>  
>>  #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
> 
> We used to have a useful comment here describing why we're looking at UXN.
> 
> There was also one next to the protection_map[], which I think we should add
> back.

Noted.

It seems that the rest has been covered by Catalin...

Cheers
Vladimir
Vladimir Murzin March 12, 2021, 11:19 a.m. UTC | #5
On 1/26/21 12:23 PM, Will Deacon wrote:
> On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
>> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
>>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
>>>>  #define pte_valid_not_user(pte) \
>>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
>>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
>>>>  #define pte_valid_user(pte) \
>>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
>>>
>>> Won't pte_valid_user() go wrong for exec-only mappings now?
>>
>> I wondered about the same thing in November (reproducing my comment
>> below):
>>
>> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
>>
>> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
>> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
>> via pte_access_permitted(). If "access" in the latter means only
>> read/write, we should be ok. If we keep it as is, a comment would
>> be useful.
> 
> I think we should ensure that:
> 
>   pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
> 
> is always true, but I don't think it is after this patch. It reminds me
> of some of the tests that Anshuman wrote.

Something like

/*
 * Most user mappings have the PTE_USER bit set; the exception is
 * Execute-only, where neither PTE_USER nor PTE_UXN is set.
 */
#define pte_valid_user(pte)                                            \
      (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
         ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))



Cheers
Vladimir
Will Deacon March 12, 2021, 11:20 a.m. UTC | #6
On Fri, Mar 12, 2021 at 11:19:02AM +0000, Vladimir Murzin wrote:
> On 1/26/21 12:23 PM, Will Deacon wrote:
> > On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> >> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> >>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >>>>  #define pte_valid_not_user(pte) \
> >>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> >>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >>>>  #define pte_valid_user(pte) \
> >>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> >>>
> >>> Won't pte_valid_user() go wrong for exec-only mappings now?
> >>
> >> I wondered about the same thing in November (reproducing my comment
> >> below):
> >>
> >> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
> >>
> >> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> >> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> >> via pte_access_permitted(). If "access" in the latter means only
> >> read/write, we should be ok. If we keep it as is, a comment would
> >> be useful.
> > 
> > I think we should ensure that:
> > 
> >   pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
> > 
> > is always true, but I don't think it is after this patch. It reminds me
> > of some of the tests that Anshuman wrote.
> 
> Something like
> 
> /*
>  * Most user mappings have the PTE_USER bit set; the exception is
>  * Execute-only, where neither PTE_USER nor PTE_UXN is set.
>  */
> #define pte_valid_user(pte)                                            \
>       (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
>          ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

Yeah, assuming the compiler doesn't make a dog's dinner of it.

Will
Catalin Marinas March 12, 2021, 12:23 p.m. UTC | #7
On Fri, Mar 12, 2021 at 11:19:02AM +0000, Vladimir Murzin wrote:
> On 1/26/21 12:23 PM, Will Deacon wrote:
> > On Tue, Jan 26, 2021 at 12:05:42PM +0000, Catalin Marinas wrote:
> >> On Tue, Jan 26, 2021 at 11:03:06AM +0000, Will Deacon wrote:
> >>> On Tue, Jan 19, 2021 at 04:07:22PM +0000, Vladimir Murzin wrote:
> >>>>  #define pte_valid_not_user(pte) \
> >>>> -	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
> >>>> +	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
> >>>>  #define pte_valid_user(pte) \
> >>>>  	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
> >>>
> >>> Won't pte_valid_user() go wrong for exec-only mappings now?
> >>
> >> I wondered about the same thing in November (reproducing my comment
> >> below):
> >>
> >> https://lore.kernel.org/linux-arm-kernel/20201119182236.GN4376@gaia/
> >>
> >> pte_valid_user() checks for both PTE_VALID and PTE_USER. In theory, a
> >> !PTE_UXN is also user-accessible but it's only used in gup_pte_range()
> >> via pte_access_permitted(). If "access" in the latter means only
> >> read/write, we should be ok. If we keep it as is, a comment would
> >> be useful.
> > 
> > I think we should ensure that:
> > 
> >   pte_valid(pte) && (pte_valid_not_user(pte) || pte_valid_user(pte))
> > 
> > is always true, but I don't think it is after this patch. It reminds me
> > of some of the tests that Anshuman wrote.
> 
> Something like
> 
> /*
>  * Most user mappings have the PTE_USER bit set; the exception is
>  * Execute-only, where neither PTE_USER nor PTE_UXN is set.
>  */
> #define pte_valid_user(pte)                                            \
>       (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) || \
>          ((pte_val(pte) & (PTE_VALID | PTE_UXN)) == PTE_VALID))

This is equivalent to (valid && (user || !uxn)), which matches how
EPAN is now specified.

However, I think this change breaks pte_access_permitted(), which would
now return true even for execute-only mappings. Since such pages are
neither readable nor writable by the user, get_user_pages_fast() should
fail. For example, if we have a futex in such an execute-only mapping,
get_futex_key() succeeds with the above. Similarly for the
iov_iter_get_pages().

The slow-path get_user_pages() does the checks on the vma flags and this
can be overridden with FOLL_FORCE (it doesn't use
pte_access_permitted()). I haven't seen get_user_pages_fast() called
with FOLL_FORCE anywhere, but even if it is, the flag looks to be
ignored as the code relies on pte_access_permitted() only.

I'd say let's scrap pte_valid_user() altogether and move the original
logic to pte_access_permitted() with a comment that it must return false
for execute-only user mappings.
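
Concretely, that could look something like the sketch below (illustrative;
the exact form is left to the next spin of the series):

	/*
	 * pte_access_permitted() must return false for execute-only user
	 * mappings: they are neither readable nor writable from userspace,
	 * so get_user_pages_fast() must not consider them accessible.
	 */
	#define pte_access_permitted(pte, write) \
		(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && \
		 (!(write) || pte_write(pte)))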

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b..e63cc18 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1053,6 +1053,9 @@  config ARCH_WANT_HUGE_PMD_SHARE
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+	def_bool y
+
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y if PGTABLE_LEVELS > 2
 
@@ -1673,6 +1676,20 @@  config ARM64_MTE
 
 endmenu
 
+menu "ARMv8.7 architectural features"
+
+config ARM64_EPAN
+	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
+	default y
+	depends on ARM64_PAN
+	help
+	 Enhanced Privileged Access Never (EPAN) allows Privileged
+	 Access Never to be used with Execute-only mappings.
+
+	 The feature is detected at runtime, and will remain disabled
+	 if the cpu does not implement the feature.
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b77d997..9e3ec4d 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -66,7 +66,8 @@ 
 #define ARM64_WORKAROUND_1508412		58
 #define ARM64_HAS_LDAPR				59
 #define ARM64_KVM_PROTECTED_MODE		60
+#define ARM64_HAS_EPAN				61
 
-#define ARM64_NCAPS				61
+#define ARM64_NCAPS				62
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 046be78..f91c2aa 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -88,12 +88,13 @@  extern bool arm64_use_ng_mappings;
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_READONLY
 #define __P011  PAGE_READONLY
-#define __P100  PAGE_READONLY_EXEC
+#define __P100  PAGE_EXECONLY
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_READONLY_EXEC
 #define __P111  PAGE_READONLY_EXEC
@@ -102,7 +103,7 @@  extern bool arm64_use_ng_mappings;
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY_EXEC
+#define __S100  PAGE_EXECONLY
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5015627..0196849 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -114,7 +114,7 @@  extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
 #define pte_valid_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 
@@ -982,6 +982,18 @@  static inline bool arch_faults_on_old_pte(void)
 }
 #define arch_faults_on_old_pte arch_faults_on_old_pte
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+	if (cpus_have_const_cap(ARM64_HAS_EPAN))
+		return prot;
+
+	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
+		return prot;
+
+	return PAGE_READONLY_EXEC;
+}
+
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 8b5e7e5..47e9fdc 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -591,6 +591,7 @@ 
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
 
 /* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_EPAN		(BIT(57))
 #define SCTLR_EL1_ATA0		(BIT(42))
 
 #define SCTLR_EL1_TCF0_SHIFT	38
@@ -631,7 +632,7 @@ 
 	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
 	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
 	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
-	 SCTLR_EL1_RES1)
+	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e99edde..9d85956 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1774,6 +1774,18 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_pan,
 	},
 #endif /* CONFIG_ARM64_PAN */
+#ifdef CONFIG_ARM64_EPAN
+	{
+		.desc = "Enhanced Privileged Access Never",
+		.capability = ARM64_HAS_EPAN,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 3,
+	},
+#endif /* CONFIG_ARM64_EPAN */
 #ifdef CONFIG_ARM64_LSE_ATOMICS
 	{
 		.desc = "LSE atomic instructions",
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3c40da4..c32095f6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -529,6 +529,9 @@  static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 	if (faulthandler_disabled() || !mm)
 		goto no_context;
 
+	if (cpus_have_const_cap(ARM64_HAS_EPAN))
+		vm_flags &= ~VM_EXEC;
+
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;