Message ID | 20200715170844.30064-19-catalin.marinas@arm.com (mailing list archive) |
---|---|
State | New, archived |
Series | arm64: Memory Tagging Extension user-space support |
On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of options to the
> existing prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the
> tag check fault mode:
>
>   PR_MTE_TCF_NONE  - no reporting (default)
>   PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>   PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.

[...]

> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c

[...]

> +void mte_thread_switch(struct task_struct *next)
> +{
> +	if (!system_supports_mte())
> +		return;
> +
> +	/* avoid expensive SCTLR_EL1 accesses if no change */
> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)

I think this could be improved by checking whether `next` is a kernel
thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

Kevin

> +		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
> +}

[...]
On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > By default, even if PROT_MTE is set on a memory range, there is no tag
> > check fault reporting (SIGSEGV). [...]

[...]

> > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c

[...]

> > +void mte_thread_switch(struct task_struct *next)
> > +{
> > +	if (!system_supports_mte())
> > +		return;
> > +
> > +	/* avoid expensive SCTLR_EL1 accesses if no change */
> > +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
>
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

Out of interest, do we have a nice way of testing for a kernel thread
now?

I remember fpsimd_thread_switch() used to check for task->mm, but we
seem to have got rid of that at some point. set_mm() can defeat this,
and anyway the heavy lifting for FPSIMD is now deferred until returning
to userspace.

Cheers
---Dave
On Mon, Jul 20, 2020 at 06:00:50PM +0100, Dave P Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> > On 15/07/2020 18:08, Catalin Marinas wrote:
> > > +void mte_thread_switch(struct task_struct *next)
> > > +{
> > > +	if (!system_supports_mte())
> > > +		return;
> > > +
> > > +	/* avoid expensive SCTLR_EL1 accesses if no change */
> > > +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> >
> > I think this could be improved by checking whether `next` is a kernel
> > thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> > setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
>
> Out of interest, do we have a nice way of testing for a kernel thread
> now?
>
> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point. set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.

I think a better way is (current->flags & PF_KTHREAD). kthread_use_mm()
indeed changes current->mm.
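[Editor's note: for illustration, a rough sketch of what the suggested
kernel-thread skip could look like. This is not the posted patch: the
per-CPU cache of the last TCF0 value written is an invented detail,
needed because skipping a switch would leave current->thread.sctlr_tcf0
out of sync with the register and break the existing current-vs-next
comparison.]

/*
 * Hypothetical sketch only -- not the posted patch.
 * update_sctlr_el1_tcf0() and system_supports_mte() come from the
 * patch and the arm64 cpufeature code respectively.
 */
#include <linux/percpu.h>
#include <linux/sched.h>

static DEFINE_PER_CPU(u64, mte_last_tcf0);

void mte_thread_switch(struct task_struct *next)
{
	if (!system_supports_mte())
		return;

	/*
	 * No TTBR0 accesses expected from a kernel thread (but see the
	 * kthread_use_mm() caveat in the next reply).
	 */
	if (next->flags & PF_KTHREAD)
		return;

	/* avoid expensive SCTLR_EL1 accesses if no change on this CPU */
	if (__this_cpu_read(mte_last_tcf0) != next->thread.sctlr_tcf0) {
		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
		__this_cpu_write(mte_last_tcf0, next->thread.sctlr_tcf0);
	}
}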
On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > +void mte_thread_switch(struct task_struct *next)
> > +{
> > +	if (!system_supports_mte())
> > +		return;
> > +
> > +	/* avoid expensive SCTLR_EL1 accesses if no change */
> > +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
>
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

It's not about kernel or user thread here. kthread_use_mm() (just
use_mm() in older kernels) would set an mm on a kernel thread,
temporarily making it behave as a user one. Since sctlr_tcf0 is per
thread, not per mm, we need to switch to the default TCF0 for kthreads
so that user accesses (if use_mm() is called) don't generate any tag
check faults. Note that switch_mm() does not touch TCF0.

If we did allow a global, per-mm TCF0 setting, such kthreads could only
handle synchronous faults, with no SIGSEGV generated (as we do with
copy_{from,to}_user() for normal threads).

If we want to revisit per-thread vs per-mm TCF0 setting, now is the
time.
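[Editor's note: to make the kthread_use_mm() point concrete, here is an
illustrative kernel-thread body. The function name, the mm handoff and
the user address are made up for the example; kthread_use_mm(),
kthread_unuse_mm() and copy_from_user() are the real interfaces
involved. While the mm is borrowed, uaccess goes through TTBR0, so the
kthread's SCTLR_EL1.TCF0 setting governs tag checking.]

/* Illustrative only: a kernel thread borrowing a user mm. */
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/uaccess.h>

static int borrow_mm_fn(void *data)
{
	struct mm_struct *mm = data;	/* assumed pinned by the creator */
	const void __user *uptr = (const void __user *)0x400000UL; /* example */
	int val;

	kthread_use_mm(mm);	/* now temporarily behaves as a user thread */
	/* this uaccess is subject to the current SCTLR_EL1.TCF0 mode */
	if (copy_from_user(&val, uptr, sizeof(val)))
		pr_info("uaccess failed\n");
	kthread_unuse_mm(mm);
	return 0;
}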
On 20/07/2020 18:00, Dave Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
>> On 15/07/2020 18:08, Catalin Marinas wrote:
>>> By default, even if PROT_MTE is set on a memory range, there is no tag
>>> check fault reporting (SIGSEGV). [...]
> [...]
>>> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> [...]
>>> +void mte_thread_switch(struct task_struct *next)
>>> +{
>>> +	if (!system_supports_mte())
>>> +		return;
>>> +
>>> +	/* avoid expensive SCTLR_EL1 accesses if no change */
>>> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
>> I think this could be improved by checking whether `next` is a kernel
>> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
>> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
> Out of interest, do we have a nice way of testing for a kernel thread
> now?

Isn't it as simple as checking if PF_KTHREAD is set in tsk->flags? At
least this is what ssbs_thread_switch() does.

Kevin

> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point. set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.
>
> Cheers
> ---Dave
On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of options to the
> existing prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the
> tag check fault mode:
>
>   PR_MTE_TCF_NONE  - no reporting (default)
>   PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>   PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.

The last sentence is outdated, it should probably say that uaccess is
only checked in synchronous mode.

Kevin

[...]
On Tue, Aug 04, 2020 at 08:34:42PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > By default, even if PROT_MTE is set on a memory range, there is no tag
> > check fault reporting (SIGSEGV). [...]
> >
> > These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> > context-switched by the kernel. Note that uaccess done by the kernel is
> > not checked and cannot be configured by the user.
>
> The last sentence is outdated, it should probably say that uaccess is
> only checked in synchronous mode.

Thanks, I forgot about the commit log. The documentation was updated to:

  **Note**: Kernel accesses to the user address space (e.g. ``read()``
  system call) are not checked if the user thread tag checking mode is
  ``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode
  is ``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its
  user address accesses, however it cannot always guarantee it.
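[Editor's note: for reference, a minimal userspace sketch (not from the
patch) of enabling the tagged address ABI together with synchronous tag
check faults via this prctl(). The constants are redefined locally in
case the installed uapi headers predate this series.]

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TAGGED_ADDR_CTRL
#define PR_SET_TAGGED_ADDR_CTRL	55
#define PR_GET_TAGGED_ADDR_CTRL	56
#define PR_TAGGED_ADDR_ENABLE	(1UL << 0)
#endif
#ifndef PR_MTE_TCF_SHIFT
#define PR_MTE_TCF_SHIFT	1
#define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
#endif

int main(void)
{
	/* enable the tagged address ABI + synchronous tag check faults */
	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
		  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC, 0, 0, 0)) {
		perror("PR_SET_TAGGED_ADDR_CTRL");
		return 1;
	}
	printf("synchronous MTE tag checking enabled\n");
	return 0;
}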
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index b2577eee62c2..df2efbc9f8f1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
+void mte_thread_switch(struct task_struct *next);
+long set_mte_ctrl(unsigned long arg);
+long get_mte_ctrl(void);
 
 #else
 
@@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void flush_mte_state(void)
 {
 }
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline long set_mte_ctrl(unsigned long arg)
+{
+	return 0;
+}
+static inline long get_mte_ctrl(void)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 240fe5e5b720..80e7f0573309 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,6 +151,9 @@ struct thread_struct {
 	struct ptrauth_keys_user	keys_user;
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
+#ifdef CONFIG_ARM64_MTE
+	u64			sctlr_tcf0;
+#endif
 };
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 5f54fd140610..375483a1f573 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -5,6 +5,8 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
 
@@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
 	return ret;
 }
 
+static void update_sctlr_el1_tcf0(u64 tcf0)
+{
+	/* ISB required for the kernel uaccess routines */
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
+	isb();
+}
+
+static void set_sctlr_el1_tcf0(u64 tcf0)
+{
+	/*
+	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1.TCF0 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_tcf0 = tcf0;
+	update_sctlr_el1_tcf0(tcf0);
+	preempt_enable();
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -58,4 +80,59 @@ void flush_mte_state(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+	/* disable tag checking */
+	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+	if (!system_supports_mte())
+		return;
+
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
+		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+}
+
+long set_mte_ctrl(unsigned long arg)
+{
+	u64 tcf0;
+
+	if (!system_supports_mte())
+		return 0;
+
+	switch (arg & PR_MTE_TCF_MASK) {
+	case PR_MTE_TCF_NONE:
+		tcf0 = SCTLR_EL1_TCF0_NONE;
+		break;
+	case PR_MTE_TCF_SYNC:
+		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		break;
+	case PR_MTE_TCF_ASYNC:
+		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	set_sctlr_el1_tcf0(tcf0);
+
+	return 0;
+}
+
+long get_mte_ctrl(void)
+{
+	if (!system_supports_mte())
+		return 0;
+
+	switch (current->thread.sctlr_tcf0) {
+	case SCTLR_EL1_TCF0_NONE:
+		return PR_MTE_TCF_NONE;
+	case SCTLR_EL1_TCF0_SYNC:
+		return PR_MTE_TCF_SYNC;
+	case SCTLR_EL1_TCF0_ASYNC:
+		return PR_MTE_TCF_ASYNC;
+	}
+
+	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 695705d1f8e5..d19ce8053a03 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -544,6 +544,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
+	/*
+	 * MTE thread switching must happen after the DSB above to ensure that
+	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
+	 * registers.
+	 */
+	mte_thread_switch(next);
+
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
 
@@ -603,9 +610,15 @@ static unsigned int tagged_addr_disabled;
 
 long set_tagged_addr_ctrl(unsigned long arg)
 {
+	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+
 	if (is_compat_task())
 		return -EINVAL;
-	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+	if (system_supports_mte())
+		valid_mask |= PR_MTE_TCF_MASK;
+
+	if (arg & ~valid_mask)
 		return -EINVAL;
 
 	/*
@@ -615,6 +628,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
+	if (set_mte_ctrl(arg) != 0)
+		return -EINVAL;
+
 	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
@@ -622,13 +638,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
 
 long get_tagged_addr_ctrl(void)
 {
+	long ret = 0;
+
 	if (is_compat_task())
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_TAGGED_ADDR))
-		return PR_TAGGED_ADDR_ENABLE;
+		ret = PR_TAGGED_ADDR_ENABLE;
 
-	return 0;
+	ret |= get_mte_ctrl();
+
+	return ret;
 }
 
 /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..2390ab324afa 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -233,6 +233,12 @@ struct prctl_mm_map {
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT		1
+# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57
By default, even if PROT_MTE is set on a memory range, there is no tag
check fault reporting (SIGSEGV). Introduce a set of options to the
existing prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
check fault mode:

  PR_MTE_TCF_NONE  - no reporting (default)
  PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
  PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting

These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
context-switched by the kernel. Note that uaccess done by the kernel is
not checked and cannot be configured by the user.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---

Notes:
    v3:
    - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
    - Move mte_thread_switch() in this patch from an earlier one. In
      addition, it is called after the dsb() in __switch_to() so that any
      asynchronous tag check faults have been registered in the TFSR_EL1
      registers (to be added with the in-kernel MTE support).

    v2:
    - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with
      PR_MTE_TCF_NONE.
    - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to
      Peter Collingbourne).
    - Added ISB to update_sctlr_el1_tcf0() since, with the latest
      architecture update/fix, the TCF0 field is used by the uaccess
      routines.

 arch/arm64/include/asm/mte.h       | 14 ++++++
 arch/arm64/include/asm/processor.h |  3 ++
 arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c        | 26 ++++++++--
 include/uapi/linux/prctl.h         |  6 +++
 5 files changed, 123 insertions(+), 3 deletions(-)
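[Editor's note: a companion userspace sketch (hypothetical), reading the
current setting back via PR_GET_TAGGED_ADDR_CTRL and decoding it with
the masks defined by this patch; it assumes installed uapi headers that
already carry the PR_MTE_TCF_* constants.]

#include <stdio.h>
#include <sys/prctl.h>

static const char *tcf_name(long ctrl)
{
	switch (ctrl & PR_MTE_TCF_MASK) {
	case PR_MTE_TCF_SYNC:
		return "sync";
	case PR_MTE_TCF_ASYNC:
		return "async";
	default:
		return "none";
	}
}

int main(void)
{
	long ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);

	if (ctrl < 0) {
		perror("PR_GET_TAGGED_ADDR_CTRL");
		return 1;
	}
	printf("tagged addresses: %s, tag check faults: %s\n",
	       (ctrl & PR_TAGGED_ADDR_ENABLE) ? "enabled" : "disabled",
	       tcf_name(ctrl));
	return 0;
}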