Message ID | 20191211184027.20130-21-catalin.marinas@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: Memory Tagging Extension user-space support | expand |
+Branislav, Peter In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). After some more discussions, Branislav and I think that it would be better to start with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all tags can be generated, doing any heap or stack tagging before the PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never set the top byte by default, then tagging operations should be no-ops until the prctl() is issued. This would be particularly useful given that it may not be straightforward for the C runtime to issue the prctl() before doing anything else. Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make perfect sense not to generate tags by default. Any thoughts? Thanks, Kevin On 11/12/2019 18:40, Catalin Marinas wrote: > The IRG, ADDG and SUBG instructions insert a random tag in the resulting > address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap > when, for example, the user wants a certain colour for freed buffers. > Since the GCR_EL1 register is not accessible at EL0, extend the > prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in > the first argument for controlling the excluded tags. This setting is > pre-thread. > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > --- > arch/arm64/include/asm/processor.h | 1 + > arch/arm64/include/asm/sysreg.h | 7 +++++++ > arch/arm64/kernel/process.c | 27 +++++++++++++++++++++++---- > include/uapi/linux/prctl.h | 3 +++ > 4 files changed, 34 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h > index 91aa270afc7d..5b6988035334 100644 > --- a/arch/arm64/include/asm/processor.h > +++ b/arch/arm64/include/asm/processor.h > @@ -150,6 +150,7 @@ struct thread_struct { > #endif > #ifdef CONFIG_ARM64_MTE > u64 sctlr_tcf0; > + u64 gcr_excl; > #endif > }; > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h > index 9e5753272f4b..b6bb6d31f1cd 100644 > --- a/arch/arm64/include/asm/sysreg.h > +++ b/arch/arm64/include/asm/sysreg.h > @@ -901,6 +901,13 @@ > write_sysreg(__scs_new, sysreg); \ > } while (0) > > +#define sysreg_clear_set_s(sysreg, clear, set) do { \ > + u64 __scs_val = read_sysreg_s(sysreg); \ > + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ > + if (__scs_new != __scs_val) \ > + write_sysreg_s(__scs_new, sysreg); \ > +} while (0) > + > #endif > > #endif /* __ASM_SYSREG_H */ > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c > index 47ce98f47253..5ec6889795fc 100644 > --- a/arch/arm64/kernel/process.c > +++ b/arch/arm64/kernel/process.c > @@ -502,6 +502,15 @@ static void update_sctlr_el1_tcf0(u64 tcf0) > sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); > } > > +static void update_gcr_el1_excl(u64 excl) > +{ > + /* > + * No need for ISB since this only affects EL0 currently, implicit > + * with ERET. > + */ > + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); > +} > + > /* Handle MTE thread switch */ > static void mte_thread_switch(struct task_struct *next) > { > @@ -511,6 +520,7 @@ static void mte_thread_switch(struct task_struct *next) > /* avoid expensive SCTLR_EL1 accesses if no change */ > if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) > update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); > + update_gcr_el1_excl(next->thread.gcr_excl); > } > #else > static void mte_thread_switch(struct task_struct *next) > @@ -641,22 +651,31 @@ static long set_mte_ctrl(unsigned long arg) > update_sctlr_el1_tcf0(tcf0); > preempt_enable(); > > + current->thread.gcr_excl = (arg & PR_MTE_EXCL_MASK) >> PR_MTE_EXCL_SHIFT; > + update_gcr_el1_excl(current->thread.gcr_excl); > + > return 0; > } > > static long get_mte_ctrl(void) > { > + unsigned long ret; > + > if (!system_supports_mte()) > return 0; > > + ret = current->thread.gcr_excl << PR_MTE_EXCL_SHIFT; > + > switch (current->thread.sctlr_tcf0) { > case SCTLR_EL1_TCF0_SYNC: > - return PR_MTE_TCF_SYNC; > + ret |= PR_MTE_TCF_SYNC; > + break; > case SCTLR_EL1_TCF0_ASYNC: > - return PR_MTE_TCF_ASYNC; > + ret |= PR_MTE_TCF_ASYNC; > + break; > } > > - return 0; > + return ret; > } > #else > static long set_mte_ctrl(unsigned long arg) > @@ -684,7 +703,7 @@ long set_tagged_addr_ctrl(unsigned long arg) > return -EINVAL; > > if (system_supports_mte()) > - valid_mask |= PR_MTE_TCF_MASK; > + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_EXCL_MASK; > > if (arg & ~valid_mask) > return -EINVAL; > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > index 5e9323e66a38..749de5ab4f9f 100644 > --- a/include/uapi/linux/prctl.h > +++ b/include/uapi/linux/prctl.h > @@ -239,5 +239,8 @@ struct prctl_mm_map { > # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) > # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) > # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) > +/* MTE tag exclusion mask */ > +# define PR_MTE_EXCL_SHIFT 3 > +# define PR_MTE_EXCL_MASK (0xffffUL << PR_MTE_EXCL_SHIFT) > > #endif /* _LINUX_PRCTL_H */
On Mon, Dec 16, 2019 at 6:20 AM Kevin Brodsky <kevin.brodsky@arm.com> wrote: > > +Branislav, Peter > > In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). > After some more discussions, Branislav and I think that it would be better to start > with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). > > This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all > tags can be generated, doing any heap or stack tagging before the > PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged > addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never > set the top byte by default, then tagging operations should be no-ops until the > prctl() is issued. This would be particularly useful given that it may not be > straightforward for the C runtime to issue the prctl() before doing anything else. > > Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make > perfect sense not to generate tags by default. > > Any thoughts? This would indeed allow the early C runtime startup code to pass tagged addresses to syscalls, but I don't think it would entirely free the code from the burden of worrying about stack tagging. Either way, any stack frames that are active at the point when the prctl() is issued would need to be compiled without stack tagging, because otherwise those stack frames may use ADDG to rematerialize a stack object address, which may produce a different address post-prctl. Setting the exclude mask to 0xffff would at least make it more likely for this problem to be detected, though. If we change the default in this way, maybe it would be worth considering flipping the meaning of the tag mask and have it be a mask of tags to allow. That would be consistent with the existing behaviour where userspace sets bits in tagged_addr_ctrl in order to enable tagging features. Peter > > Thanks, > Kevin > > On 11/12/2019 18:40, Catalin Marinas wrote: > > The IRG, ADDG and SUBG instructions insert a random tag in the resulting > > address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap > > when, for example, the user wants a certain colour for freed buffers. > > Since the GCR_EL1 register is not accessible at EL0, extend the > > prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in > > the first argument for controlling the excluded tags. This setting is > > pre-thread. > > > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > > --- > > arch/arm64/include/asm/processor.h | 1 + > > arch/arm64/include/asm/sysreg.h | 7 +++++++ > > arch/arm64/kernel/process.c | 27 +++++++++++++++++++++++---- > > include/uapi/linux/prctl.h | 3 +++ > > 4 files changed, 34 insertions(+), 4 deletions(-) > > > > diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h > > index 91aa270afc7d..5b6988035334 100644 > > --- a/arch/arm64/include/asm/processor.h > > +++ b/arch/arm64/include/asm/processor.h > > @@ -150,6 +150,7 @@ struct thread_struct { > > #endif > > #ifdef CONFIG_ARM64_MTE > > u64 sctlr_tcf0; > > + u64 gcr_excl; > > #endif > > }; > > > > diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h > > index 9e5753272f4b..b6bb6d31f1cd 100644 > > --- a/arch/arm64/include/asm/sysreg.h > > +++ b/arch/arm64/include/asm/sysreg.h > > @@ -901,6 +901,13 @@ > > write_sysreg(__scs_new, sysreg); \ > > } while (0) > > > > +#define sysreg_clear_set_s(sysreg, clear, set) do { \ > > + u64 __scs_val = read_sysreg_s(sysreg); \ > > + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ > > + if (__scs_new != __scs_val) \ > > + write_sysreg_s(__scs_new, sysreg); \ > > +} while (0) > > + > > #endif > > > > #endif /* __ASM_SYSREG_H */ > > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c > > index 47ce98f47253..5ec6889795fc 100644 > > --- a/arch/arm64/kernel/process.c > > +++ b/arch/arm64/kernel/process.c > > @@ -502,6 +502,15 @@ static void update_sctlr_el1_tcf0(u64 tcf0) > > sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); > > } > > > > +static void update_gcr_el1_excl(u64 excl) > > +{ > > + /* > > + * No need for ISB since this only affects EL0 currently, implicit > > + * with ERET. > > + */ > > + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); > > +} > > + > > /* Handle MTE thread switch */ > > static void mte_thread_switch(struct task_struct *next) > > { > > @@ -511,6 +520,7 @@ static void mte_thread_switch(struct task_struct *next) > > /* avoid expensive SCTLR_EL1 accesses if no change */ > > if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) > > update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); > > + update_gcr_el1_excl(next->thread.gcr_excl); > > } > > #else > > static void mte_thread_switch(struct task_struct *next) > > @@ -641,22 +651,31 @@ static long set_mte_ctrl(unsigned long arg) > > update_sctlr_el1_tcf0(tcf0); > > preempt_enable(); > > > > + current->thread.gcr_excl = (arg & PR_MTE_EXCL_MASK) >> PR_MTE_EXCL_SHIFT; > > + update_gcr_el1_excl(current->thread.gcr_excl); > > + > > return 0; > > } > > > > static long get_mte_ctrl(void) > > { > > + unsigned long ret; > > + > > if (!system_supports_mte()) > > return 0; > > > > + ret = current->thread.gcr_excl << PR_MTE_EXCL_SHIFT; > > + > > switch (current->thread.sctlr_tcf0) { > > case SCTLR_EL1_TCF0_SYNC: > > - return PR_MTE_TCF_SYNC; > > + ret |= PR_MTE_TCF_SYNC; > > + break; > > case SCTLR_EL1_TCF0_ASYNC: > > - return PR_MTE_TCF_ASYNC; > > + ret |= PR_MTE_TCF_ASYNC; > > + break; > > } > > > > - return 0; > > + return ret; > > } > > #else > > static long set_mte_ctrl(unsigned long arg) > > @@ -684,7 +703,7 @@ long set_tagged_addr_ctrl(unsigned long arg) > > return -EINVAL; > > > > if (system_supports_mte()) > > - valid_mask |= PR_MTE_TCF_MASK; > > + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_EXCL_MASK; > > > > if (arg & ~valid_mask) > > return -EINVAL; > > diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h > > index 5e9323e66a38..749de5ab4f9f 100644 > > --- a/include/uapi/linux/prctl.h > > +++ b/include/uapi/linux/prctl.h > > @@ -239,5 +239,8 @@ struct prctl_mm_map { > > # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) > > # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) > > # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) > > +/* MTE tag exclusion mask */ > > +# define PR_MTE_EXCL_SHIFT 3 > > +# define PR_MTE_EXCL_MASK (0xffffUL << PR_MTE_EXCL_SHIFT) > > > > #endif /* _LINUX_PRCTL_H */ >
On Mon, Dec 16, 2019 at 09:30:36AM -0800, Peter Collingbourne wrote: > On Mon, Dec 16, 2019 at 6:20 AM Kevin Brodsky <kevin.brodsky@arm.com> wrote: > > In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). > > After some more discussions, Branislav and I think that it would be better to start > > with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). So with mask 0xff, IRG generates only tag 0? This seems to be the case reading the pseudocode in the ARM ARM. > > This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all > > tags can be generated, doing any heap or stack tagging before the > > PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged > > addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never > > set the top byte by default, then tagging operations should be no-ops until the > > prctl() is issued. This would be particularly useful given that it may not be > > straightforward for the C runtime to issue the prctl() before doing anything else. > > > > Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make > > perfect sense not to generate tags by default. > > > > Any thoughts? > > This would indeed allow the early C runtime startup code to pass > tagged addresses to syscalls, but I don't think it would entirely free > the code from the burden of worrying about stack tagging. Either way, > any stack frames that are active at the point when the prctl() is > issued would need to be compiled without stack tagging, because > otherwise those stack frames may use ADDG to rematerialize a stack > object address, which may produce a different address post-prctl. > Setting the exclude mask to 0xffff would at least make it more likely > for this problem to be detected, though. > > If we change the default in this way, maybe it would be worth > considering flipping the meaning of the tag mask and have it be a mask > of tags to allow. That would be consistent with the existing behaviour > where userspace sets bits in tagged_addr_ctrl in order to enable > tagging features. Either option works for me. It's really for the libc people to decide what they need. I think an "include" rather than "exclude" mask makes sense with the default 0 meaning only generate tag 0. Thanks.
Hi Peter, Revisiting the gcr_excl vs gcr_incl decision, so reviving an old thread. On Mon, Dec 16, 2019 at 09:30:36AM -0800, Peter Collingbourne wrote: > On Mon, Dec 16, 2019 at 6:20 AM Kevin Brodsky <kevin.brodsky@arm.com> wrote: > > In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). > > After some more discussions, Branislav and I think that it would be better to start > > with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). > > > > This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all > > tags can be generated, doing any heap or stack tagging before the > > PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged > > addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never > > set the top byte by default, then tagging operations should be no-ops until the > > prctl() is issued. This would be particularly useful given that it may not be > > straightforward for the C runtime to issue the prctl() before doing anything else. > > > > Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make > > perfect sense not to generate tags by default. > > This would indeed allow the early C runtime startup code to pass > tagged addresses to syscalls, I guess you meant that early C runtime code won't get tagged stack addresses, hence they can be passed to syscalls. Prior to the prctl(), the kernel doesn't accept tagged addresses anyway. > but I don't think it would entirely free > the code from the burden of worrying about stack tagging. Either way, > any stack frames that are active at the point when the prctl() is > issued would need to be compiled without stack tagging, because > otherwise those stack frames may use ADDG to rematerialize a stack > object address, which may produce a different address post-prctl. If you want to guarantee that ADDG always returns tag 0, I guess that's only possible with a default exclude mask of 0xffff (or if you are careful enough with the start tag and offset passed). > Setting the exclude mask to 0xffff would at least make it more likely > for this problem to be detected, though. I thought it would be detected if we didn't have a 0xffff default exclude mask. With only tag 0 generated, any such problem could be hidden. > If we change the default in this way, maybe it would be worth > considering flipping the meaning of the tag mask and have it be a mask > of tags to allow. That would be consistent with the existing behaviour > where userspace sets bits in tagged_addr_ctrl in order to enable > tagging features. The first question is whether the C runtime requires a default GCR_EL1.Excl mask of 0xffff (or 0xfffe) so that IRG, ADDG, SUBG always generate tag 0. If the runtime is fine with a default exclude mask of 0, I'm tempted to go back to an exclude mask for prctl(). (to me it feels more natural to use an exclude mask as it matches the ARM ARM definition but maybe I stare too much at the hardware specs ;))
On Mon, Jun 22, 2020 at 10:17 AM Catalin Marinas <catalin.marinas@arm.com> wrote: > > Hi Peter, > > Revisiting the gcr_excl vs gcr_incl decision, so reviving an old thread. > > On Mon, Dec 16, 2019 at 09:30:36AM -0800, Peter Collingbourne wrote: > > On Mon, Dec 16, 2019 at 6:20 AM Kevin Brodsky <kevin.brodsky@arm.com> wrote: > > > In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). > > > After some more discussions, Branislav and I think that it would be better to start > > > with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). > > > > > > This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all > > > tags can be generated, doing any heap or stack tagging before the > > > PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged > > > addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never > > > set the top byte by default, then tagging operations should be no-ops until the > > > prctl() is issued. This would be particularly useful given that it may not be > > > straightforward for the C runtime to issue the prctl() before doing anything else. > > > > > > Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make > > > perfect sense not to generate tags by default. > > > > This would indeed allow the early C runtime startup code to pass > > tagged addresses to syscalls, > > I guess you meant that early C runtime code won't get tagged stack > addresses, hence they can be passed to syscalls. Prior to the prctl(), > the kernel doesn't accept tagged addresses anyway. Right. > > but I don't think it would entirely free > > the code from the burden of worrying about stack tagging. Either way, > > any stack frames that are active at the point when the prctl() is > > issued would need to be compiled without stack tagging, because > > otherwise those stack frames may use ADDG to rematerialize a stack > > object address, which may produce a different address post-prctl. > > If you want to guarantee that ADDG always returns tag 0, I guess that's > only possible with a default exclude mask of 0xffff (or if you are > careful enough with the start tag and offset passed). > > > Setting the exclude mask to 0xffff would at least make it more likely > > for this problem to be detected, though. > > I thought it would be detected if we didn't have a 0xffff default > exclude mask. With only tag 0 generated, any such problem could be > hidden. I don't think that's the case, as long as you aren't using 0 as a catch-all tag. Imagine that you have some hypothetical startup code that looks like this: void init() { bool called_prctl = false; prctl(PR_SET_TAGGED_ADDR_CTRL, ...); // effect is to change GCR_EL1.Excl from 0xffff to 1 called_prctl = true; } This may be compiled as something like (well, a real compiler wouldn't compile it like this but rather use sp-relative stores or eliminate the dead stores entirely, but imagine that the stores to called_prctl are obfuscated somehow, e.g. in another translation unit): sub x19, sp, #16 irg x19, x19 // compute a tag base for the function addg x0, x19, #0, #1 // add tag offset for "called_prctl" stzg x0, [x0] bl prctl addg x0, x19, #0, #1 // rematerialize "called_prctl" address mov w1, #1 strb w1, [x0] ret The first addg will materialize a tag of 0 due to the default Excl value, so the stzg will set the memory tag to 0. However, the second addg will materialize a tag of 1 because of the new Excl value, which will result in a tag fault in the strb instruction. This problem is less likely to be detected if we transition Excl from 0 to 1. It will only be detected in the case where the irg instruction produces a tag of 0xf, which would be incremented to 0 by the first addg but to 1 by the second one. > > If we change the default in this way, maybe it would be worth > > considering flipping the meaning of the tag mask and have it be a mask > > of tags to allow. That would be consistent with the existing behaviour > > where userspace sets bits in tagged_addr_ctrl in order to enable > > tagging features. > > The first question is whether the C runtime requires a default > GCR_EL1.Excl mask of 0xffff (or 0xfffe) so that IRG, ADDG, SUBG always > generate tag 0. If the runtime is fine with a default exclude mask of 0, > I'm tempted to go back to an exclude mask for prctl(). > > (to me it feels more natural to use an exclude mask as it matches the > ARM ARM definition but maybe I stare too much at the hardware specs ;)) I think that would be fine with me. With the transition from 0 to 1 the above problem would still be detected, but only 1/16 of the time. But if the problem exists in the early startup code which will be executed many times during a typical system boot, it makes it likely that the problem will be detected eventually. Peter
On Mon, Jun 22, 2020 at 12:00:48PM -0700, Peter Collingbourne wrote: > On Mon, Jun 22, 2020 at 10:17 AM Catalin Marinas > <catalin.marinas@arm.com> wrote: > > On Mon, Dec 16, 2019 at 09:30:36AM -0800, Peter Collingbourne wrote: > > > On Mon, Dec 16, 2019 at 6:20 AM Kevin Brodsky <kevin.brodsky@arm.com> wrote: > > > > In this patch, the default exclusion mask remains 0 (i.e. all tags can be generated). > > > > After some more discussions, Branislav and I think that it would be better to start > > > > with the reverse, i.e. all tags but 0 excluded (mask = 0xfe or 0xff). > > > > > > > > This should simplify the MTE setup in the early C runtime quite a bit. Indeed, if all > > > > tags can be generated, doing any heap or stack tagging before the > > > > PR_SET_TAGGED_ADDR_CTRL prctl() is issued can cause problems, notably because tagged > > > > addresses could end up being passed to syscalls. Conversely, if IRG and ADDG never > > > > set the top byte by default, then tagging operations should be no-ops until the > > > > prctl() is issued. This would be particularly useful given that it may not be > > > > straightforward for the C runtime to issue the prctl() before doing anything else. > > > > > > > > Additionally, since the default tag checking mode is PR_MTE_TCF_NONE, it would make > > > > perfect sense not to generate tags by default. > > > > > > This would indeed allow the early C runtime startup code to pass > > > tagged addresses to syscalls, [...] > > > but I don't think it would entirely free > > > the code from the burden of worrying about stack tagging. Either way, > > > any stack frames that are active at the point when the prctl() is > > > issued would need to be compiled without stack tagging, because > > > otherwise those stack frames may use ADDG to rematerialize a stack > > > object address, which may produce a different address post-prctl. [...] > > > Setting the exclude mask to 0xffff would at least make it more likely > > > for this problem to be detected, though. > > > > I thought it would be detected if we didn't have a 0xffff default > > exclude mask. With only tag 0 generated, any such problem could be > > hidden. > > I don't think that's the case, as long as you aren't using 0 as a > catch-all tag. Imagine that you have some hypothetical startup code > that looks like this: > > void init() { > bool called_prctl = false; > prctl(PR_SET_TAGGED_ADDR_CTRL, ...); // effect is to change > GCR_EL1.Excl from 0xffff to 1 > called_prctl = true; > } > > This may be compiled as something like (well, a real compiler wouldn't > compile it like this but rather use sp-relative stores or eliminate > the dead stores entirely, but imagine that the stores to called_prctl > are obfuscated somehow, e.g. in another translation unit): > > sub x19, sp, #16 > irg x19, x19 // compute a tag base for the function > addg x0, x19, #0, #1 // add tag offset for "called_prctl" > stzg x0, [x0] > bl prctl > addg x0, x19, #0, #1 // rematerialize "called_prctl" address > mov w1, #1 > strb w1, [x0] > ret > > The first addg will materialize a tag of 0 due to the default Excl > value, so the stzg will set the memory tag to 0. However, the second > addg will materialize a tag of 1 because of the new Excl value, which > will result in a tag fault in the strb instruction. > > This problem is less likely to be detected if we transition Excl from > 0 to 1. It will only be detected in the case where the irg instruction > produces a tag of 0xf, which would be incremented to 0 by the first > addg but to 1 by the second one. Thanks for the explanation. For some reason I thought ADDG would only be used once (per variable or frame). I now agree that a default exclude mask of 0xffff would catch such issues early. > > > If we change the default in this way, maybe it would be worth > > > considering flipping the meaning of the tag mask and have it be a mask > > > of tags to allow. That would be consistent with the existing behaviour > > > where userspace sets bits in tagged_addr_ctrl in order to enable > > > tagging features. > > > > The first question is whether the C runtime requires a default > > GCR_EL1.Excl mask of 0xffff (or 0xfffe) so that IRG, ADDG, SUBG always > > generate tag 0. If the runtime is fine with a default exclude mask of 0, > > I'm tempted to go back to an exclude mask for prctl(). > > > > (to me it feels more natural to use an exclude mask as it matches the > > ARM ARM definition but maybe I stare too much at the hardware specs ;)) > > I think that would be fine with me. With the transition from 0 to 1 > the above problem would still be detected, but only 1/16 of the time. > But if the problem exists in the early startup code which will be > executed many times during a typical system boot, it makes it likely > that the problem will be detected eventually. I'm not a big fan of hitting a problem 1/16 times, it makes debugging harder. So I'll stick to a default exclude mask of 0xffff, in which case it makes sense to invert the polarity for prctl() and make it an include mask (as in v4 of the patchset). Thanks.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 91aa270afc7d..5b6988035334 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -150,6 +150,7 @@ struct thread_struct { #endif #ifdef CONFIG_ARM64_MTE u64 sctlr_tcf0; + u64 gcr_excl; #endif }; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9e5753272f4b..b6bb6d31f1cd 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -901,6 +901,13 @@ write_sysreg(__scs_new, sysreg); \ } while (0) +#define sysreg_clear_set_s(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg_s(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg_s(__scs_new, sysreg); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 47ce98f47253..5ec6889795fc 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -502,6 +502,15 @@ static void update_sctlr_el1_tcf0(u64 tcf0) sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); } +static void update_gcr_el1_excl(u64 excl) +{ + /* + * No need for ISB since this only affects EL0 currently, implicit + * with ERET. + */ + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); +} + /* Handle MTE thread switch */ static void mte_thread_switch(struct task_struct *next) { @@ -511,6 +520,7 @@ static void mte_thread_switch(struct task_struct *next) /* avoid expensive SCTLR_EL1 accesses if no change */ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + update_gcr_el1_excl(next->thread.gcr_excl); } #else static void mte_thread_switch(struct task_struct *next) @@ -641,22 +651,31 @@ static long set_mte_ctrl(unsigned long arg) update_sctlr_el1_tcf0(tcf0); preempt_enable(); + current->thread.gcr_excl = (arg & PR_MTE_EXCL_MASK) >> PR_MTE_EXCL_SHIFT; + update_gcr_el1_excl(current->thread.gcr_excl); + return 0; } static long get_mte_ctrl(void) { + unsigned long ret; + if (!system_supports_mte()) return 0; + ret = current->thread.gcr_excl << PR_MTE_EXCL_SHIFT; + switch (current->thread.sctlr_tcf0) { case SCTLR_EL1_TCF0_SYNC: - return PR_MTE_TCF_SYNC; + ret |= PR_MTE_TCF_SYNC; + break; case SCTLR_EL1_TCF0_ASYNC: - return PR_MTE_TCF_ASYNC; + ret |= PR_MTE_TCF_ASYNC; + break; } - return 0; + return ret; } #else static long set_mte_ctrl(unsigned long arg) @@ -684,7 +703,7 @@ long set_tagged_addr_ctrl(unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK; + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_EXCL_MASK; if (arg & ~valid_mask) return -EINVAL; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5e9323e66a38..749de5ab4f9f 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -239,5 +239,8 @@ struct prctl_mm_map { # define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) # define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) # define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag exclusion mask */ +# define PR_MTE_EXCL_SHIFT 3 +# define PR_MTE_EXCL_MASK (0xffffUL << PR_MTE_EXCL_SHIFT) #endif /* _LINUX_PRCTL_H */
The IRG, ADDG and SUBG instructions insert a random tag in the resulting address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap when, for example, the user wants a certain colour for freed buffers. Since the GCR_EL1 register is not accessible at EL0, extend the prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in the first argument for controlling the excluded tags. This setting is pre-thread. Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/include/asm/processor.h | 1 + arch/arm64/include/asm/sysreg.h | 7 +++++++ arch/arm64/kernel/process.c | 27 +++++++++++++++++++++++---- include/uapi/linux/prctl.h | 3 +++ 4 files changed, 34 insertions(+), 4 deletions(-)