Message ID | 20220929222936.14584-25-rick.p.edgecombe@intel.com (mailing list archive)
---|---
State | New |
Series | Shadowstacks for userspace
On Thu, Sep 29, 2022 at 03:29:21PM -0700, Rick Edgecombe wrote:
> From: Yu-cheng Yu <yu-cheng.yu@intel.com>
>
> Introduce basic shadow stack enabling/disabling/allocation routines.
> A task's shadow stack is allocated from memory with VM_SHADOW_STACK flag
> and has a fixed size of min(RLIMIT_STACK, 4GB).
>
> Keep the task's shadow stack address and size in thread_struct. This will
> be copied when cloning new threads, but needs to be cleared during exec,
> so add a function to do this.
>
> Do not support IA32 emulation.
>
> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Cc: Kees Cook <keescook@chromium.org>
>
> ---
>
> v2:
>  - Get rid of unnecessary shstk->base checks
>  - Don't support IA32 emulation
>
> v1:
>  - Switch to xsave helpers.
>  - Expand commit log.
>
> Yu-cheng v30:
>  - Remove superfluous comments for struct thread_shstk.
>  - Replace 'populate' with 'unused'.
>
> Yu-cheng v28:
>  - Update shstk_setup() with wrmsrl_safe(), returns success when shadow
>    stack feature is not present (since this is a setup function).
>
>  arch/x86/include/asm/cet.h        |  13 +++
>  arch/x86/include/asm/msr.h        |  11 +++
>  arch/x86/include/asm/processor.h  |   5 ++
>  arch/x86/include/uapi/asm/prctl.h |   2 +
>  arch/x86/kernel/Makefile          |   2 +
>  arch/x86/kernel/process_64.c      |   2 +
>  arch/x86/kernel/shstk.c           | 143 ++++++++++++++++++++++++++++++
>  7 files changed, 178 insertions(+)
>
> diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
> index 0fa4dbc98c49..a4a1f4c0089b 100644
> --- a/arch/x86/include/asm/cet.h
> +++ b/arch/x86/include/asm/cet.h
> @@ -7,12 +7,25 @@
>
>  struct task_struct;
>
> +struct thread_shstk {
> +        u64     base;
> +        u64     size;
> +};
> +
>  #ifdef CONFIG_X86_SHADOW_STACK
>  long cet_prctl(struct task_struct *task, int option,
>                 unsigned long features);
> +int shstk_setup(void);
> +void shstk_free(struct task_struct *p);
> +int shstk_disable(void);
> +void reset_thread_shstk(void);
>  #else
>  static inline long cet_prctl(struct task_struct *task, int option,
>                               unsigned long features) { return -EINVAL; }
> +static inline int shstk_setup(void) { return -EOPNOTSUPP; }
> +static inline void shstk_free(struct task_struct *p) {}
> +static inline int shstk_disable(void) { return -EOPNOTSUPP; }
> +static inline void reset_thread_shstk(void) {}
> #endif /* CONFIG_X86_SHADOW_STACK */

shstk_setup() and shstk_disable() are not called outside of shstk.c, so
they can be removed from this header entirely.

>
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
> index 65ec1965cd28..a9cb4c434e60 100644
> --- a/arch/x86/include/asm/msr.h
> +++ b/arch/x86/include/asm/msr.h
> @@ -310,6 +310,17 @@ void msrs_free(struct msr *msrs);
>  int msr_set_bit(u32 msr, u8 bit);
>  int msr_clear_bit(u32 msr, u8 bit);
>
> +static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
> +{
> +        u64 val, new_val;
> +
> +        rdmsrl(msr, val);
> +        new_val = (val & ~clear) | set;
> +
> +        if (new_val != val)
> +                wrmsrl(msr, new_val);
> +}

I always get uncomfortable when I see these kinds of generalized helper
functions for touching cpu bits, etc. It just begs for future attacker
abuse to muck with arbitrary bits -- even marked inline there is a risk
the compiler will ignore that in some circumstances (not as currently
used in the code, but I'm imagining future changes leading to such a
condition). Will you humor me and change this to a macro instead? That'll
force it always inline (even __always_inline isn't always inline):

/* Helper that can never get accidentally un-inlined. */
#define set_clr_bits_msrl(msr, set, clear)      do {    \
        u64 __val, __new_val;                           \
                                                        \
        rdmsrl(msr, __val);                             \
        __new_val = (__val & ~(clear)) | (set);         \
                                                        \
        if (__new_val != __val)                         \
                wrmsrl(msr, __new_val);                 \
} while (0)

> +
>  #ifdef CONFIG_SMP
>  int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
>  int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index a92bf76edafe..3a0c9d9d4d1d 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -27,6 +27,7 @@ struct vm86;
>  #include <asm/unwind_hints.h>
>  #include <asm/vmxfeatures.h>
>  #include <asm/vdso/processor.h>
> +#include <asm/cet.h>
>
>  #include <linux/personality.h>
>  #include <linux/cache.h>
> @@ -533,6 +534,10 @@ struct thread_struct {
>          unsigned long           features;
>          unsigned long           features_locked;
>
> +#ifdef CONFIG_X86_SHADOW_STACK
> +        struct thread_shstk     shstk;
> +#endif
> +
>          /* Floating point and extended processor state */
>          struct fpu              fpu;
>          /*
> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 028158e35269..41af3a8c4fa4 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -26,4 +26,6 @@
>  #define ARCH_CET_DISABLE        0x4002
>  #define ARCH_CET_LOCK           0x4003
>

For readability, maybe add:

/* ARCH_CET_* "features" bits */

> +#define CET_SHSTK               0x1

This is UAPI, so the BIT() macro isn't available, but since this is
unsigned long, please use the form:

        (1ULL << 0)

etc...

> +
>  #endif /* _ASM_X86_PRCTL_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index a20a5ebfacd7..8950d1f71226 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -139,6 +139,8 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
>
>  obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
>
> +obj-$(CONFIG_X86_SHADOW_STACK) += shstk.o
> +
>  ###
>  # 64 bit specific files
>  ifeq ($(CONFIG_X86_64),y)
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 8fa2c2b7de65..be544b4b4c8b 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -514,6 +514,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
>                  load_gs_index(__USER_DS);
>          }
>
> +        reset_thread_shstk();
> +
>          loadsegment(fs, 0);
>          loadsegment(es, _ds);
>          loadsegment(ds, _ds);
> diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
> index e3276ac9e9b9..a0b8d4adb2bf 100644
> --- a/arch/x86/kernel/shstk.c
> +++ b/arch/x86/kernel/shstk.c
> @@ -8,8 +8,151 @@
>
>  #include <linux/sched.h>
>  #include <linux/bitops.h>
> +#include <linux/types.h>
> +#include <linux/mm.h>
> +#include <linux/mman.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/sched/signal.h>
> +#include <linux/compat.h>
> +#include <linux/sizes.h>
> +#include <linux/user.h>
> +#include <asm/msr.h>
> +#include <asm/fpu/xstate.h>
> +#include <asm/fpu/types.h>
> +#include <asm/cet.h>
> +#include <asm/special_insns.h>
> +#include <asm/fpu/api.h>
>  #include <asm/prctl.h>
>
> +static bool feature_enabled(unsigned long features)
> +{
> +        return current->thread.features & features;
> +}
> +
> +static void feature_set(unsigned long features)
> +{
> +        current->thread.features |= features;
> +}
> +
> +static void feature_clr(unsigned long features)
> +{
> +        current->thread.features &= ~features;
> +}

"feature" vs "features" here is confusing. Should these helpers enforce
the single-bit-set requirements? If so, please switch to a bit number
instead of a mask. If not, please rename these to
"features_{enabled,set,clr}", and fix "features_enabled" to check them
all:

        return (current->thread.features & features) == features;

> +static unsigned long alloc_shstk(unsigned long size)
> +{
> +        int flags = MAP_ANONYMOUS | MAP_PRIVATE;
> +        struct mm_struct *mm = current->mm;
> +        unsigned long addr, unused;

WARN_ON + clamp on "size" here, or perhaps move the bounds check from
shstk_setup() into here?

> +
> +        mmap_write_lock(mm);
> +        addr = do_mmap(NULL, addr, size, PROT_READ, flags,
> +                       VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);

This will use the mmap base address offset randomization, I guess?

> +
> +        mmap_write_unlock(mm);
> +
> +        return addr;
> +}
> +
> +static void unmap_shadow_stack(u64 base, u64 size)
> +{
> +        while (1) {
> +                int r;
> +
> +                r = vm_munmap(base, size);
> +
> +                /*
> +                 * vm_munmap() returns -EINTR when mmap_lock is held by
> +                 * something else, and that lock should not be held for a
> +                 * long time. Retry it for the case.
> +                 */
> +                if (r == -EINTR) {
> +                        cond_resched();
> +                        continue;
> +                }
> +
> +                /*
> +                 * For all other types of vm_munmap() failure, either the
> +                 * system is out of memory or there is a bug.
> +                 */
> +                WARN_ON_ONCE(r);
> +                break;
> +        }
> +}
> +
> +int shstk_setup(void)

Only called locally. Make static?

> +{
> +        struct thread_shstk *shstk = &current->thread.shstk;
> +        unsigned long addr, size;
> +
> +        /* Already enabled */
> +        if (feature_enabled(CET_SHSTK))
> +                return 0;
> +
> +        /* Also not supported for 32 bit */
> +        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) || in_ia32_syscall())
> +                return -EOPNOTSUPP;
> +
> +        size = PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
> +        addr = alloc_shstk(size);
> +        if (IS_ERR_VALUE(addr))
> +                return PTR_ERR((void *)addr);
> +
> +        fpu_lock_and_load();
> +        wrmsrl(MSR_IA32_PL3_SSP, addr + size);
> +        wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
> +        fpregs_unlock();
> +
> +        shstk->base = addr;
> +        shstk->size = size;
> +        feature_set(CET_SHSTK);
> +
> +        return 0;
> +}
> +
> +void reset_thread_shstk(void)
> +{
> +        memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
> +        current->thread.features = 0;
> +        current->thread.features_locked = 0;
> +}

If features is always going to be tied to shstk, why not put them in the
shstk struct? Also, shouldn't this also be called from
arch_setup_new_exec() instead of the open-coded wipe of features there?

> +
> +void shstk_free(struct task_struct *tsk)
> +{
> +        struct thread_shstk *shstk = &tsk->thread.shstk;
> +
> +        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
> +            !feature_enabled(CET_SHSTK))
> +                return;
> +
> +        if (!tsk->mm)
> +                return;
> +
> +        unmap_shadow_stack(shstk->base, shstk->size);

I feel like base and size should be zeroed here?

> +}
> +
> +int shstk_disable(void)

This is only called locally. static?

> +{
> +        if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
> +                return -EOPNOTSUPP;
> +
> +        /* Already disabled? */
> +        if (!feature_enabled(CET_SHSTK))
> +                return 0;
> +
> +        fpu_lock_and_load();
> +        /* Disable WRSS too when disabling shadow stack */
> +        set_clr_bits_msrl(MSR_IA32_U_CET, 0, CET_SHSTK_EN);
> +        wrmsrl(MSR_IA32_PL3_SSP, 0);
> +        fpregs_unlock();
> +
> +        shstk_free(current);
> +        feature_clr(CET_SHSTK);
> +
> +        return 0;
> +}
> +
>  long cet_prctl(struct task_struct *task, int option, unsigned long features)
>  {
>          if (option == ARCH_CET_LOCK) {
> --
> 2.17.1
>
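Putting Kees's two suggestions above together, the renamed helpers and
the UAPI bit might look like this (a hypothetical sketch of the
requested changes, not code from the posted series):

        /* In uapi/asm/prctl.h -- ARCH_CET_* "features" bits, sans BIT() */
        #define CET_SHSTK               (1ULL << 0)

        /* In shstk.c -- mask-based helpers with the plural names */
        static bool features_enabled(unsigned long features)
        {
                /* True only if *all* requested bits are set */
                return (current->thread.features & features) == features;
        }

        static void features_set(unsigned long features)
        {
                current->thread.features |= features;
        }

        static void features_clr(unsigned long features)
        {
                current->thread.features &= ~features;
        }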
On 10/3/22 12:43, Kees Cook wrote:
>> +static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
>> +{
>> +        u64 val, new_val;
>> +
>> +        rdmsrl(msr, val);
>> +        new_val = (val & ~clear) | set;
>> +
>> +        if (new_val != val)
>> +                wrmsrl(msr, new_val);
>> +}
> I always get uncomfortable when I see these kinds of generalized helper
> functions for touching cpu bits, etc. It just begs for future attacker
> abuse to muck with arbitrary bits -- even marked inline there is a risk
> the compiler will ignore that in some circumstances (not as currently
> used in the code, but I'm imagining future changes leading to such a
> condition). Will you humor me and change this to a macro instead? That'll
> force it always inline (even __always_inline isn't always inline):

Oh, are you thinking that this is dangerous because it's so surgical and
non-intrusive? It's even more powerful to an attacker than, say,
wrmsrl(), because there they actually have to know what the existing
value is to update it. With this helper, it's quite easy to flip an
individual bit without disturbing the neighboring bits.

Is that it?

I don't _like_ the #defines, but doing one here doesn't seem too onerous
considering how critical MSRs are.
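To make the contrast concrete, a hypothetical illustration of Dave's
point (the scenario is invented, not from the thread; CET_SHSTK_EN is
the MSR bit name used elsewhere in this series):

        /*
         * A corrupted call through wrmsrl() has to supply the whole
         * 64-bit value, including every bit the attacker wants to keep:
         */
        wrmsrl(MSR_IA32_U_CET, attacker_chosen_full_value);

        /*
         * The helper does the read-modify-write itself, so one bit can
         * be cleared with no knowledge of the current register value:
         */
        set_clr_bits_msrl(MSR_IA32_U_CET, 0, CET_SHSTK_EN);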
On Mon, Oct 03, 2022 at 01:04:37PM -0700, Dave Hansen wrote:
> On 10/3/22 12:43, Kees Cook wrote:
> >> +static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
> >> +{
> >> +        u64 val, new_val;
> >> +
> >> +        rdmsrl(msr, val);
> >> +        new_val = (val & ~clear) | set;
> >> +
> >> +        if (new_val != val)
> >> +                wrmsrl(msr, new_val);
> >> +}
> > I always get uncomfortable when I see these kinds of generalized helper
> > functions for touching cpu bits, etc. It just begs for future attacker
> > abuse to muck with arbitrary bits -- even marked inline there is a risk
> > the compiler will ignore that in some circumstances (not as currently
> > used in the code, but I'm imagining future changes leading to such a
> > condition). Will you humor me and change this to a macro instead? That'll
> > force it always inline (even __always_inline isn't always inline):
>
> Oh, are you thinking that this is dangerous because it's so surgical and
> non-intrusive? It's even more powerful to an attacker than, say,
> wrmsrl(), because there they actually have to know what the existing
> value is to update it. With this helper, it's quite easy to flip an
> individual bit without disturbing the neighboring bits.
>
> Is that it?

Yeah, it was kind of the combo: both a potential entry point to wrmsrl
for arbitrary values, but also one where all the work is done to mask
stuff out.

> I don't _like_ the #defines, but doing one here doesn't seem too onerous
> considering how critical MSRs are.

I bet there are others, but this just weirded me out. I'll live with a
macro, especially since the chance of it mutating into a non-inline is
very small, but I figured I'd mention the idea.
From: Dave Hansen
> Sent: 03 October 2022 21:05
>
> On 10/3/22 12:43, Kees Cook wrote:
> >> +static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
> >> +{
> >> +        u64 val, new_val;
> >> +
> >> +        rdmsrl(msr, val);
> >> +        new_val = (val & ~clear) | set;
> >> +
> >> +        if (new_val != val)
> >> +                wrmsrl(msr, new_val);
> >> +}
> > I always get uncomfortable when I see these kinds of generalized helper
> > functions for touching cpu bits, etc. It just begs for future attacker
> > abuse to muck with arbitrary bits -- even marked inline there is a risk
> > the compiler will ignore that in some circumstances (not as currently
> > used in the code, but I'm imagining future changes leading to such a
> > condition). Will you humor me and change this to a macro instead? That'll
> > force it always inline (even __always_inline isn't always inline):
>
> Oh, are you thinking that this is dangerous because it's so surgical and
> non-intrusive? It's even more powerful to an attacker than, say,
> wrmsrl(), because there they actually have to know what the existing
> value is to update it. With this helper, it's quite easy to flip an
> individual bit without disturbing the neighboring bits.
>
> Is that it?
>
> I don't _like_ the #defines, but doing one here doesn't seem too onerous
> considering how critical MSRs are.

How often is the 'msr' number not a compile-time constant?
Adding rd/wrmsr variants that verify this would reduce the
attack surface as well.

        David
On Mon, 2022-10-03 at 21:04 -0700, Kees Cook wrote:
> > I don't _like_ the #defines, but doing one here doesn't seem too
> > onerous considering how critical MSRs are.
>
> I bet there are others, but this just weirded me out. I'll live with a
> macro, especially since the chance of it mutating into a non-inline is
> very small, but I figured I'd mention the idea.

Makes sense. I'll change it to a define.
On Tue, Oct 04, 2022 at 10:17:57AM +0000, David Laight wrote:
> From: Dave Hansen
> > Sent: 03 October 2022 21:05
> >
> > On 10/3/22 12:43, Kees Cook wrote:
> > >> +static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
> > >> +{
> > >> +        u64 val, new_val;
> > >> +
> > >> +        rdmsrl(msr, val);
> > >> +        new_val = (val & ~clear) | set;
> > >> +
> > >> +        if (new_val != val)
> > >> +                wrmsrl(msr, new_val);
> > >> +}
> > > I always get uncomfortable when I see these kinds of generalized helper
> > > functions for touching cpu bits, etc. It just begs for future attacker
> > > abuse to muck with arbitrary bits -- even marked inline there is a risk
> > > the compiler will ignore that in some circumstances (not as currently
> > > used in the code, but I'm imagining future changes leading to such a
> > > condition). Will you humor me and change this to a macro instead? That'll
> > > force it always inline (even __always_inline isn't always inline):
> >
> > Oh, are you thinking that this is dangerous because it's so surgical and
> > non-intrusive? It's even more powerful to an attacker than, say,
> > wrmsrl(), because there they actually have to know what the existing
> > value is to update it. With this helper, it's quite easy to flip an
> > individual bit without disturbing the neighboring bits.
> >
> > Is that it?
> >
> > I don't _like_ the #defines, but doing one here doesn't seem too onerous
> > considering how critical MSRs are.
>
> How often is the 'msr' number not a compile-time constant?
> Adding rd/wrmsr variants that verify this would reduce the
> attack surface as well.

Oh, yes! I do this all the time with FORTIFY shenanigans. Right, so,
instead of a macro, the "cannot be un-inlined" could be enforced with
this (untested):

static __always_inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
{
        u64 val, new_val;

        BUILD_BUG_ON(!__builtin_constant_p(msr) ||
                     !__builtin_constant_p(set) ||
                     !__builtin_constant_p(clear));

        rdmsrl(msr, val);
        new_val = (val & ~clear) | set;

        if (new_val != val)
                wrmsrl(msr, new_val);
}
From: Kees Cook
> Sent: 04 October 2022 20:32
...
> Oh, yes! I do this all the time with FORTIFY shenanigans. Right, so,
> instead of a macro, the "cannot be un-inlined" could be enforced with
> this (untested):
>
> static __always_inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
> {
>         u64 val, new_val;
>
>         BUILD_BUG_ON(!__builtin_constant_p(msr) ||
>                      !__builtin_constant_p(set) ||
>                      !__builtin_constant_p(clear));

You can reduce the amount of text the brain has to parse by using:

        BUILD_BUG_ON(!__builtin_constant_p(msr + set + clear));

Just requires the brain to do a quick 'oh yes'...

        David
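Folding David's shorter check into Kees's untested helper would give
roughly the following (still a sketch, not posted code; it relies on
the sum only folding to a compile-time constant when all three
arguments are themselves constants):

static __always_inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
{
        u64 val, new_val;

        /* All three arguments must be compile-time constants. */
        BUILD_BUG_ON(!__builtin_constant_p(msr + set + clear));

        rdmsrl(msr, val);
        new_val = (val & ~clear) | set;

        if (new_val != val)
                wrmsrl(msr, new_val);
}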
Just now realizing, I never responded to most of this feedback as the
later conversation focused in on one area. All seems good (thanks!),
except I'm not sure about the below:

On Mon, 2022-10-03 at 12:43 -0700, Kees Cook wrote:
> On Thu, Sep 29, 2022 at 03:29:21PM -0700, Rick Edgecombe wrote:
> > +
> > +        mmap_write_lock(mm);
> > +        addr = do_mmap(NULL, addr, size, PROT_READ, flags,
> > +                       VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
>
> This will use the mmap base address offset randomization, I guess?

Yes.

> > +
> > +        mmap_write_unlock(mm);
> > +
> > +        return addr;
> > +}
> > +
> > +static void unmap_shadow_stack(u64 base, u64 size)
> > +{
> > +        while (1) {
> > +                int r;
> > +
> > +                r = vm_munmap(base, size);
> > +
> > +                /*
> > +                 * vm_munmap() returns -EINTR when mmap_lock is held by
> > +                 * something else, and that lock should not be held for a
> > +                 * long time. Retry it for the case.
> > +                 */
> > +                if (r == -EINTR) {
> > +                        cond_resched();
> > +                        continue;
> > +                }
> > +
> > +                /*
> > +                 * For all other types of vm_munmap() failure, either the
> > +                 * system is out of memory or there is a bug.
> > +                 */
> > +                WARN_ON_ONCE(r);
> > +                break;
> > +        }
> > +}
> > +
> > +int shstk_setup(void)
>
> Only called locally. Make static?
>
> > +{
> > +        struct thread_shstk *shstk = &current->thread.shstk;
> > +        unsigned long addr, size;
> > +
> > +        /* Already enabled */
> > +        if (feature_enabled(CET_SHSTK))
> > +                return 0;
> > +
> > +        /* Also not supported for 32 bit */
> > +        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) || in_ia32_syscall())
> > +                return -EOPNOTSUPP;
> > +
> > +        size = PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
> > +        addr = alloc_shstk(size);
> > +        if (IS_ERR_VALUE(addr))
> > +                return PTR_ERR((void *)addr);
> > +
> > +        fpu_lock_and_load();
> > +        wrmsrl(MSR_IA32_PL3_SSP, addr + size);
> > +        wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
> > +        fpregs_unlock();
> > +
> > +        shstk->base = addr;
> > +        shstk->size = size;
> > +        feature_set(CET_SHSTK);
> > +
> > +        return 0;
> > +}
> > +
> > +void reset_thread_shstk(void)
> > +{
> > +        memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
> > +        current->thread.features = 0;
> > +        current->thread.features_locked = 0;
> > +}
>
> If features is always going to be tied to shstk, why not put them in the
> shstk struct?

CET and LAM used to share an enabling interface and also kernel side
enablement state tracking. But in the end LAM got its own enabling
interface. Thomas had suggested that they could share a state field on
the kernel side. But then LAM already had enough state tracking for its
needs.

Shadow stack used to track enabling with the fields in the shstk struct
that keep track of the thread's shadow stack. But then we added WRSS,
which needs another field to keep track of its status. So I thought to
leave the 'features' field and use it for all the CET features tracking.
I left it outside of the shstk struct so it looks usable for any other
features that might be looking for a status bit. I can definitely
compile it out when there is no user shadow stack.

snip

> > +
> > +void shstk_free(struct task_struct *tsk)
> > +{
> > +        struct thread_shstk *shstk = &tsk->thread.shstk;
> > +
> > +        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
> > +            !feature_enabled(CET_SHSTK))
> > +                return;
> > +
> > +        if (!tsk->mm)
> > +                return;
> > +
> > +        unmap_shadow_stack(shstk->base, shstk->size);
>
> I feel like base and size should be zeroed here?

The code used to use shstk->base and shstk->size to keep track of
whether shadow stack was enabled. I'm not sure why to zero it now. Just
defensively, or did you see a concrete issue?
On Thu, Oct 20, 2022 at 09:29:38PM +0000, Edgecombe, Rick P wrote:
> The code used to use shstk->base and shstk->size to keep track of
> whether shadow stack was enabled. I'm not sure why to zero it now. Just
> defensively, or did you see a concrete issue?

Just to be defensive. It's not fast path by any means, so better to have
values that make a bit of sense there. *shrug* It just stood out as
feeling "missing" while I was reading the code.
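For reference, shstk_free() with the defensive zeroing Kees is
describing would look something like this (a sketch of the suggestion,
not the posted code):

void shstk_free(struct task_struct *tsk)
{
        struct thread_shstk *shstk = &tsk->thread.shstk;

        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
            !feature_enabled(CET_SHSTK))
                return;

        if (!tsk->mm)
                return;

        unmap_shadow_stack(shstk->base, shstk->size);

        /* Defensively clear the stale range now that it is unmapped. */
        shstk->base = 0;
        shstk->size = 0;
}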
diff --git a/arch/x86/include/asm/cet.h b/arch/x86/include/asm/cet.h
index 0fa4dbc98c49..a4a1f4c0089b 100644
--- a/arch/x86/include/asm/cet.h
+++ b/arch/x86/include/asm/cet.h
@@ -7,12 +7,25 @@

 struct task_struct;

+struct thread_shstk {
+        u64     base;
+        u64     size;
+};
+
 #ifdef CONFIG_X86_SHADOW_STACK
 long cet_prctl(struct task_struct *task, int option,
                unsigned long features);
+int shstk_setup(void);
+void shstk_free(struct task_struct *p);
+int shstk_disable(void);
+void reset_thread_shstk(void);
 #else
 static inline long cet_prctl(struct task_struct *task, int option,
                              unsigned long features) { return -EINVAL; }
+static inline int shstk_setup(void) { return -EOPNOTSUPP; }
+static inline void shstk_free(struct task_struct *p) {}
+static inline int shstk_disable(void) { return -EOPNOTSUPP; }
+static inline void reset_thread_shstk(void) {}
 #endif /* CONFIG_X86_SHADOW_STACK */

 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 65ec1965cd28..a9cb4c434e60 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -310,6 +310,17 @@ void msrs_free(struct msr *msrs);
 int msr_set_bit(u32 msr, u8 bit);
 int msr_clear_bit(u32 msr, u8 bit);

+static inline void set_clr_bits_msrl(u32 msr, u64 set, u64 clear)
+{
+        u64 val, new_val;
+
+        rdmsrl(msr, val);
+        new_val = (val & ~clear) | set;
+
+        if (new_val != val)
+                wrmsrl(msr, new_val);
+}
+
 #ifdef CONFIG_SMP
 int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h);
 int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a92bf76edafe..3a0c9d9d4d1d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -27,6 +27,7 @@ struct vm86;
 #include <asm/unwind_hints.h>
 #include <asm/vmxfeatures.h>
 #include <asm/vdso/processor.h>
+#include <asm/cet.h>

 #include <linux/personality.h>
 #include <linux/cache.h>
@@ -533,6 +534,10 @@ struct thread_struct {
         unsigned long           features;
         unsigned long           features_locked;

+#ifdef CONFIG_X86_SHADOW_STACK
+        struct thread_shstk     shstk;
+#endif
+
         /* Floating point and extended processor state */
         struct fpu              fpu;
         /*
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 028158e35269..41af3a8c4fa4 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -26,4 +26,6 @@
 #define ARCH_CET_DISABLE        0x4002
 #define ARCH_CET_LOCK           0x4003

+#define CET_SHSTK               0x1
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a20a5ebfacd7..8950d1f71226 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -139,6 +139,8 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o

 obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o

+obj-$(CONFIG_X86_SHADOW_STACK) += shstk.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8fa2c2b7de65..be544b4b4c8b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -514,6 +514,8 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
                 load_gs_index(__USER_DS);
         }

+        reset_thread_shstk();
+
         loadsegment(fs, 0);
         loadsegment(es, _ds);
         loadsegment(ds, _ds);
diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c
index e3276ac9e9b9..a0b8d4adb2bf 100644
--- a/arch/x86/kernel/shstk.c
+++ b/arch/x86/kernel/shstk.c
@@ -8,8 +8,151 @@

 #include <linux/sched.h>
 #include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/sched/signal.h>
+#include <linux/compat.h>
+#include <linux/sizes.h>
+#include <linux/user.h>
+#include <asm/msr.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/types.h>
+#include <asm/cet.h>
+#include <asm/special_insns.h>
+#include <asm/fpu/api.h>
 #include <asm/prctl.h>

+static bool feature_enabled(unsigned long features)
+{
+        return current->thread.features & features;
+}
+
+static void feature_set(unsigned long features)
+{
+        current->thread.features |= features;
+}
+
+static void feature_clr(unsigned long features)
+{
+        current->thread.features &= ~features;
+}
+
+static unsigned long alloc_shstk(unsigned long size)
+{
+        int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+        struct mm_struct *mm = current->mm;
+        unsigned long addr, unused;
+
+        mmap_write_lock(mm);
+        addr = do_mmap(NULL, addr, size, PROT_READ, flags,
+                       VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
+
+        mmap_write_unlock(mm);
+
+        return addr;
+}
+
+static void unmap_shadow_stack(u64 base, u64 size)
+{
+        while (1) {
+                int r;
+
+                r = vm_munmap(base, size);
+
+                /*
+                 * vm_munmap() returns -EINTR when mmap_lock is held by
+                 * something else, and that lock should not be held for a
+                 * long time. Retry it for the case.
+                 */
+                if (r == -EINTR) {
+                        cond_resched();
+                        continue;
+                }
+
+                /*
+                 * For all other types of vm_munmap() failure, either the
+                 * system is out of memory or there is a bug.
+                 */
+                WARN_ON_ONCE(r);
+                break;
+        }
+}
+
+int shstk_setup(void)
+{
+        struct thread_shstk *shstk = &current->thread.shstk;
+        unsigned long addr, size;
+
+        /* Already enabled */
+        if (feature_enabled(CET_SHSTK))
+                return 0;
+
+        /* Also not supported for 32 bit */
+        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) || in_ia32_syscall())
+                return -EOPNOTSUPP;
+
+        size = PAGE_ALIGN(min_t(unsigned long long, rlimit(RLIMIT_STACK), SZ_4G));
+        addr = alloc_shstk(size);
+        if (IS_ERR_VALUE(addr))
+                return PTR_ERR((void *)addr);
+
+        fpu_lock_and_load();
+        wrmsrl(MSR_IA32_PL3_SSP, addr + size);
+        wrmsrl(MSR_IA32_U_CET, CET_SHSTK_EN);
+        fpregs_unlock();
+
+        shstk->base = addr;
+        shstk->size = size;
+        feature_set(CET_SHSTK);
+
+        return 0;
+}
+
+void reset_thread_shstk(void)
+{
+        memset(&current->thread.shstk, 0, sizeof(struct thread_shstk));
+        current->thread.features = 0;
+        current->thread.features_locked = 0;
+}
+
+void shstk_free(struct task_struct *tsk)
+{
+        struct thread_shstk *shstk = &tsk->thread.shstk;
+
+        if (!cpu_feature_enabled(X86_FEATURE_SHSTK) ||
+            !feature_enabled(CET_SHSTK))
+                return;
+
+        if (!tsk->mm)
+                return;
+
+        unmap_shadow_stack(shstk->base, shstk->size);
+}
+
+int shstk_disable(void)
+{
+        if (!cpu_feature_enabled(X86_FEATURE_SHSTK))
+                return -EOPNOTSUPP;
+
+        /* Already disabled? */
+        if (!feature_enabled(CET_SHSTK))
+                return 0;
+
+        fpu_lock_and_load();
+        /* Disable WRSS too when disabling shadow stack */
+        set_clr_bits_msrl(MSR_IA32_U_CET, 0, CET_SHSTK_EN);
+        wrmsrl(MSR_IA32_PL3_SSP, 0);
+        fpregs_unlock();
+
+        shstk_free(current);
+        feature_clr(CET_SHSTK);
+
+        return 0;
+}
+
 long cet_prctl(struct task_struct *task, int option, unsigned long features)
 {
         if (option == ARCH_CET_LOCK) {