Message ID | 20200325161249.55095-25-glider@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add KernelMemorySanitizer infrastructure | expand |
On Wed, Mar 25, 2020 at 5:14 PM <glider@google.com> wrote: > > Some functions are called from handwritten assembly, and therefore don't > have their arguments' metadata fully set up by the instrumentation code. > Mark them with __no_sanitize_memory to avoid false positives from > spreading further. > Certain functions perform task switching, so that the value of |current| > is different as they proceed. Because KMSAN state pointer is only read > once at the beginning of the function, touching it after |current| has > changed may be dangerous. I think that whenever a patch touches some subsystem, we need to mention that in the commit title. This patch touches mostly x86, so "kmsan, x86: ..." or something like that. This will probably make x86 maintainers more involved. > > Signed-off-by: Alexander Potapenko <glider@google.com> > To: Alexander Potapenko <glider@google.com> > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Andrew Morton <akpm@linux-foundation.org> > Cc: Vegard Nossum <vegard.nossum@oracle.com> > Cc: Dmitry Vyukov <dvyukov@google.com> > Cc: Marco Elver <elver@google.com> > Cc: Andrey Konovalov <andreyknvl@google.com> > Cc: linux-mm@kvack.org > --- > v3: > - removed TODOs from comments > > v4: > - updated the comments, dropped __no_sanitize_memory from idle_cpu(), > sched_init(), profile_tick() > - split away the uprobes part as requested by Andrey Konovalov > > Change-Id: I684d23dac5a22eb0a4cea71993cb934302b17cea > --- > arch/x86/entry/common.c | 2 ++ > arch/x86/include/asm/irq_regs.h | 2 ++ > arch/x86/include/asm/syscall_wrapper.h | 2 ++ > arch/x86/kernel/apic/apic.c | 3 +++ > arch/x86/kernel/dumpstack_64.c | 5 +++++ > arch/x86/kernel/process_64.c | 5 +++++ > arch/x86/kernel/traps.c | 13 +++++++++++-- > kernel/sched/core.c | 22 ++++++++++++++++++++++ > 8 files changed, 52 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c > index ec167d8c41cbd..5c3d0f3a14c37 100644 > --- a/arch/x86/entry/common.c > +++ 
b/arch/x86/entry/common.c > @@ -280,6 +280,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) > } > > #ifdef CONFIG_X86_64 > +/* Tell KMSAN to not instrument this function and to initialize |regs|. */ > +__no_sanitize_memory > __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) > { > struct thread_info *ti; > diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h > index 187ce59aea28e..a6fc1641e2861 100644 > --- a/arch/x86/include/asm/irq_regs.h > +++ b/arch/x86/include/asm/irq_regs.h > @@ -14,6 +14,8 @@ > > DECLARE_PER_CPU(struct pt_regs *, irq_regs); > > +/* Tell KMSAN to return an initialized struct pt_regs. */ > +__no_sanitize_memory > static inline struct pt_regs *get_irq_regs(void) > { > return __this_cpu_read(irq_regs); > diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h > index e2389ce9bf58a..098b1a8d6bc41 100644 > --- a/arch/x86/include/asm/syscall_wrapper.h > +++ b/arch/x86/include/asm/syscall_wrapper.h > @@ -196,6 +196,8 @@ struct pt_regs; > ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ > static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ > static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ > + /* Tell KMSAN to initialize |regs|. */ \ > + __no_sanitize_memory \ > asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ > { \ > return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ > diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c > index 5f973fed3c9ff..1f0250f14e462 100644 > --- a/arch/x86/kernel/apic/apic.c > +++ b/arch/x86/kernel/apic/apic.c > @@ -1127,6 +1127,9 @@ static void local_apic_timer_interrupt(void) > * [ if a single-CPU system runs an SMP kernel then we call the local > * interrupt as well. Thus we cannot inline the local irq ... ] > */ > + > +/* Tell KMSAN to initialize |regs|. 
*/ > +__no_sanitize_memory > __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) > { > struct pt_regs *old_regs = set_irq_regs(regs); > diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c > index 87b97897a8810..3d1691f81cada 100644 > --- a/arch/x86/kernel/dumpstack_64.c > +++ b/arch/x86/kernel/dumpstack_64.c > @@ -150,6 +150,11 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info) > return true; > } > > +/* > + * This function may touch stale uninitialized values on stack. Do not > + * instrument it with KMSAN to avoid false positives. > + */ > +__no_sanitize_memory > int get_stack_info(unsigned long *stack, struct task_struct *task, > struct stack_info *info, unsigned long *visit_mask) > { > diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c > index ffd497804dbc3..5e8c6767e9916 100644 > --- a/arch/x86/kernel/process_64.c > +++ b/arch/x86/kernel/process_64.c > @@ -424,6 +424,11 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) > * Kprobes not supported here. Set the probe on schedule instead. > * Function graph tracer not supported too. > */ > +/* > + * Avoid touching KMSAN state or reporting anything here, as __switch_to() does > + * weird things with tasks. > + */ > +__no_sanitize_memory > __visible __notrace_funcgraph struct task_struct * > __switch_to(struct task_struct *prev_p, struct task_struct *next_p) > { > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index d54cffdc7cac2..917268aee054e 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -638,7 +638,11 @@ NOKPROBE_SYMBOL(do_int3); > * Help handler running on a per-cpu (IST or entry trampoline) stack > * to switch to the normal thread stack if the interrupted code was in > * user mode. The actual stack switch is done in entry_64.S > + * > */ > + > +/* This function switches the registers - don't instrument it with KMSAN. 
*/ > +__no_sanitize_memory > asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) > { > struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; > @@ -654,6 +658,11 @@ struct bad_iret_stack { > }; > > asmlinkage __visible notrace > +/* > + * Dark magic happening here, let's not instrument this function. > + * Also avoid copying any metadata by using raw __memmove(). > + */ > +__no_sanitize_memory > struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) > { > /* > @@ -668,10 +677,10 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) > (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; > > /* Copy the IRET target to the new stack. */ > - memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); > + __memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); > > /* Copy the remainder of the stack from the current stack. */ > - memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); > + __memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); > > BUG_ON(!user_mode(&new_stack->regs)); > return new_stack; > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 1a5937936ac75..bb1b659c12f6a 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -471,6 +471,11 @@ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) > put_task_struct(task); > } > > +/* > + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN > + * instrumentation. > + */ > +__no_sanitize_memory > void wake_up_q(struct wake_q_head *head) > { > struct wake_q_node *node = head->first; > @@ -3217,6 +3222,12 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, > * past. prev == current is still correct but we need to recalculate this_rq > * because prev may have moved to another CPU. > */ > + > +/* > + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN > + * instrumentation. 
> + */ > +__no_sanitize_memory > static struct rq *finish_task_switch(struct task_struct *prev) > __releases(rq->lock) > { > @@ -4052,6 +4063,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) > * > * WARNING: must be called with preemption disabled! > */ > + > +/* > + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN > + * instrumentation. > + */ > +__no_sanitize_memory > static void __sched notrace __schedule(bool preempt) > { > struct task_struct *prev, *next; > @@ -6789,6 +6806,11 @@ static inline int preempt_count_equals(int preempt_offset) > return (nested == preempt_offset); > } > > +/* > + * This function might be called from code that is not instrumented with KMSAN. > + * Nevertheless, treat its arguments as initialized. > + */ > +__no_sanitize_memory > void __might_sleep(const char *file, int line, int preempt_offset) > { > /* > -- > 2.25.1.696.g5e7596f4ac-goog >
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index ec167d8c41cbd..5c3d0f3a14c37 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -280,6 +280,8 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs) } #ifdef CONFIG_X86_64 +/* Tell KMSAN to not instrument this function and to initialize |regs|. */ +__no_sanitize_memory __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs) { struct thread_info *ti; diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 187ce59aea28e..a6fc1641e2861 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -14,6 +14,8 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); +/* Tell KMSAN to return an initialized struct pt_regs. */ +__no_sanitize_memory static inline struct pt_regs *get_irq_regs(void) { return __this_cpu_read(irq_regs); diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e2389ce9bf58a..098b1a8d6bc41 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -196,6 +196,8 @@ struct pt_regs; ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + /* Tell KMSAN to initialize |regs|. */ \ + __no_sanitize_memory \ asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5f973fed3c9ff..1f0250f14e462 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1127,6 +1127,9 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ + +/* Tell KMSAN to initialize |regs|. 
*/ +__no_sanitize_memory __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 87b97897a8810..3d1691f81cada 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -150,6 +150,11 @@ static bool in_irq_stack(unsigned long *stack, struct stack_info *info) return true; } +/* + * This function may touch stale uninitialized values on stack. Do not + * instrument it with KMSAN to avoid false positives. + */ +__no_sanitize_memory int get_stack_info(unsigned long *stack, struct task_struct *task, struct stack_info *info, unsigned long *visit_mask) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ffd497804dbc3..5e8c6767e9916 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -424,6 +424,11 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) * Kprobes not supported here. Set the probe on schedule instead. * Function graph tracer not supported too. */ +/* + * Avoid touching KMSAN state or reporting anything here, as __switch_to() does + * weird things with tasks. + */ +__no_sanitize_memory __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d54cffdc7cac2..917268aee054e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -638,7 +638,11 @@ NOKPROBE_SYMBOL(do_int3); * Help handler running on a per-cpu (IST or entry trampoline) stack * to switch to the normal thread stack if the interrupted code was in * user mode. The actual stack switch is done in entry_64.S + * */ + +/* This function switches the registers - don't instrument it with KMSAN. 
*/ +__no_sanitize_memory asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; @@ -654,6 +658,11 @@ struct bad_iret_stack { }; asmlinkage __visible notrace +/* + * Dark magic happening here, let's not instrument this function. + * Also avoid copying any metadata by using raw __memmove(). + */ +__no_sanitize_memory struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) { /* @@ -668,10 +677,10 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the new stack. */ - memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); + __memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); /* Copy the remainder of the stack from the current stack. */ - memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); + __memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); BUG_ON(!user_mode(&new_stack->regs)); return new_stack; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1a5937936ac75..bb1b659c12f6a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -471,6 +471,11 @@ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) put_task_struct(task); } +/* + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN + * instrumentation. + */ +__no_sanitize_memory void wake_up_q(struct wake_q_head *head) { struct wake_q_node *node = head->first; @@ -3217,6 +3222,12 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, * past. prev == current is still correct but we need to recalculate this_rq * because prev may have moved to another CPU. */ + +/* + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN + * instrumentation. 
+ */ +__no_sanitize_memory static struct rq *finish_task_switch(struct task_struct *prev) __releases(rq->lock) { @@ -4052,6 +4063,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * * WARNING: must be called with preemption disabled! */ + +/* + * Context switch here may lead to KMSAN task state corruption. Disable KMSAN + * instrumentation. + */ +__no_sanitize_memory static void __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; @@ -6789,6 +6806,11 @@ static inline int preempt_count_equals(int preempt_offset) return (nested == preempt_offset); } +/* + * This function might be called from code that is not instrumented with KMSAN. + * Nevertheless, treat its arguments as initialized. + */ +__no_sanitize_memory void __might_sleep(const char *file, int line, int preempt_offset) { /*
Some functions are called from handwritten assembly, and therefore don't have their arguments' metadata fully set up by the instrumentation code. Mark them with __no_sanitize_memory to avoid false positives from spreading further. Certain functions perform task switching, so that the value of |current| is different as they proceed. Because the KMSAN state pointer is only read once at the beginning of the function, touching it after |current| has changed may be dangerous. Signed-off-by: Alexander Potapenko <glider@google.com> To: Alexander Potapenko <glider@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Vegard Nossum <vegard.nossum@oracle.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Marco Elver <elver@google.com> Cc: Andrey Konovalov <andreyknvl@google.com> Cc: linux-mm@kvack.org --- v3: - removed TODOs from comments v4: - updated the comments, dropped __no_sanitize_memory from idle_cpu(), sched_init(), profile_tick() - split away the uprobes part as requested by Andrey Konovalov Change-Id: I684d23dac5a22eb0a4cea71993cb934302b17cea --- arch/x86/entry/common.c | 2 ++ arch/x86/include/asm/irq_regs.h | 2 ++ arch/x86/include/asm/syscall_wrapper.h | 2 ++ arch/x86/kernel/apic/apic.c | 3 +++ arch/x86/kernel/dumpstack_64.c | 5 +++++ arch/x86/kernel/process_64.c | 5 +++++ arch/x86/kernel/traps.c | 13 +++++++++++-- kernel/sched/core.c | 22 ++++++++++++++++++++++ 8 files changed, 52 insertions(+), 2 deletions(-)