Message ID | 1471381865-25724-5-git-send-email-keescook@chromium.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: > The kernel checks for several cases of data structure corruption under > either normal runtime, or under various CONFIG_DEBUG_* settings. When > corruption is detected, some systems may want to BUG() immediately instead > of letting the corruption continue. Many of these manipulation primitives > can be used by security flaws to gain arbitrary memory write control. This > provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. > > This is inspired by similar hardening in PaX and Grsecurity, and by > Stephen Boyd in MSM kernels. > > Signed-off-by: Kees Cook <keescook@chromium.org> OK, I will bite... Why both the WARN() and the BUG_ON()? Thanx, Paul > --- > include/linux/bug.h | 7 +++++++ > kernel/locking/spinlock_debug.c | 1 + > kernel/workqueue.c | 2 ++ > lib/Kconfig.debug | 10 ++++++++++ > lib/list_debug.c | 7 +++++++ > 5 files changed, 27 insertions(+) > > diff --git a/include/linux/bug.h b/include/linux/bug.h > index e51b0709e78d..7e69758dd798 100644 > --- a/include/linux/bug.h > +++ b/include/linux/bug.h > @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, > } > > #endif /* CONFIG_GENERIC_BUG */ > + > +#ifdef CONFIG_BUG_ON_CORRUPTION > +# define CORRUPTED_DATA_STRUCTURE true > +#else > +# define CORRUPTED_DATA_STRUCTURE false > +#endif > + > #endif /* _LINUX_BUG_H */ > diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c > index 0374a596cffa..d5f833769feb 100644 > --- a/kernel/locking/spinlock_debug.c > +++ b/kernel/locking/spinlock_debug.c > @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) > owner ? owner->comm : "<none>", > owner ? task_pid_nr(owner) : -1, > lock->owner_cpu); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > dump_stack(); > } > > diff --git a/kernel/workqueue.c b/kernel/workqueue.c > index ef071ca73fc3..ea0132b55eca 100644 > --- a/kernel/workqueue.c > +++ b/kernel/workqueue.c > @@ -48,6 +48,7 @@ > #include <linux/nodemask.h> > #include <linux/moduleparam.h> > #include <linux/uaccess.h> > +#include <linux/bug.h> > > #include "workqueue_internal.h" > > @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) > current->comm, preempt_count(), task_pid_nr(current), > worker->current_func); > debug_show_held_locks(current); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > dump_stack(); > } > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index 2307d7c89dac..d64bd6b6fd45 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS > > If unsure, say N. > > +config BUG_ON_CORRUPTION > + bool "Trigger a BUG when data corruption is detected" > + help > + Select this option if the kernel should BUG when it encounters > + data corruption in various kernel memory structures during checks > + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, > + etc. > + > + If unsure, say N. > + > source "samples/Kconfig" > > source "lib/Kconfig.kgdb" > diff --git a/lib/list_debug.c b/lib/list_debug.c > index 80e2e40a6a4e..161c7e7d3478 100644 > --- a/lib/list_debug.c > +++ b/lib/list_debug.c > @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, > if (unlikely(next->prev != prev)) { > WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", > prev, next->prev, next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev->next != next)) { > WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", > next, prev->next, prev); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(new == prev || new == next)) { > WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", > new, prev, next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > return true; > @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) > if (unlikely(next == LIST_POISON1)) { > WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", > entry, LIST_POISON1); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev == LIST_POISON2)) { > WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", > entry, LIST_POISON2); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev->next != entry)) { > WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", > entry, prev->next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(next->prev != entry)) { > WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > entry, next->prev); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > return true; > -- > 2.7.4 >
On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: >> The kernel checks for several cases of data structure corruption under >> either normal runtime, or under various CONFIG_DEBUG_* settings. When >> corruption is detected, some systems may want to BUG() immediately instead >> of letting the corruption continue. Many of these manipulation primitives >> can be used by security flaws to gain arbitrary memory write control. This >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. >> >> This is inspired by similar hardening in PaX and Grsecurity, and by >> Stephen Boyd in MSM kernels. >> >> Signed-off-by: Kees Cook <keescook@chromium.org> > > OK, I will bite... Why both the WARN() and the BUG_ON()? Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is cleanly paired with a WARN (see the workqueue addition that wants to dump locks too). I could rearrange things a bit, though, and create something like: #ifdef CONFIG_BUG_ON_CORRUPTION # define CORRUPTED(format...) { \ printk(KERN_ERR format); \ BUG(); \ } #else # define CORRUPTED(format...) WARN(1, format) #endif What do you think? -Kees > > Thanx, Paul > >> --- >> include/linux/bug.h | 7 +++++++ >> kernel/locking/spinlock_debug.c | 1 + >> kernel/workqueue.c | 2 ++ >> lib/Kconfig.debug | 10 ++++++++++ >> lib/list_debug.c | 7 +++++++ >> 5 files changed, 27 insertions(+) >> >> diff --git a/include/linux/bug.h b/include/linux/bug.h >> index e51b0709e78d..7e69758dd798 100644 >> --- a/include/linux/bug.h >> +++ b/include/linux/bug.h >> @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, >> } >> >> #endif /* CONFIG_GENERIC_BUG */ >> + >> +#ifdef CONFIG_BUG_ON_CORRUPTION >> +# define CORRUPTED_DATA_STRUCTURE true >> +#else >> +# define CORRUPTED_DATA_STRUCTURE false >> +#endif >> + >> #endif /* _LINUX_BUG_H */ >> diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c >> index 0374a596cffa..d5f833769feb 100644 >> --- a/kernel/locking/spinlock_debug.c >> +++ b/kernel/locking/spinlock_debug.c >> @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) >> owner ? owner->comm : "<none>", >> owner ? task_pid_nr(owner) : -1, >> lock->owner_cpu); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> dump_stack(); >> } >> >> diff --git a/kernel/workqueue.c b/kernel/workqueue.c >> index ef071ca73fc3..ea0132b55eca 100644 >> --- a/kernel/workqueue.c >> +++ b/kernel/workqueue.c >> @@ -48,6 +48,7 @@ >> #include <linux/nodemask.h> >> #include <linux/moduleparam.h> >> #include <linux/uaccess.h> >> +#include <linux/bug.h> >> >> #include "workqueue_internal.h" >> >> @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) >> current->comm, preempt_count(), task_pid_nr(current), >> worker->current_func); >> debug_show_held_locks(current); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> dump_stack(); >> } >> >> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug >> index 2307d7c89dac..d64bd6b6fd45 100644 >> --- a/lib/Kconfig.debug >> +++ b/lib/Kconfig.debug >> @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS >> >> If unsure, say N. >> >> +config BUG_ON_CORRUPTION >> + bool "Trigger a BUG when data corruption is detected" >> + help >> + Select this option if the kernel should BUG when it encounters >> + data corruption in various kernel memory structures during checks >> + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, >> + etc. >> + >> + If unsure, say N. >> + >> source "samples/Kconfig" >> >> source "lib/Kconfig.kgdb" >> diff --git a/lib/list_debug.c b/lib/list_debug.c >> index 80e2e40a6a4e..161c7e7d3478 100644 >> --- a/lib/list_debug.c >> +++ b/lib/list_debug.c >> @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, >> if (unlikely(next->prev != prev)) { >> WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", >> prev, next->prev, next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev->next != next)) { >> WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", >> next, prev->next, prev); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(new == prev || new == next)) { >> WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", >> new, prev, next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> return true; >> @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) >> if (unlikely(next == LIST_POISON1)) { >> WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", >> entry, LIST_POISON1); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev == LIST_POISON2)) { >> WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", >> entry, LIST_POISON2); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev->next != entry)) { >> WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", >> entry, prev->next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(next->prev != entry)) { >> WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", >> entry, next->prev); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> return true; >> -- >> 2.7.4 >> >
On 08/16/2016 02:11 PM, Kees Cook wrote: > The kernel checks for several cases of data structure corruption under > either normal runtime, or under various CONFIG_DEBUG_* settings. When > corruption is detected, some systems may want to BUG() immediately instead > of letting the corruption continue. Many of these manipulation primitives > can be used by security flaws to gain arbitrary memory write control. This > provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. > > This is inspired by similar hardening in PaX and Grsecurity, and by > Stephen Boyd in MSM kernels. > This was never my favorite patch in the MSM tree, mostly because it seemed to proliferate in random places. Some of these like the list were legit corruption but others like the spinlock and workqueue were indication of more hardware induced corruption and less kernel or software bugs. I'd rather see the detection added in structures specifically identified to be vulnerable. spinlocks and workqueues don't seem to be high on the list to me. If they are, I think this could use some explanation of why these in particular deserve checks vs. any other place where we might detect corruption. > Signed-off-by: Kees Cook <keescook@chromium.org> > --- > include/linux/bug.h | 7 +++++++ > kernel/locking/spinlock_debug.c | 1 + > kernel/workqueue.c | 2 ++ > lib/Kconfig.debug | 10 ++++++++++ > lib/list_debug.c | 7 +++++++ > 5 files changed, 27 insertions(+) > > diff --git a/include/linux/bug.h b/include/linux/bug.h > index e51b0709e78d..7e69758dd798 100644 > --- a/include/linux/bug.h > +++ b/include/linux/bug.h > @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, > } > > #endif /* CONFIG_GENERIC_BUG */ > + > +#ifdef CONFIG_BUG_ON_CORRUPTION > +# define CORRUPTED_DATA_STRUCTURE true > +#else > +# define CORRUPTED_DATA_STRUCTURE false > +#endif > + > #endif /* _LINUX_BUG_H */ > diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c > index 0374a596cffa..d5f833769feb 100644 > --- a/kernel/locking/spinlock_debug.c > +++ b/kernel/locking/spinlock_debug.c > @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) > owner ? owner->comm : "<none>", > owner ? task_pid_nr(owner) : -1, > lock->owner_cpu); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > dump_stack(); > } > > diff --git a/kernel/workqueue.c b/kernel/workqueue.c > index ef071ca73fc3..ea0132b55eca 100644 > --- a/kernel/workqueue.c > +++ b/kernel/workqueue.c > @@ -48,6 +48,7 @@ > #include <linux/nodemask.h> > #include <linux/moduleparam.h> > #include <linux/uaccess.h> > +#include <linux/bug.h> > > #include "workqueue_internal.h" > > @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) > current->comm, preempt_count(), task_pid_nr(current), > worker->current_func); > debug_show_held_locks(current); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > dump_stack(); > } > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index 2307d7c89dac..d64bd6b6fd45 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS > > If unsure, say N. > > +config BUG_ON_CORRUPTION > + bool "Trigger a BUG when data corruption is detected" > + help > + Select this option if the kernel should BUG when it encounters > + data corruption in various kernel memory structures during checks > + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, > + etc. > + > + If unsure, say N. > + > source "samples/Kconfig" > > source "lib/Kconfig.kgdb" > diff --git a/lib/list_debug.c b/lib/list_debug.c > index 80e2e40a6a4e..161c7e7d3478 100644 > --- a/lib/list_debug.c > +++ b/lib/list_debug.c > @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, > if (unlikely(next->prev != prev)) { > WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", > prev, next->prev, next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev->next != next)) { > WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", > next, prev->next, prev); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(new == prev || new == next)) { > WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", > new, prev, next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > return true; > @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) > if (unlikely(next == LIST_POISON1)) { > WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", > entry, LIST_POISON1); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev == LIST_POISON2)) { > WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", > entry, LIST_POISON2); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(prev->next != entry)) { > WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", > entry, prev->next); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > if (unlikely(next->prev != entry)) { > WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > entry, next->prev); > + BUG_ON(CORRUPTED_DATA_STRUCTURE); > return false; > } > return true; > The git history shows 924d9addb9b1 ("list debugging: use WARN() instead of BUG()") deliberately switched this from BUG to WARN. Do we still need the WARN at all or can we just switch to BUG permanently? Thanks, Laura
On Tue, 16 Aug 2016 14:42:28 -0700 Kees Cook <keescook@chromium.org> wrote: > > OK, I will bite... Why both the WARN() and the BUG_ON()? > > Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is > cleanly paired with a WARN (see the workqueue addition that wants to > dump locks too). I could rearrange things a bit, though, and create > something like: > > #ifdef CONFIG_BUG_ON_CORRUPTION > # define CORRUPTED(format...) { \ > printk(KERN_ERR format); \ > BUG(); \ > } > #else > # define CORRUPTED(format...) WARN(1, format) > #endif > > What do you think? I prefer what you currently have. WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", entry, next->prev); BUG_ON(CORRUPTED_DATA_STRUCTURE); Will always warn (as stated by "1") and and the BUG_ON() will bug if CORRUPTED_DATA_STRUCTURE is set. Although, I don't like that name. Can we have a: BUG_ON(BUG_ON_CORRUPTED_DATA_STRUCTURES); Or maybe have that as a macro: #ifdef CONFIG_BUG_ON_CORRUPTION # define BUG_ON_CORRUPTED_DATA_STRUCTURE() BUG_ON(1) #else # define BUG_ON_CORRUPTED_DATA_STRUCTURE() do {} while (0) #endif Then we can have: WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", entry, next->prev); BUG_ON_CORRUPTED_DATA_STRUCTURE(); ?? -- Steve > > -Kees > > > > > Thanx, Paul > > > >> --- > >> include/linux/bug.h | 7 +++++++ > >> kernel/locking/spinlock_debug.c | 1 + > >> kernel/workqueue.c | 2 ++ > >> lib/Kconfig.debug | 10 ++++++++++ > >> lib/list_debug.c | 7 +++++++ > >> 5 files changed, 27 insertions(+) > >> > >> diff --git a/include/linux/bug.h b/include/linux/bug.h > >> index e51b0709e78d..7e69758dd798 100644 > >> --- a/include/linux/bug.h > >> +++ b/include/linux/bug.h > >> @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, > >> } > >> > >> #endif /* CONFIG_GENERIC_BUG */ > >> + > >> +#ifdef CONFIG_BUG_ON_CORRUPTION > >> +# define CORRUPTED_DATA_STRUCTURE true > >> +#else > >> +# define CORRUPTED_DATA_STRUCTURE false > >> +#endif > >> + > >> #endif /* _LINUX_BUG_H */ > >> diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c > >> index 0374a596cffa..d5f833769feb 100644 > >> --- a/kernel/locking/spinlock_debug.c > >> +++ b/kernel/locking/spinlock_debug.c > >> @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) > >> owner ? owner->comm : "<none>", > >> owner ? task_pid_nr(owner) : -1, > >> lock->owner_cpu); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> dump_stack(); > >> } > >> > >> diff --git a/kernel/workqueue.c b/kernel/workqueue.c > >> index ef071ca73fc3..ea0132b55eca 100644 > >> --- a/kernel/workqueue.c > >> +++ b/kernel/workqueue.c > >> @@ -48,6 +48,7 @@ > >> #include <linux/nodemask.h> > >> #include <linux/moduleparam.h> > >> #include <linux/uaccess.h> > >> +#include <linux/bug.h> > >> > >> #include "workqueue_internal.h" > >> > >> @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) > >> current->comm, preempt_count(), task_pid_nr(current), > >> worker->current_func); > >> debug_show_held_locks(current); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> dump_stack(); > >> } > >> > >> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > >> index 2307d7c89dac..d64bd6b6fd45 100644 > >> --- a/lib/Kconfig.debug > >> +++ b/lib/Kconfig.debug > >> @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS > >> > >> If unsure, say N. > >> > >> +config BUG_ON_CORRUPTION > >> + bool "Trigger a BUG when data corruption is detected" > >> + help > >> + Select this option if the kernel should BUG when it encounters > >> + data corruption in various kernel memory structures during checks > >> + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, > >> + etc. > >> + > >> + If unsure, say N. > >> + > >> source "samples/Kconfig" > >> > >> source "lib/Kconfig.kgdb" > >> diff --git a/lib/list_debug.c b/lib/list_debug.c > >> index 80e2e40a6a4e..161c7e7d3478 100644 > >> --- a/lib/list_debug.c > >> +++ b/lib/list_debug.c > >> @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, > >> if (unlikely(next->prev != prev)) { > >> WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", > >> prev, next->prev, next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev->next != next)) { > >> WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", > >> next, prev->next, prev); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(new == prev || new == next)) { > >> WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", > >> new, prev, next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> return true; > >> @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) > >> if (unlikely(next == LIST_POISON1)) { > >> WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", > >> entry, LIST_POISON1); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev == LIST_POISON2)) { > >> WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", > >> entry, LIST_POISON2); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev->next != entry)) { > >> WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", > >> entry, prev->next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(next->prev != entry)) { > >> WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > >> entry, next->prev); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> return true; > >> -- > >> 2.7.4 > >> > > > > >
On Tue, 16 Aug 2016 17:53:54 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > entry, next->prev); > BUG_ON(CORRUPTED_DATA_STRUCTURE); > > Will always warn (as stated by "1") and and the BUG_ON() will bug if > CORRUPTED_DATA_STRUCTURE is set. Although, I don't like that name. Can > we have a: > > BUG_ON(BUG_ON_CORRUPTED_DATA_STRUCTURES); > > Or maybe have that as a macro: > > #ifdef CONFIG_BUG_ON_CORRUPTION > # define BUG_ON_CORRUPTED_DATA_STRUCTURE() BUG_ON(1) > #else > # define BUG_ON_CORRUPTED_DATA_STRUCTURE() do {} while (0) > #endif > > Then we can have: > > WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > entry, next->prev); > BUG_ON_CORRUPTED_DATA_STRUCTURE(); > > ?? > Hmm, maybe better yet, just have it called "CORRUPTED_DATA_STRUCTURE()" because it wont bug if the config is not set, and having "BUG_ON" in the name, it might be somewhat confusing. -- Steve
On Tue, Aug 16, 2016 at 2:50 PM, Laura Abbott <labbott@redhat.com> wrote: > On 08/16/2016 02:11 PM, Kees Cook wrote: >> >> The kernel checks for several cases of data structure corruption under >> either normal runtime, or under various CONFIG_DEBUG_* settings. When >> corruption is detected, some systems may want to BUG() immediately instead >> of letting the corruption continue. Many of these manipulation primitives >> can be used by security flaws to gain arbitrary memory write control. This >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. >> >> This is inspired by similar hardening in PaX and Grsecurity, and by >> Stephen Boyd in MSM kernels. >> > > This was never my favorite patch in the MSM tree, mostly because it seemed > to proliferate in random places. Some of these like the list were legit > corruption but others like the spinlock and workqueue were indication of > more hardware induced corruption and less kernel or software bugs. > I'd rather see the detection added in structures specifically identified to > be vulnerable. spinlocks and workqueues don't seem to be high on the > list to me. If they are, I think this could use some explanation of why > these in particular deserve checks vs. any other place where we might > detect corruption. Ah, interesting. I was wondering about why those two were added, especially the workqueue one which seemed to be a counter issue (hopefully we'll get the counter protection in soon too). Unless Stephen speaks up, I'll just drop the spinlock and workqueue chunks. >> Signed-off-by: Kees Cook <keescook@chromium.org> >> --- >> include/linux/bug.h | 7 +++++++ >> kernel/locking/spinlock_debug.c | 1 + >> kernel/workqueue.c | 2 ++ >> lib/Kconfig.debug | 10 ++++++++++ >> lib/list_debug.c | 7 +++++++ >> 5 files changed, 27 insertions(+) >> >> diff --git a/include/linux/bug.h b/include/linux/bug.h >> index e51b0709e78d..7e69758dd798 100644 >> --- a/include/linux/bug.h >> +++ b/include/linux/bug.h >> @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned >> long bug_addr, >> } >> >> #endif /* CONFIG_GENERIC_BUG */ >> + >> +#ifdef CONFIG_BUG_ON_CORRUPTION >> +# define CORRUPTED_DATA_STRUCTURE true >> +#else >> +# define CORRUPTED_DATA_STRUCTURE false >> +#endif >> + >> #endif /* _LINUX_BUG_H */ >> diff --git a/kernel/locking/spinlock_debug.c >> b/kernel/locking/spinlock_debug.c >> index 0374a596cffa..d5f833769feb 100644 >> --- a/kernel/locking/spinlock_debug.c >> +++ b/kernel/locking/spinlock_debug.c >> @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char >> *msg) >> owner ? owner->comm : "<none>", >> owner ? task_pid_nr(owner) : -1, >> lock->owner_cpu); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> dump_stack(); >> } >> >> diff --git a/kernel/workqueue.c b/kernel/workqueue.c >> index ef071ca73fc3..ea0132b55eca 100644 >> --- a/kernel/workqueue.c >> +++ b/kernel/workqueue.c >> @@ -48,6 +48,7 @@ >> #include <linux/nodemask.h> >> #include <linux/moduleparam.h> >> #include <linux/uaccess.h> >> +#include <linux/bug.h> >> >> #include "workqueue_internal.h" >> >> @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) >> current->comm, preempt_count(), >> task_pid_nr(current), >> worker->current_func); >> debug_show_held_locks(current); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> dump_stack(); >> } >> >> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug >> index 2307d7c89dac..d64bd6b6fd45 100644 >> --- a/lib/Kconfig.debug >> +++ b/lib/Kconfig.debug >> @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS >> >> If unsure, say N. >> >> +config BUG_ON_CORRUPTION >> + bool "Trigger a BUG when data corruption is detected" >> + help >> + Select this option if the kernel should BUG when it encounters >> + data corruption in various kernel memory structures during >> checks >> + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, >> + etc. >> + >> + If unsure, say N. >> + >> source "samples/Kconfig" >> >> source "lib/Kconfig.kgdb" >> diff --git a/lib/list_debug.c b/lib/list_debug.c >> index 80e2e40a6a4e..161c7e7d3478 100644 >> --- a/lib/list_debug.c >> +++ b/lib/list_debug.c >> @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, >> if (unlikely(next->prev != prev)) { >> WARN(1, "list_add corruption. next->prev should be prev >> (%p), but was %p. (next=%p).\n", >> prev, next->prev, next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev->next != next)) { >> WARN(1, "list_add corruption. prev->next should be next >> (%p), but was %p. (prev=%p).\n", >> next, prev->next, prev); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(new == prev || new == next)) { >> WARN(1, "list_add double add: new=%p, prev=%p, >> next=%p.\n", >> new, prev, next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> return true; >> @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) >> if (unlikely(next == LIST_POISON1)) { >> WARN(1, "list_del corruption, %p->next is LIST_POISON1 >> (%p)\n", >> entry, LIST_POISON1); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev == LIST_POISON2)) { >> WARN(1, "list_del corruption, %p->prev is LIST_POISON2 >> (%p)\n", >> entry, LIST_POISON2); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(prev->next != entry)) { >> WARN(1, "list_del corruption. prev->next should be %p, but >> was %p\n", >> entry, prev->next); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> if (unlikely(next->prev != entry)) { >> WARN(1, "list_del corruption. next->prev should be %p, but >> was %p\n", >> entry, next->prev); >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); >> return false; >> } >> return true; >> > > The git history shows 924d9addb9b1 ("list debugging: use > WARN() instead of BUG()") deliberately switched this from BUG to WARN. > Do we still need the WARN at all or can we just switch to BUG permanently? Hah. Yeah, okay, so that explains the history of this a little more. Looks like when this was converted to WARN, the "stop execution flow" part of that was not retained (which is what PaX/Grsecurity added back). Regardless, it certainly supports having a CONFIG for "should this WARN and abort, or BUG?" -Kees
On Tue, Aug 16, 2016 at 2:57 PM, Steven Rostedt <rostedt@goodmis.org> wrote: > On Tue, 16 Aug 2016 17:53:54 -0400 > Steven Rostedt <rostedt@goodmis.org> wrote: > > >> WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", >> entry, next->prev); >> BUG_ON(CORRUPTED_DATA_STRUCTURE); >> >> Will always warn (as stated by "1") and and the BUG_ON() will bug if >> CORRUPTED_DATA_STRUCTURE is set. Although, I don't like that name. Can >> we have a: >> >> BUG_ON(BUG_ON_CORRUPTED_DATA_STRUCTURES); >> >> Or maybe have that as a macro: >> >> #ifdef CONFIG_BUG_ON_CORRUPTION >> # define BUG_ON_CORRUPTED_DATA_STRUCTURE() BUG_ON(1) >> #else >> # define BUG_ON_CORRUPTED_DATA_STRUCTURE() do {} while (0) >> #endif >> >> Then we can have: >> >> WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", >> entry, next->prev); >> BUG_ON_CORRUPTED_DATA_STRUCTURE(); >> >> ?? >> > > Hmm, maybe better yet, just have it called "CORRUPTED_DATA_STRUCTURE()" > because it wont bug if the config is not set, and having "BUG_ON" in > the name, it might be somewhat confusing. Yeah, I'm trying to redesign this now, since one thing I think is important to build into the new macro is the concept of _stopping_ execution. i.e. even if you don't want to BUG, you really don't want to operate on the busted data structure. This protection was precisely what went missing with commit 924d9addb9b1. -Kees
On Tue, Aug 16, 2016 at 02:42:28PM -0700, Kees Cook wrote: > On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney > <paulmck@linux.vnet.ibm.com> wrote: > > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: > >> The kernel checks for several cases of data structure corruption under > >> either normal runtime, or under various CONFIG_DEBUG_* settings. When > >> corruption is detected, some systems may want to BUG() immediately instead > >> of letting the corruption continue. Many of these manipulation primitives > >> can be used by security flaws to gain arbitrary memory write control. This > >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. > >> > >> This is inspired by similar hardening in PaX and Grsecurity, and by > >> Stephen Boyd in MSM kernels. > >> > >> Signed-off-by: Kees Cook <keescook@chromium.org> > > > > OK, I will bite... Why both the WARN() and the BUG_ON()? > > Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is > cleanly paired with a WARN (see the workqueue addition that wants to > dump locks too). I could rearrange things a bit, though, and create > something like: > > #ifdef CONFIG_BUG_ON_CORRUPTION > # define CORRUPTED(format...) { \ > printk(KERN_ERR format); \ > BUG(); \ > } > #else > # define CORRUPTED(format...) WARN(1, format) > #endif > > What do you think? First let me see if I understand the rationale... The idea is to allow those in security-irrelevant environments, such as test systems, to have the old "complain but soldier on" semantics, while security-conscious systems just panic, thereby hopefully converting a more dangerous form of attack into a denial-of-service attack. An alternative approach would be to make WARN() panic on systems built with CONFIG_BUG_ON_CORRUPTION, but we might instead want to choose which warnings are fatal on security-conscious systems. Or am I missing the point? At a more detailed level, one could argue for something like this: #define CORRUPTED(format...) \ do { \ if (IS_ENABLED(CONFIG_BUG_ON_CORRUPTION)) { \ printk(KERN_ERR format); \ BUG(); \ } else { \ WARN(1, format); \ } \ } while (0) Some might prefer #ifdef to IS_ENABLED(), but the bare {} needs to be do-while in any case. Thanx, Paul > -Kees > > > > > Thanx, Paul > > > >> --- > >> include/linux/bug.h | 7 +++++++ > >> kernel/locking/spinlock_debug.c | 1 + > >> kernel/workqueue.c | 2 ++ > >> lib/Kconfig.debug | 10 ++++++++++ > >> lib/list_debug.c | 7 +++++++ > >> 5 files changed, 27 insertions(+) > >> > >> diff --git a/include/linux/bug.h b/include/linux/bug.h > >> index e51b0709e78d..7e69758dd798 100644 > >> --- a/include/linux/bug.h > >> +++ b/include/linux/bug.h > >> @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, > >> } > >> > >> #endif /* CONFIG_GENERIC_BUG */ > >> + > >> +#ifdef CONFIG_BUG_ON_CORRUPTION > >> +# define CORRUPTED_DATA_STRUCTURE true > >> +#else > >> +# define CORRUPTED_DATA_STRUCTURE false > >> +#endif > >> + > >> #endif /* _LINUX_BUG_H */ > >> diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c > >> index 0374a596cffa..d5f833769feb 100644 > >> --- a/kernel/locking/spinlock_debug.c > >> +++ b/kernel/locking/spinlock_debug.c > >> @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) > >> owner ? owner->comm : "<none>", > >> owner ? task_pid_nr(owner) : -1, > >> lock->owner_cpu); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> dump_stack(); > >> } > >> > >> diff --git a/kernel/workqueue.c b/kernel/workqueue.c > >> index ef071ca73fc3..ea0132b55eca 100644 > >> --- a/kernel/workqueue.c > >> +++ b/kernel/workqueue.c > >> @@ -48,6 +48,7 @@ > >> #include <linux/nodemask.h> > >> #include <linux/moduleparam.h> > >> #include <linux/uaccess.h> > >> +#include <linux/bug.h> > >> > >> #include "workqueue_internal.h" > >> > >> @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) > >> current->comm, preempt_count(), task_pid_nr(current), > >> worker->current_func); > >> debug_show_held_locks(current); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> dump_stack(); > >> } > >> > >> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > >> index 2307d7c89dac..d64bd6b6fd45 100644 > >> --- a/lib/Kconfig.debug > >> +++ b/lib/Kconfig.debug > >> @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS > >> > >> If unsure, say N. > >> > >> +config BUG_ON_CORRUPTION > >> + bool "Trigger a BUG when data corruption is detected" > >> + help > >> + Select this option if the kernel should BUG when it encounters > >> + data corruption in various kernel memory structures during checks > >> + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, > >> + etc. > >> + > >> + If unsure, say N. > >> + > >> source "samples/Kconfig" > >> > >> source "lib/Kconfig.kgdb" > >> diff --git a/lib/list_debug.c b/lib/list_debug.c > >> index 80e2e40a6a4e..161c7e7d3478 100644 > >> --- a/lib/list_debug.c > >> +++ b/lib/list_debug.c > >> @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, > >> if (unlikely(next->prev != prev)) { > >> WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", > >> prev, next->prev, next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev->next != next)) { > >> WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", > >> next, prev->next, prev); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(new == prev || new == next)) { > >> WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", > >> new, prev, next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> return true; > >> @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) > >> if (unlikely(next == LIST_POISON1)) { > >> WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", > >> entry, LIST_POISON1); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev == LIST_POISON2)) { > >> WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", > >> entry, LIST_POISON2); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(prev->next != entry)) { > >> WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", > >> entry, prev->next); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> if (unlikely(next->prev != entry)) { > >> WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", > >> entry, next->prev); > >> + BUG_ON(CORRUPTED_DATA_STRUCTURE); > >> return false; > >> } > >> return true; > >> -- > >> 2.7.4 > >> > > > > > > -- > Kees Cook > Nexus Security >
On Tue, Aug 16, 2016 at 5:01 PM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Tue, Aug 16, 2016 at 02:42:28PM -0700, Kees Cook wrote: >> On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney >> <paulmck@linux.vnet.ibm.com> wrote: >> > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: >> >> The kernel checks for several cases of data structure corruption under >> >> either normal runtime, or under various CONFIG_DEBUG_* settings. When >> >> corruption is detected, some systems may want to BUG() immediately instead >> >> of letting the corruption continue. Many of these manipulation primitives >> >> can be used by security flaws to gain arbitrary memory write control. This >> >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. >> >> >> >> This is inspired by similar hardening in PaX and Grsecurity, and by >> >> Stephen Boyd in MSM kernels. >> >> >> >> Signed-off-by: Kees Cook <keescook@chromium.org> >> > >> > OK, I will bite... Why both the WARN() and the BUG_ON()? >> >> Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is >> cleanly paired with a WARN (see the workqueue addition that wants to >> dump locks too). I could rearrange things a bit, though, and create >> something like: >> >> #ifdef CONFIG_BUG_ON_CORRUPTION >> # define CORRUPTED(format...) { \ >> printk(KERN_ERR format); \ >> BUG(); \ >> } >> #else >> # define CORRUPTED(format...) WARN(1, format) >> #endif >> >> What do you think? > > First let me see if I understand the rationale... The idea is to allow > those in security-irrelevant environments, such as test systems, to > have the old "complain but soldier on" semantics, while security-conscious > systems just panic, thereby hopefully converting a more dangerous form > of attack into a denial-of-service attack. Right, we don't want to wholesale upgrade all WARNs to BUGs. Just any security-sensitive conditionals. And based on Laura's feedback, this is really just about CONFIG_DEBUG_LIST now. We'll likely find some more to add this to, but for the moment, we'll focus on list. Here are the rationales as I see it: - if you've enabled CONFIG_DEBUG_LIST - you want to get a report of the corruption - you want the kernel to _not operate on the structure_ (this went missing when s/BUG/WARN/) - if you've enabled CONFIG_BUG_ON_DATA_CORRUPTION - everything from CONFIG_DEBUG_LIST - you want the offending process to go away (i.e. BUG instead of WARN) - you may want the entire system to dump if you've set the panic_on_oops sysctl (i.e. BUG) > An alternative approach would be to make WARN() panic on systems built > with CONFIG_BUG_ON_CORRUPTION, but we might instead want to choose which > warnings are fatal on security-conscious systems. > > Or am I missing the point? > > At a more detailed level, one could argue for something like this: > > #define CORRUPTED(format...) \ > do { \ > if (IS_ENABLED(CONFIG_BUG_ON_CORRUPTION)) { \ > printk(KERN_ERR format); \ > BUG(); \ > } else { \ > WARN(1, format); \ > } \ > } while (0) > > Some might prefer #ifdef to IS_ENABLED(), but the bare {} needs to > be do-while in any case. Yup, this is almost exactly what I've got in the v2. I wanted to enforce a control-flow side-effect, though, so I've included a non-optional "return false" as well. -Kees
On Tue, Aug 16, 2016 at 05:09:53PM -0700, Kees Cook wrote: > On Tue, Aug 16, 2016 at 5:01 PM, Paul E. McKenney > <paulmck@linux.vnet.ibm.com> wrote: > > On Tue, Aug 16, 2016 at 02:42:28PM -0700, Kees Cook wrote: > >> On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney > >> <paulmck@linux.vnet.ibm.com> wrote: > >> > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: > >> >> The kernel checks for several cases of data structure corruption under > >> >> either normal runtime, or under various CONFIG_DEBUG_* settings. When > >> >> corruption is detected, some systems may want to BUG() immediately instead > >> >> of letting the corruption continue. Many of these manipulation primitives > >> >> can be used by security flaws to gain arbitrary memory write control. This > >> >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. > >> >> > >> >> This is inspired by similar hardening in PaX and Grsecurity, and by > >> >> Stephen Boyd in MSM kernels. > >> >> > >> >> Signed-off-by: Kees Cook <keescook@chromium.org> > >> > > >> > OK, I will bite... Why both the WARN() and the BUG_ON()? > >> > >> Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is > >> cleanly paired with a WARN (see the workqueue addition that wants to > >> dump locks too). I could rearrange things a bit, though, and create > >> something like: > >> > >> #ifdef CONFIG_BUG_ON_CORRUPTION > >> # define CORRUPTED(format...) { \ > >> printk(KERN_ERR format); \ > >> BUG(); \ > >> } > >> #else > >> # define CORRUPTED(format...) WARN(1, format) > >> #endif > >> > >> What do you think? > > > > First let me see if I understand the rationale... The idea is to allow > > those in security-irrelevant environments, such as test systems, to > > have the old "complain but soldier on" semantics, while security-conscious > > systems just panic, thereby hopefully converting a more dangerous form > > of attack into a denial-of-service attack. > > Right, we don't want to wholesale upgrade all WARNs to BUGs. Just any > security-sensitive conditionals. And based on Laura's feedback, this > is really just about CONFIG_DEBUG_LIST now. We'll likely find some > more to add this to, but for the moment, we'll focus on list. > > Here are the rationales as I see it: > > - if you've enabled CONFIG_DEBUG_LIST > - you want to get a report of the corruption > - you want the kernel to _not operate on the structure_ (this went > missing when s/BUG/WARN/) > - if you've enabled CONFIG_BUG_ON_DATA_CORRUPTION > - everything from CONFIG_DEBUG_LIST > - you want the offending process to go away (i.e. BUG instead of WARN) > - you may want the entire system to dump if you've set the > panic_on_oops sysctl (i.e. BUG) OK, this looks good to me. Just to be clear, if you've enabled neither CONFIG_DEBUG_LIST nor CONFIG_BUG_ON_DATA_CORRUPTION, then you get better performance, but are taking responsibility for using some other means of shielding your system from attack, correct? (I believe that we do need to give the user this choice, just checking your intent.) > > An alternative approach would be to make WARN() panic on systems built > > with CONFIG_BUG_ON_CORRUPTION, but we might instead want to choose which > > warnings are fatal on security-conscious systems. > > > > Or am I missing the point? > > > > At a more detailed level, one could argue for something like this: > > > > #define CORRUPTED(format...) \ > > do { \ > > if (IS_ENABLED(CONFIG_BUG_ON_CORRUPTION)) { \ > > printk(KERN_ERR format); \ > > BUG(); \ > > } else { \ > > WARN(1, format); \ > > } \ > > } while (0) > > > > Some might prefer #ifdef to IS_ENABLED(), but the bare {} needs to > > be do-while in any case. > > Yup, this is almost exactly what I've got in the v2. I wanted to > enforce a control-flow side-effect, though, so I've included a > non-optional "return false" as well. Sounds good! And yes, pulling the condition in makes a lot of sense to me as well. Looking forward to seeing v3. Thanx, Paul
On Wed, Aug 17, 2016 at 9:09 AM, Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote: > On Tue, Aug 16, 2016 at 05:09:53PM -0700, Kees Cook wrote: >> On Tue, Aug 16, 2016 at 5:01 PM, Paul E. McKenney >> <paulmck@linux.vnet.ibm.com> wrote: >> > On Tue, Aug 16, 2016 at 02:42:28PM -0700, Kees Cook wrote: >> >> On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney >> >> <paulmck@linux.vnet.ibm.com> wrote: >> >> > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: >> >> >> The kernel checks for several cases of data structure corruption under >> >> >> either normal runtime, or under various CONFIG_DEBUG_* settings. When >> >> >> corruption is detected, some systems may want to BUG() immediately instead >> >> >> of letting the corruption continue. Many of these manipulation primitives >> >> >> can be used by security flaws to gain arbitrary memory write control. This >> >> >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. >> >> >> >> >> >> This is inspired by similar hardening in PaX and Grsecurity, and by >> >> >> Stephen Boyd in MSM kernels. >> >> >> >> >> >> Signed-off-by: Kees Cook <keescook@chromium.org> >> >> > >> >> > OK, I will bite... Why both the WARN() and the BUG_ON()? >> >> >> >> Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is >> >> cleanly paired with a WARN (see the workqueue addition that wants to >> >> dump locks too). I could rearrange things a bit, though, and create >> >> something like: >> >> >> >> #ifdef CONFIG_BUG_ON_CORRUPTION >> >> # define CORRUPTED(format...) { \ >> >> printk(KERN_ERR format); \ >> >> BUG(); \ >> >> } >> >> #else >> >> # define CORRUPTED(format...) WARN(1, format) >> >> #endif >> >> >> >> What do you think? >> > >> > First let me see if I understand the rationale... The idea is to allow >> > those in security-irrelevant environments, such as test systems, to >> > have the old "complain but soldier on" semantics, while security-conscious >> > systems just panic, thereby hopefully converting a more dangerous form >> > of attack into a denial-of-service attack. >> >> Right, we don't want to wholesale upgrade all WARNs to BUGs. Just any >> security-sensitive conditionals. And based on Laura's feedback, this >> is really just about CONFIG_DEBUG_LIST now. We'll likely find some >> more to add this to, but for the moment, we'll focus on list. >> >> Here are the rationales as I see it: >> >> - if you've enabled CONFIG_DEBUG_LIST >> - you want to get a report of the corruption >> - you want the kernel to _not operate on the structure_ (this went >> missing when s/BUG/WARN/) >> - if you've enabled CONFIG_BUG_ON_DATA_CORRUPTION >> - everything from CONFIG_DEBUG_LIST >> - you want the offending process to go away (i.e. BUG instead of WARN) >> - you may want the entire system to dump if you've set the >> panic_on_oops sysctl (i.e. BUG) > > OK, this looks good to me. > > Just to be clear, if you've enabled neither CONFIG_DEBUG_LIST nor > CONFIG_BUG_ON_DATA_CORRUPTION, then you get better performance, but are > taking responsibility for using some other means of shielding your system > from attack, correct? (I believe that we do need to give the user this > choice, just checking your intent.) That's correct. (And in the future I intend to prove that the performance impact is comically small and that DEBUG_LIST should be yes-by-default, but that'll be a whole separate issue.) >> > An alternative approach would be to make WARN() panic on systems built >> > with CONFIG_BUG_ON_CORRUPTION, but we might instead want to choose which >> > warnings are fatal on security-conscious systems. >> > >> > Or am I missing the point? >> > >> > At a more detailed level, one could argue for something like this: >> > >> > #define CORRUPTED(format...) \ >> > do { \ >> > if (IS_ENABLED(CONFIG_BUG_ON_CORRUPTION)) { \ >> > printk(KERN_ERR format); \ >> > BUG(); \ >> > } else { \ >> > WARN(1, format); \ >> > } \ >> > } while (0) >> > >> > Some might prefer #ifdef to IS_ENABLED(), but the bare {} needs to >> > be do-while in any case. >> >> Yup, this is almost exactly what I've got in the v2. I wanted to >> enforce a control-flow side-effect, though, so I've included a >> non-optional "return false" as well. > > Sounds good! And yes, pulling the condition in makes a lot of sense > to me as well. Looking forward to seeing v3. Great, thanks! -Kees
On Wed, Aug 17, 2016 at 09:14:59AM -0700, Kees Cook wrote: > On Wed, Aug 17, 2016 at 9:09 AM, Paul E. McKenney > <paulmck@linux.vnet.ibm.com> wrote: > > On Tue, Aug 16, 2016 at 05:09:53PM -0700, Kees Cook wrote: > >> On Tue, Aug 16, 2016 at 5:01 PM, Paul E. McKenney > >> <paulmck@linux.vnet.ibm.com> wrote: > >> > On Tue, Aug 16, 2016 at 02:42:28PM -0700, Kees Cook wrote: > >> >> On Tue, Aug 16, 2016 at 2:26 PM, Paul E. McKenney > >> >> <paulmck@linux.vnet.ibm.com> wrote: > >> >> > On Tue, Aug 16, 2016 at 02:11:04PM -0700, Kees Cook wrote: > >> >> >> The kernel checks for several cases of data structure corruption under > >> >> >> either normal runtime, or under various CONFIG_DEBUG_* settings. When > >> >> >> corruption is detected, some systems may want to BUG() immediately instead > >> >> >> of letting the corruption continue. Many of these manipulation primitives > >> >> >> can be used by security flaws to gain arbitrary memory write control. This > >> >> >> provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. > >> >> >> > >> >> >> This is inspired by similar hardening in PaX and Grsecurity, and by > >> >> >> Stephen Boyd in MSM kernels. > >> >> >> > >> >> >> Signed-off-by: Kees Cook <keescook@chromium.org> > >> >> > > >> >> > OK, I will bite... Why both the WARN() and the BUG_ON()? > >> >> > >> >> Mostly because not every case of BUG(CORRUPTED_DATA_STRUCTURE) is > >> >> cleanly paired with a WARN (see the workqueue addition that wants to > >> >> dump locks too). I could rearrange things a bit, though, and create > >> >> something like: > >> >> > >> >> #ifdef CONFIG_BUG_ON_CORRUPTION > >> >> # define CORRUPTED(format...) { \ > >> >> printk(KERN_ERR format); \ > >> >> BUG(); \ > >> >> } > >> >> #else > >> >> # define CORRUPTED(format...) WARN(1, format) > >> >> #endif > >> >> > >> >> What do you think? > >> > > >> > First let me see if I understand the rationale... The idea is to allow > >> > those in security-irrelevant environments, such as test systems, to > >> > have the old "complain but soldier on" semantics, while security-conscious > >> > systems just panic, thereby hopefully converting a more dangerous form > >> > of attack into a denial-of-service attack. > >> > >> Right, we don't want to wholesale upgrade all WARNs to BUGs. Just any > >> security-sensitive conditionals. And based on Laura's feedback, this > >> is really just about CONFIG_DEBUG_LIST now. We'll likely find some > >> more to add this to, but for the moment, we'll focus on list. > >> > >> Here are the rationales as I see it: > >> > >> - if you've enabled CONFIG_DEBUG_LIST > >> - you want to get a report of the corruption > >> - you want the kernel to _not operate on the structure_ (this went > >> missing when s/BUG/WARN/) > >> - if you've enabled CONFIG_BUG_ON_DATA_CORRUPTION > >> - everything from CONFIG_DEBUG_LIST > >> - you want the offending process to go away (i.e. BUG instead of WARN) > >> - you may want the entire system to dump if you've set the > >> panic_on_oops sysctl (i.e. BUG) > > > > OK, this looks good to me. > > > > Just to be clear, if you've enabled neither CONFIG_DEBUG_LIST nor > > CONFIG_BUG_ON_DATA_CORRUPTION, then you get better performance, but are > > taking responsibility for using some other means of shielding your system > > from attack, correct? (I believe that we do need to give the user this > > choice, just checking your intent.) > > That's correct. (And in the future I intend to prove that the > performance impact is comically small and that DEBUG_LIST should be > yes-by-default, but that'll be a whole separate issue.) And I am sure that will prove to be the case. But the people looking to squeeze every last drop of performance from their system will cheerfully ignore such results on the grounds that eliminating several thousand such insignificant checks -might- have a useful overall benefit. And who knows, they might be right. Thanx, Paul > >> > An alternative approach would be to make WARN() panic on systems built > >> > with CONFIG_BUG_ON_CORRUPTION, but we might instead want to choose which > >> > warnings are fatal on security-conscious systems. > >> > > >> > Or am I missing the point? > >> > > >> > At a more detailed level, one could argue for something like this: > >> > > >> > #define CORRUPTED(format...) \ > >> > do { \ > >> > if (IS_ENABLED(CONFIG_BUG_ON_CORRUPTION)) { \ > >> > printk(KERN_ERR format); \ > >> > BUG(); \ > >> > } else { \ > >> > WARN(1, format); \ > >> > } \ > >> > } while (0) > >> > > >> > Some might prefer #ifdef to IS_ENABLED(), but the bare {} needs to > >> > be do-while in any case. > >> > >> Yup, this is almost exactly what I've got in the v2. I wanted to > >> enforce a control-flow side-effect, though, so I've included a > >> non-optional "return false" as well. > > > > Sounds good! And yes, pulling the condition in makes a lot of sense > > to me as well. Looking forward to seeing v3. > > Great, thanks! > > -Kees > > -- > Kees Cook > Nexus Security >
diff --git a/include/linux/bug.h b/include/linux/bug.h index e51b0709e78d..7e69758dd798 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -118,4 +118,11 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, } #endif /* CONFIG_GENERIC_BUG */ + +#ifdef CONFIG_BUG_ON_CORRUPTION +# define CORRUPTED_DATA_STRUCTURE true +#else +# define CORRUPTED_DATA_STRUCTURE false +#endif + #endif /* _LINUX_BUG_H */ diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 0374a596cffa..d5f833769feb 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -64,6 +64,7 @@ static void spin_dump(raw_spinlock_t *lock, const char *msg) owner ? owner->comm : "<none>", owner ? task_pid_nr(owner) : -1, lock->owner_cpu); + BUG_ON(CORRUPTED_DATA_STRUCTURE); dump_stack(); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ef071ca73fc3..ea0132b55eca 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,7 @@ #include <linux/nodemask.h> #include <linux/moduleparam.h> #include <linux/uaccess.h> +#include <linux/bug.h> #include "workqueue_internal.h" @@ -2108,6 +2109,7 @@ __acquires(&pool->lock) current->comm, preempt_count(), task_pid_nr(current), worker->current_func); debug_show_held_locks(current); + BUG_ON(CORRUPTED_DATA_STRUCTURE); dump_stack(); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2307d7c89dac..d64bd6b6fd45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1987,6 +1987,16 @@ config TEST_STATIC_KEYS If unsure, say N. +config BUG_ON_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + help + Select this option if the kernel should BUG when it encounters + data corruption in various kernel memory structures during checks + added by options like CONFIG_DEBUG_LIST, CONFIG_DEBUG_SPINLOCK, + etc. + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/list_debug.c b/lib/list_debug.c index 80e2e40a6a4e..161c7e7d3478 100644 --- a/lib/list_debug.c +++ b/lib/list_debug.c @@ -26,16 +26,19 @@ bool __list_add_debug(struct list_head *new, if (unlikely(next->prev != prev)) { WARN(1, "list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n", prev, next->prev, next); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } if (unlikely(prev->next != next)) { WARN(1, "list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n", next, prev->next, prev); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } if (unlikely(new == prev || new == next)) { WARN(1, "list_add double add: new=%p, prev=%p, next=%p.\n", new, prev, next); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } return true; @@ -52,21 +55,25 @@ bool __list_del_entry_debug(struct list_head *entry) if (unlikely(next == LIST_POISON1)) { WARN(1, "list_del corruption, %p->next is LIST_POISON1 (%p)\n", entry, LIST_POISON1); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } if (unlikely(prev == LIST_POISON2)) { WARN(1, "list_del corruption, %p->prev is LIST_POISON2 (%p)\n", entry, LIST_POISON2); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } if (unlikely(prev->next != entry)) { WARN(1, "list_del corruption. prev->next should be %p, but was %p\n", entry, prev->next); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } if (unlikely(next->prev != entry)) { WARN(1, "list_del corruption. next->prev should be %p, but was %p\n", entry, next->prev); + BUG_ON(CORRUPTED_DATA_STRUCTURE); return false; } return true;
The kernel checks for several cases of data structure corruption under either normal runtime, or under various CONFIG_DEBUG_* settings. When corruption is detected, some systems may want to BUG() immediately instead of letting the corruption continue. Many of these manipulation primitives can be used by security flaws to gain arbitrary memory write control. This provides CONFIG_BUG_ON_CORRUPTION to control newly added BUG() locations. This is inspired by similar hardening in PaX and Grsecurity, and by Stephen Boyd in MSM kernels. Signed-off-by: Kees Cook <keescook@chromium.org> --- include/linux/bug.h | 7 +++++++ kernel/locking/spinlock_debug.c | 1 + kernel/workqueue.c | 2 ++ lib/Kconfig.debug | 10 ++++++++++ lib/list_debug.c | 7 +++++++ 5 files changed, 27 insertions(+)