Message ID | 31b6c0c44cb385667287c826528ad422c6433091.1620849613.git.pcc@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: improve efficiency of setting tags for user pages | expand |
+Jann On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc@google.com> wrote: > > Poisoning freed pages protects against kernel use-after-free. The > likelihood of such a bug involving kernel pages is significantly higher > than that for user pages. At the same time, poisoning freed pages can > impose a significant performance cost, which cannot always be justified > for user pages given the lower probability of finding a bug. Therefore, > make it possible to configure the kernel to disable freed user page > poisoning when using HW tags via the new kasan.skip_user_poison_on_free > command line option. So the potential scenario that would be undetectable with kasan.skip_user_poison_on_free enabled is: 1) kernel allocates a user page and maps it for userspace, 2) the page gets freed in the kernel, 3) kernel accesses the page leading to a use-after-free. Is this correct? If bugs involving use-after-free accesses on user pages are something that is extremely rare, perhaps we could just change the default and avoid adding a command line switch. Jann, maybe you have an idea of how common something like this is or have other inputs? Peter, is the plan to have this poisoning disabled in production? Is there an estimate of how slow this is? 
> Signed-off-by: Peter Collingbourne <pcc@google.com> > Link: https://linux-review.googlesource.com/id/I716846e2de8ef179f44e835770df7e6307be96c9 > --- > include/linux/gfp.h | 13 ++++++++++--- > include/linux/page-flags.h | 9 +++++++++ > include/trace/events/mmflags.h | 9 ++++++++- > mm/kasan/hw_tags.c | 10 ++++++++++ > mm/page_alloc.c | 12 +++++++----- > 5 files changed, 44 insertions(+), 9 deletions(-) > > diff --git a/include/linux/gfp.h b/include/linux/gfp.h > index 68ba237365dc..9a77e5660b07 100644 > --- a/include/linux/gfp.h > +++ b/include/linux/gfp.h > @@ -54,8 +54,9 @@ struct vm_area_struct; > #define ___GFP_THISNODE 0x200000u > #define ___GFP_ACCOUNT 0x400000u > #define ___GFP_ZEROTAGS 0x800000u > +#define ___GFP_SKIP_KASAN_POISON 0x1000000u > #ifdef CONFIG_LOCKDEP > -#define ___GFP_NOLOCKDEP 0x1000000u > +#define ___GFP_NOLOCKDEP 0x2000000u > #else > #define ___GFP_NOLOCKDEP 0 > #endif > @@ -233,17 +234,22 @@ struct vm_area_struct; > * > * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if > * __GFP_ZERO is set. > + * > + * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned > + * on deallocation. Typically used for userspace pages. Currently only has an > + * effect in HW tags mode, and only if a command line option is set. 
> */ > #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) > #define __GFP_COMP ((__force gfp_t)___GFP_COMP) > #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) > #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) > +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) > > /* Disable lockdep for GFP context tracking */ > #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) > > /* Room for N __GFP_FOO bits */ > -#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP)) > +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) > #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) > > /** > @@ -320,7 +326,8 @@ struct vm_area_struct; > #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) > #define GFP_NOIO (__GFP_RECLAIM) > #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) > -#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) > +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ > + __GFP_HARDWALL | __GFP_SKIP_KASAN_POISON) > #define GFP_DMA __GFP_DMA > #define GFP_DMA32 __GFP_DMA32 > #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h > index 04a34c08e0a6..40e2c5000585 100644 > --- a/include/linux/page-flags.h > +++ b/include/linux/page-flags.h > @@ -137,6 +137,9 @@ enum pageflags { > #endif > #ifdef CONFIG_64BIT > PG_arch_2, > +#endif > +#ifdef CONFIG_KASAN_HW_TAGS > + PG_skip_kasan_poison, > #endif > __NR_PAGEFLAGS, > > @@ -443,6 +446,12 @@ TESTCLEARFLAG(Young, young, PF_ANY) > PAGEFLAG(Idle, idle, PF_ANY) > #endif > > +#ifdef CONFIG_KASAN_HW_TAGS > +PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) > +#else > +PAGEFLAG_FALSE(SkipKASanPoison) > +#endif > + > /* > * PageReported() is used to track reported free pages within the Buddy > * allocator. 
We can use the non-atomic version of the test and set > diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h > index 629c7a0eaff2..390270e00a1d 100644 > --- a/include/trace/events/mmflags.h > +++ b/include/trace/events/mmflags.h > @@ -85,6 +85,12 @@ > #define IF_HAVE_PG_ARCH_2(flag,string) > #endif > > +#ifdef CONFIG_KASAN_HW_TAGS > +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) ,{1UL << flag, string} > +#else > +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) > +#endif > + > #define __def_pageflag_names \ > {1UL << PG_locked, "locked" }, \ > {1UL << PG_waiters, "waiters" }, \ > @@ -112,7 +118,8 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ > IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ > IF_HAVE_PG_IDLE(PG_young, "young" ) \ > IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ > -IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) > +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \ > +IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") > > #define show_page_flags(flags) \ > (flags) ? 
__print_flags(flags, "|", \ > diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c > index 34362c8d0955..954d5c2f7683 100644 > --- a/mm/kasan/hw_tags.c > +++ b/mm/kasan/hw_tags.c > @@ -238,10 +238,20 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, > return &alloc_meta->free_track[0]; > } > > +static bool skip_user_poison_on_free; > +static int __init skip_user_poison_on_free_param(char *buf) > +{ > + return kstrtobool(buf, &skip_user_poison_on_free); > +} > +early_param("kasan.skip_user_poison_on_free", skip_user_poison_on_free_param); > + > void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) > { > bool init = !want_init_on_free() && want_init_on_alloc(flags); > > + if (skip_user_poison_on_free && (flags & __GFP_SKIP_KASAN_POISON)) > + SetPageSkipKASanPoison(page); > + > if (flags & __GFP_ZEROTAGS) { > int i; > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 24e6f668ef73..2c3ac15ddd54 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -394,11 +394,12 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages); > * on-demand allocation and then freed again before the deferred pages > * initialization is done, but this is not likely to happen. 
> */ > -static inline bool should_skip_kasan_poison(fpi_t fpi_flags) > +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) > { > return static_branch_unlikely(&deferred_pages) || > (!IS_ENABLED(CONFIG_KASAN_GENERIC) && > - (fpi_flags & FPI_SKIP_KASAN_POISON)); > + (fpi_flags & FPI_SKIP_KASAN_POISON)) || > + PageSkipKASanPoison(page); > } > > /* Returns true if the struct page for the pfn is uninitialised */ > @@ -449,10 +450,11 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) > return false; > } > #else > -static inline bool should_skip_kasan_poison(fpi_t fpi_flags) > +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) > { > return (!IS_ENABLED(CONFIG_KASAN_GENERIC) && > - (fpi_flags & FPI_SKIP_KASAN_POISON)); > + (fpi_flags & FPI_SKIP_KASAN_POISON)) || > + PageSkipKASanPoison(page); > } > > static inline bool early_page_uninitialised(unsigned long pfn) > @@ -1244,7 +1246,7 @@ static __always_inline bool free_pages_prepare(struct page *page, > unsigned int order, bool check_free, fpi_t fpi_flags) > { > int bad = 0; > - bool skip_kasan_poison = should_skip_kasan_poison(fpi_flags); > + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); > > VM_BUG_ON_PAGE(PageTail(page), page); > > -- > 2.31.1.607.g51e8a6a459-goog >
On Wed, May 26, 2021 at 12:07 AM Andrey Konovalov <andreyknvl@gmail.com> wrote: > On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc@google.com> wrote: > > Poisoning freed pages protects against kernel use-after-free. The > > likelihood of such a bug involving kernel pages is significantly higher > > than that for user pages. At the same time, poisoning freed pages can > > impose a significant performance cost, which cannot always be justified > > for user pages given the lower probability of finding a bug. Therefore, > > make it possible to configure the kernel to disable freed user page > > poisoning when using HW tags via the new kasan.skip_user_poison_on_free > > command line option. > > So the potential scenario that would be undetectable with > kasan.skip_user_poison_on_free enabled is: 1) kernel allocates a user > page and maps it for userspace, 2) the page gets freed in the kernel, > 3) kernel accesses the page leading to a use-after-free. Is this > correct? > > If bugs involving use-after-free accesses on user pages is something > that is extremely rare, perhaps we could just change the default and > avoid adding a command line switch. > > Jann, maybe you have an idea of how common something like this is or > have other inputs? GFP_USER is kind of a squishy concept, and if you grep around for it in the kernel tree, you can see it being used for all kinds of things - including SKBs in some weird ISDN driver, various types of BPF allocations, and so on. It's probably the wrong flag to hook if you want something that means "these pages will mostly be accessed from userspace". My guess is that what pcc@ is actually interested in are probably mainly anonymous pages, and to a lesser degree also page cache pages? Those use the more specific GFP_HIGHUSER_MOVABLE (which indicates that the kernel will usually not be holding any direct pointers to the page outside of rmap/pagecache logic, and that any kernel access to the pages will be using the kmap API). 
It's probably safe to assume that the majority of kernel bugs won't directly involve GFP_HIGHUSER_MOVABLE memory - that's probably mostly only going to happen if there are bugs in code that grabs pages with get_user_pages* and then kmap()s them, or if there's something broken in the pipe logic, or maybe an OOB issue in filesystem parsing code (?), or something like that.
On Wed, May 26, 2021 at 3:45 AM Jann Horn <jannh@google.com> wrote: > > On Wed, May 26, 2021 at 12:07 AM Andrey Konovalov <andreyknvl@gmail.com> wrote: > > On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc@google.com> wrote: > > > Poisoning freed pages protects against kernel use-after-free. The > > > likelihood of such a bug involving kernel pages is significantly higher > > > than that for user pages. At the same time, poisoning freed pages can > > > impose a significant performance cost, which cannot always be justified > > > for user pages given the lower probability of finding a bug. Therefore, > > > make it possible to configure the kernel to disable freed user page > > > poisoning when using HW tags via the new kasan.skip_user_poison_on_free > > > command line option. > > > > So the potential scenario that would be undetectable with > > kasan.skip_user_poison_on_free enabled is: 1) kernel allocates a user > > page and maps it for userspace, 2) the page gets freed in the kernel, > > 3) kernel accesses the page leading to a use-after-free. Is this > > correct? Yes, that's correct. > > If bugs involving use-after-free accesses on user pages is something > > that is extremely rare, perhaps we could just change the default and > > avoid adding a command line switch. > > > > Jann, maybe you have an idea of how common something like this is or > > have other inputs? > > GFP_USER is kind of a squishy concept, and if you grep around for it > in the kernel tree, you can see it being used for all kinds of things > - including SKBs in some weird ISDN driver, various types of BPF > allocations, and so on. It's probably the wrong flag to hook if you > want something that means "these pages will mostly be accessed from > userspace". > > My guess is that what pcc@ is actually interested in are probably > mainly anonymous pages, and to a lesser degree also page cache pages? 
> Those use the more specific GFP_HIGHUSER_MOVABLE (which indicates that > the kernel will usually not be holding any direct pointers to the page > outside of rmap/pagecache logic, and that any kernel access to the > pages will be using the kmap API). > > It's probably safe to assume that the majority of kernel bugs won't > directly involve GFP_HIGHUSER_MOVABLE memory - that's probably mostly > only going to happen if there are bugs in code that grabs pages with > get_user_pages* and then kmap()s them, or if there's something broken > in the pipe logic, or maybe an OOB issue in filesystem parsing code > (?), or something like that. This makes sense to me. The pages that I'm most interested in getting this treatment are indeed the anonymous and, to a lesser extent, pagecache pages. The GFP_HIGHUSER_MOVABLE restrictions to me imply a lack of direct kernel access more strongly than GFP_USER, as you point out. Therefore I agree with Andrey that we probably don't need a switch for this and can just change the behavior for GFP_HIGHUSER_MOVABLE. I've done so in v4, although this required a preparatory patch to merge __alloc_zeroed_user_highpage() and alloc_zeroed_user_highpage_movable() so that we actually use the GFP_HIGHUSER_MOVABLE constant for anonymous pages. > Peter, is the plan to have this poisoning disabled in production? Is > there an estimate on slow this is? Yes, that is the plan. I don't think I'm at liberty to provide exact numbers, but given that the previous patches mean that at allocation time we only touch pages once whether or not KASAN is enabled, the most significant remaining KASAN-associated source of overhead for user pages is the deallocation time overhead that I'm eliminating in this patch. Peter
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 68ba237365dc..9a77e5660b07 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -54,8 +54,9 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u +#define ___GFP_SKIP_KASAN_POISON 0x1000000u #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x1000000u +#define ___GFP_NOLOCKDEP 0x2000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -233,17 +234,22 @@ struct vm_area_struct; * * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if * __GFP_ZERO is set. + * + * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned + * on deallocation. Typically used for userspace pages. Currently only has an + * effect in HW tags mode, and only if a command line option is set. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** @@ -320,7 +326,8 @@ struct vm_area_struct; #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ + __GFP_HARDWALL | __GFP_SKIP_KASAN_POISON) #define GFP_DMA __GFP_DMA #define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 04a34c08e0a6..40e2c5000585 
100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -137,6 +137,9 @@ enum pageflags { #endif #ifdef CONFIG_64BIT PG_arch_2, +#endif +#ifdef CONFIG_KASAN_HW_TAGS + PG_skip_kasan_poison, #endif __NR_PAGEFLAGS, @@ -443,6 +446,12 @@ TESTCLEARFLAG(Young, young, PF_ANY) PAGEFLAG(Idle, idle, PF_ANY) #endif +#ifdef CONFIG_KASAN_HW_TAGS +PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD) +#else +PAGEFLAG_FALSE(SkipKASanPoison) +#endif + /* * PageReported() is used to track reported free pages within the Buddy * allocator. We can use the non-atomic version of the test and set diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 629c7a0eaff2..390270e00a1d 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -85,6 +85,12 @@ #define IF_HAVE_PG_ARCH_2(flag,string) #endif +#ifdef CONFIG_KASAN_HW_TAGS +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) ,{1UL << flag, string} +#else +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) +#endif + #define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ {1UL << PG_waiters, "waiters" }, \ @@ -112,7 +118,8 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \ IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \ IF_HAVE_PG_IDLE(PG_young, "young" ) \ IF_HAVE_PG_IDLE(PG_idle, "idle" ) \ -IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \ +IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison") #define show_page_flags(flags) \ (flags) ? 
__print_flags(flags, "|", \ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 34362c8d0955..954d5c2f7683 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -238,10 +238,20 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, return &alloc_meta->free_track[0]; } +static bool skip_user_poison_on_free; +static int __init skip_user_poison_on_free_param(char *buf) +{ + return kstrtobool(buf, &skip_user_poison_on_free); +} +early_param("kasan.skip_user_poison_on_free", skip_user_poison_on_free_param); + void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) { bool init = !want_init_on_free() && want_init_on_alloc(flags); + if (skip_user_poison_on_free && (flags & __GFP_SKIP_KASAN_POISON)) + SetPageSkipKASanPoison(page); + if (flags & __GFP_ZEROTAGS) { int i; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 24e6f668ef73..2c3ac15ddd54 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -394,11 +394,12 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages); * on-demand allocation and then freed again before the deferred pages * initialization is done, but this is not likely to happen. 
*/ -static inline bool should_skip_kasan_poison(fpi_t fpi_flags) +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) { return static_branch_unlikely(&deferred_pages) || (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - (fpi_flags & FPI_SKIP_KASAN_POISON)); + (fpi_flags & FPI_SKIP_KASAN_POISON)) || + PageSkipKASanPoison(page); } /* Returns true if the struct page for the pfn is uninitialised */ @@ -449,10 +450,11 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } #else -static inline bool should_skip_kasan_poison(fpi_t fpi_flags) +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) { return (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - (fpi_flags & FPI_SKIP_KASAN_POISON)); + (fpi_flags & FPI_SKIP_KASAN_POISON)) || + PageSkipKASanPoison(page); } static inline bool early_page_uninitialised(unsigned long pfn) @@ -1244,7 +1246,7 @@ static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free, fpi_t fpi_flags) { int bad = 0; - bool skip_kasan_poison = should_skip_kasan_poison(fpi_flags); + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); VM_BUG_ON_PAGE(PageTail(page), page);
Poisoning freed pages protects against kernel use-after-free. The likelihood of such a bug involving kernel pages is significantly higher than that for user pages. At the same time, poisoning freed pages can impose a significant performance cost, which cannot always be justified for user pages given the lower probability of finding a bug. Therefore, make it possible to configure the kernel to disable freed user page poisoning when using HW tags via the new kasan.skip_user_poison_on_free command line option. Signed-off-by: Peter Collingbourne <pcc@google.com> Link: https://linux-review.googlesource.com/id/I716846e2de8ef179f44e835770df7e6307be96c9 --- include/linux/gfp.h | 13 ++++++++++--- include/linux/page-flags.h | 9 +++++++++ include/trace/events/mmflags.h | 9 ++++++++- mm/kasan/hw_tags.c | 10 ++++++++++ mm/page_alloc.c | 12 +++++++----- 5 files changed, 44 insertions(+), 9 deletions(-)