Message ID | 20220427195820.1716975-2-pcc@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v5,1/2] printk: stop including cache.h from printk.h | expand |
On Wed, 27 Apr 2022 12:58:20 -0700 Peter Collingbourne <pcc@google.com> wrote: > When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum > slab alignment to 16. This happens even if MTE is not supported in > hardware or disabled via kasan=off, which creates an unnecessary > memory overhead in those cases. Eliminate this overhead by making > the minimum slab alignment a runtime property and only aligning to > 16 if KASAN is enabled at runtime. > > On a DragonBoard 845c (non-MTE hardware) with a kernel built with > CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android > boot I see the following Slab measurements in /proc/meminfo (median > of 3 reboots): > > ... > > --- a/mm/slab.c > +++ b/mm/slab.c > @@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, > objp += obj_offset(cachep); > if (cachep->ctor && cachep->flags & SLAB_POISON) > cachep->ctor(objp); > - if (ARCH_SLAB_MINALIGN && > - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { > - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", > - objp, (int)ARCH_SLAB_MINALIGN); > + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { > + pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp, > + (int)arch_slab_minalign()); printf/printk know about size_t. Use %zu, no cast needed. But... > } > return objp; > } > diff --git a/mm/slab_common.c b/mm/slab_common.c > index 2b3206a2c3b5..33cc49810a54 100644 > --- a/mm/slab_common.c > +++ b/mm/slab_common.c > @@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags, > align = max(align, ralign); > } > > - if (align < ARCH_SLAB_MINALIGN) > - align = ARCH_SLAB_MINALIGN; > + align = max_t(size_t, align, arch_slab_minalign()); max_t/min_t are nature's way of telling us "you screwed up the types". So what type _is_ slab alignment? size_t seems sensible, but the code prefers unsigned int. So how about we stick with that? This compiles. Still some max_t's in slob.c because I was too lazy to go fix the type of ARCH_KMALLOC_MINALIGN. Shrug, I don't know if we can be bothered. You decide :) arch/arm64/include/asm/cache.h | 2 +- include/linux/slab.h | 2 +- mm/slab.c | 4 ++-- mm/slab_common.c | 2 +- mm/slob.c | 16 +++++++++++----- 5 files changed, 16 insertions(+), 10 deletions(-) --- a/arch/arm64/include/asm/cache.h~mm-make-minimum-slab-alignment-a-runtime-property-fix +++ a/arch/arm64/include/asm/cache.h @@ -58,7 +58,7 @@ #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) #elif defined(CONFIG_KASAN_HW_TAGS) -static inline size_t arch_slab_minalign(void) +static inline unsigned int arch_slab_minalign(void) { return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE : __alignof__(unsigned long long); --- a/include/linux/slab.h~mm-make-minimum-slab-alignment-a-runtime-property-fix +++ a/include/linux/slab.h @@ -215,7 +215,7 @@ void kmem_dump_obj(void *object); * of two and >= ARCH_SLAB_MINALIGN. */ #ifndef arch_slab_minalign -static inline size_t arch_slab_minalign(void) +static inline unsigned int arch_slab_minalign(void) { return ARCH_SLAB_MINALIGN; } --- a/mm/slab.c~mm-make-minimum-slab-alignment-a-runtime-property-fix +++ a/mm/slab.c @@ -3010,8 +3010,8 @@ static void *cache_alloc_debugcheck_afte if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); if ((unsigned long)objp & (arch_slab_minalign() - 1)) { - pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp, - (int)arch_slab_minalign()); + pr_err("0x%px: not aligned to arch_slab_minalign()=%u\n", objp, + arch_slab_minalign()); } return objp; } --- a/mm/slab_common.c~mm-make-minimum-slab-alignment-a-runtime-property-fix +++ a/mm/slab_common.c @@ -154,7 +154,7 @@ static unsigned int calculate_alignment( align = max(align, ralign); } - align = max_t(size_t, align, arch_slab_minalign()); + align = max(align, arch_slab_minalign()); return ALIGN(align, sizeof(void *)); } --- a/mm/slob.c~mm-make-minimum-slab-alignment-a-runtime-property-fix +++ a/mm/slob.c @@ -478,9 +478,11 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) { unsigned int *m; - int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); + unsigned int minalign; void *ret; + minalign = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); gfp &= gfp_allowed_mask; might_alloc(gfp); @@ -493,7 +495,7 @@ __do_kmalloc_node(size_t size, gfp_t gfp * kmalloc()'d objects. */ if (is_power_of_2(size)) - align = max(minalign, (int) size); + align = max_t(unsigned int, minalign, size); if (!size) return ZERO_SIZE_PTR; @@ -555,8 +557,11 @@ void kfree(const void *block) sp = virt_to_folio(block); if (folio_test_slab(sp)) { - int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); + unsigned int align = max_t(unsigned int, + ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); unsigned int *m = (unsigned int *)(block - align); + slob_free(m, *m + align); } else { unsigned int order = folio_order(sp); @@ -573,7 +578,7 @@ EXPORT_SYMBOL(kfree); size_t __ksize(const void *block) { struct folio *folio; - int align; + unsigned int align; unsigned int *m; BUG_ON(!block); @@ -584,7 +589,8 @@ size_t __ksize(const void *block) if (unlikely(!folio_test_slab(folio))) return folio_size(folio); - align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); + align = max_t(unsigned int, ARCH_KMALLOC_MINALIGN, + arch_slab_minalign()); m = (unsigned int *)(block - align); return SLOB_UNITS(*m) * SLOB_UNIT; }
On Wed, Apr 27, 2022 at 1:27 PM Andrew Morton <akpm@linux-foundation.org> wrote: > > On Wed, 27 Apr 2022 12:58:20 -0700 Peter Collingbourne <pcc@google.com> wrote: > > > When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum > > slab alignment to 16. This happens even if MTE is not supported in > > hardware or disabled via kasan=off, which creates an unnecessary > > memory overhead in those cases. Eliminate this overhead by making > > the minimum slab alignment a runtime property and only aligning to > > 16 if KASAN is enabled at runtime. > > > > On a DragonBoard 845c (non-MTE hardware) with a kernel built with > > CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android > > boot I see the following Slab measurements in /proc/meminfo (median > > of 3 reboots): > > > > ... > > > > --- a/mm/slab.c > > +++ b/mm/slab.c > > @@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, > > objp += obj_offset(cachep); > > if (cachep->ctor && cachep->flags & SLAB_POISON) > > cachep->ctor(objp); > > - if (ARCH_SLAB_MINALIGN && > > - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { > > - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", > > - objp, (int)ARCH_SLAB_MINALIGN); > > + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { > > + pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp, > > + (int)arch_slab_minalign()); > > printf/printk know about size_t. Use %zu, no cast needed. But... > > > } > > return objp; > > } > > diff --git a/mm/slab_common.c b/mm/slab_common.c > > index 2b3206a2c3b5..33cc49810a54 100644 > > --- a/mm/slab_common.c > > +++ b/mm/slab_common.c > > @@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags, > > align = max(align, ralign); > > } > > > > - if (align < ARCH_SLAB_MINALIGN) > > - align = ARCH_SLAB_MINALIGN; > > + align = max_t(size_t, align, arch_slab_minalign()); > > max_t/min_t are nature's way of telling us "you screwed up the types". > > So what type _is_ slab alignment? size_t seems sensible, but the code > prefers unsigned int. So how about we stick with that? > > > This compiles. Still some max_t's in slob.c because I was too lazy to > go fix the type of ARCH_KMALLOC_MINALIGN. > > Shrug, I don't know if we can be bothered. You decide :) Hi Andrew, No strong opinions here. I'm happy with the fixup that you added to your tree on top of my patch. Peter
On 4/27/22 21:58, Peter Collingbourne wrote: > When CONFIG_KASAN_HW_TAGS is enabled we currently increase the minimum > slab alignment to 16. This happens even if MTE is not supported in > hardware or disabled via kasan=off, which creates an unnecessary > memory overhead in those cases. Eliminate this overhead by making > the minimum slab alignment a runtime property and only aligning to > 16 if KASAN is enabled at runtime. > > On a DragonBoard 845c (non-MTE hardware) with a kernel built with > CONFIG_KASAN_HW_TAGS, waiting for quiescence after a full Android > boot I see the following Slab measurements in /proc/meminfo (median > of 3 reboots): > > Before: 169020 kB > After: 167304 kB > > Link: https://linux-review.googlesource.com/id/I752e725179b43b144153f4b6f584ceb646473ead > Signed-off-by: Peter Collingbourne <pcc@google.com> > Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com> > Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> > Tested-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> > Acked-by: David Rientjes <rientjes@google.com> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Andrew's fixup LGTM too.
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a074459f8f2f..22b22dc1b1b5 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -6,6 +6,7 @@ #define __ASM_CACHE_H #include <asm/cputype.h> +#include <asm/mte-def.h> #define CTR_L1IP_SHIFT 14 #define CTR_L1IP_MASK 3 @@ -49,16 +50,22 @@ */ #define ARCH_DMA_MINALIGN (128) +#ifndef __ASSEMBLY__ + +#include <linux/bitops.h> +#include <linux/kasan-enabled.h> + #ifdef CONFIG_KASAN_SW_TAGS #define ARCH_SLAB_MINALIGN (1ULL << KASAN_SHADOW_SCALE_SHIFT) #elif defined(CONFIG_KASAN_HW_TAGS) -#define ARCH_SLAB_MINALIGN MTE_GRANULE_SIZE +static inline size_t arch_slab_minalign(void) +{ + return kasan_hw_tags_enabled() ? MTE_GRANULE_SIZE : + __alignof__(unsigned long long); +} +#define arch_slab_minalign() arch_slab_minalign() #endif -#ifndef __ASSEMBLY__ - -#include <linux/bitops.h> - #define ICACHEF_ALIASING 0 #define ICACHEF_VPIPT 1 extern unsigned long __icache_flags; diff --git a/include/linux/slab.h b/include/linux/slab.h index 373b3ef99f4e..2c7190db4cc0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -209,6 +209,18 @@ void kmem_dump_obj(void *object); #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) #endif +/* + * Arches can define this function if they want to decide the minimum slab + * alignment at runtime. The value returned by the function must be a power + * of two and >= ARCH_SLAB_MINALIGN. + */ +#ifndef arch_slab_minalign +static inline size_t arch_slab_minalign(void) +{ + return ARCH_SLAB_MINALIGN; +} +#endif + /* * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN diff --git a/mm/slab.c b/mm/slab.c index 0edb474edef1..97b756976c8b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3009,10 +3009,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, objp += obj_offset(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) cachep->ctor(objp); - if (ARCH_SLAB_MINALIGN && - ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) { - pr_err("0x%px: not aligned to ARCH_SLAB_MINALIGN=%d\n", - objp, (int)ARCH_SLAB_MINALIGN); + if ((unsigned long)objp & (arch_slab_minalign() - 1)) { + pr_err("0x%px: not aligned to arch_slab_minalign()=%d\n", objp, + (int)arch_slab_minalign()); } return objp; } diff --git a/mm/slab_common.c b/mm/slab_common.c index 2b3206a2c3b5..33cc49810a54 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -154,8 +154,7 @@ static unsigned int calculate_alignment(slab_flags_t flags, align = max(align, ralign); } - if (align < ARCH_SLAB_MINALIGN) - align = ARCH_SLAB_MINALIGN; + align = max_t(size_t, align, arch_slab_minalign()); return ALIGN(align, sizeof(void *)); } diff --git a/mm/slob.c b/mm/slob.c index 40ea6e2d4ccd..3bd2669bd690 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -478,7 +478,7 @@ static __always_inline void * __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller) { unsigned int *m; - int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); void *ret; gfp &= gfp_allowed_mask; @@ -555,7 +555,7 @@ void kfree(const void *block) sp = virt_to_folio(block); if (folio_test_slab(sp)) { - int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); unsigned int *m = (unsigned int *)(block - align); slob_free(m, *m + align); } else { @@ -584,7 +584,7 @@ size_t __ksize(const void *block) if (unlikely(!folio_test_slab(folio))) return folio_size(folio); - align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + align = max_t(size_t, ARCH_KMALLOC_MINALIGN, arch_slab_minalign()); m = (unsigned int *)(block - align); return SLOB_UNITS(*m) * SLOB_UNIT; }