Message ID | 20240430054604.4169568-2-david@fromorbit.com (mailing list archive) |
---|---|
State | New |
Series | mm: fix nested allocation context filtering |
On Tue, 30 Apr 2024 at 07:46, Dave Chinner <david@fromorbit.com> wrote:
>
> From: Dave Chinner <dchinner@redhat.com>
>
> Any "internal" nested allocation done from within an allocation
> context needs to obey the high level allocation gfp_mask
> constraints. This is necessary for debug code like KASAN, kmemleak,
> lockdep, etc. that allocate memory for saving stack traces and other
> information during memory allocation. If they don't obey things like
> __GFP_NOLOCKDEP or __GFP_NOWARN, they produce false positive failure
> detections.
>
> kmemleak gets this right by using gfp_kmemleak_mask() to pass
> through the relevant context flags to the nested allocation
> to ensure that the allocation follows the constraints of the caller
> context.
>
> KASAN was recently found to be missing __GFP_NOLOCKDEP due to stack
> depot allocations, and even more recently the page owner tracking
> code was also found to be missing __GFP_NOLOCKDEP support.
>
> We also don't want KASAN or lockdep to drive the system into
> OOM kill territory by exhausting emergency reserves. This is
> something that kmemleak also gets right by adding (__GFP_NORETRY |
> __GFP_NOMEMALLOC | __GFP_NOWARN) to the allocation mask.
>
> Hence it is clear that we need to define a common nested allocation
> filter mask for these sorts of third party nested allocations used
> in debug code. So to start this process, lift gfp_kmemleak_mask() to
> gfp.h and rename it to gfp_nested_mask(), and convert the kmemleak
> callers to use it.
>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Reviewed-by: Marco Elver <elver@google.com>

Looks very reasonable, thanks.

> ---
> include/linux/gfp.h | 25 +++++++++++++++++++++++++
> mm/kmemleak.c | 10 ++--------
> 2 files changed, 27 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index c775ea3c6015..a4ca004f3b8e 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -154,6 +154,31 @@ static inline int gfp_zonelist(gfp_t flags)
>          return ZONELIST_FALLBACK;
>  }
>
> +/*
> + * gfp flag masking for nested internal allocations.
> + *
> + * For code that needs to do allocations inside the public allocation API (e.g.
> + * memory allocation tracking code) the allocations need to obey the caller
> + * allocation context constrains to prevent allocation context mismatches (e.g.
> + * GFP_KERNEL allocations in GFP_NOFS contexts) from potential deadlock
> + * situations.
> + *
> + * It is also assumed that these nested allocations are for internal kernel
> + * object storage purposes only and are not going to be used for DMA, etc. Hence
> + * we strip out all the zone information and leave just the context information
> + * intact.
> + *
> + * Further, internal allocations must fail before the higher level allocation
> + * can fail, so we must make them fail faster and fail silently. We also don't
> + * want them to deplete emergency reserves. Hence nested allocations must be
> + * prepared for these allocations to fail.
> + */
> +static inline gfp_t gfp_nested_mask(gfp_t flags)
> +{
> +        return ((flags & (GFP_KERNEL | GFP_ATOMIC | __GFP_NOLOCKDEP)) |
> +                (__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN));
> +}
> +
>  /*
>   * We get the zone list from the current node and the gfp_mask.
>   * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones.
> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
> index 6a540c2b27c5..b723f937e513 100644
> --- a/mm/kmemleak.c
> +++ b/mm/kmemleak.c
> @@ -114,12 +114,6 @@
>
>  #define BYTES_PER_POINTER sizeof(void *)
>
> -/* GFP bitmask for kmemleak internal allocations */
> -#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC | \
> -                                           __GFP_NOLOCKDEP)) | \
> -                                 __GFP_NORETRY | __GFP_NOMEMALLOC | \
> -                                 __GFP_NOWARN)
> -
>  /* scanning area inside a memory block */
>  struct kmemleak_scan_area {
>          struct hlist_node node;
> @@ -463,7 +457,7 @@ static struct kmemleak_object *mem_pool_alloc(gfp_t gfp)
>
>          /* try the slab allocator first */
>          if (object_cache) {
> -                object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
> +                object = kmem_cache_alloc(object_cache, gfp_nested_mask(gfp));
>                  if (object)
>                          return object;
>          }
> @@ -947,7 +941,7 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
>
>          untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer);
>
>          if (scan_area_cache)
> -                area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
> +                area = kmem_cache_alloc(scan_area_cache, gfp_nested_mask(gfp));
>
>          raw_spin_lock_irqsave(&object->lock, flags);
>          if (!area) {
> --
> 2.43.0
>
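
[Editor's note] To illustrate how a nested debug allocation of the kind the commit message describes would consume the new helper, here is a minimal sketch. The debug_record structure and debug_record_alloc() function are hypothetical, not part of this patch or of any existing subsystem; only gfp_nested_mask() comes from the change above.

#include <linux/types.h>
#include <linux/gfp.h>
#include <linux/slab.h>

/* Hypothetical metadata object recorded from inside an allocation path. */
struct debug_record {
        unsigned long   stack[8];       /* saved stack trace entries */
        u64             when;           /* timestamp of the tracked allocation */
};

/*
 * Allocate tracking metadata while servicing someone else's allocation.
 * Passing the caller's gfp_mask through gfp_nested_mask() keeps the
 * GFP_KERNEL/GFP_ATOMIC context bits and __GFP_NOLOCKDEP intact, while
 * forcing the nested allocation to fail fast, fail silently and stay
 * out of the emergency reserves.
 */
static struct debug_record *debug_record_alloc(gfp_t caller_gfp)
{
        return kmalloc(sizeof(struct debug_record),
                       gfp_nested_mask(caller_gfp));
}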
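[Editor's note] As a quick sanity check of what the filter actually produces, the following values are worked out here from the gfp_nested_mask() definition above; they are not stated in the patch itself.

/*
 * A caller in NOFS context: GFP_NOFS is a subset of GFP_KERNEL, so the
 * reclaim and IO bits pass through, __GFP_FS stays cleared, and the
 * fail-fast bits are ORed in:
 *
 *      gfp_nested_mask(GFP_NOFS)
 *              == GFP_NOFS | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN
 *
 * Zone and initialisation modifiers are dropped, because the nested
 * object is plain kernel metadata:
 *
 *      gfp_nested_mask(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO)
 *              == GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN
 */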