Message ID | 20210609113903.1421-34-vbabka@suse.cz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | SLUB: reduce irq disabled scope and make it RT compatible | expand |
On 6/9/21 1:39 PM, Vlastimil Babka wrote: > We currently use preempt_disable() (directly or via get_cpu_ptr()) to stabilize > the pointer to kmem_cache_cpu. On PREEMPT_RT this would be incompatible with > the list_lock spinlock. We can use migrate_disable() instead, but that > increases overhead on !PREEMPT_RT as it's an unconditional function call even > though it's ultimately a migrate_disable() there. > > In order to get the best available mechanism on both PREEMPT_RT and > !PREEMPT_RT, introduce private slub_get_cpu_ptr() and slub_put_cpu_ptr() > wrappers and use them. > > Signed-off-by: Vlastimil Babka <vbabka@suse.cz> > --- > mm/slub.c | 41 +++++++++++++++++++++++++++++++---------- > 1 file changed, 31 insertions(+), 10 deletions(-) > > diff --git a/mm/slub.c b/mm/slub.c > index 12e966f07f19..caa206213e72 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -115,6 +115,26 @@ > * the fast path and disables lockless freelists. > */ > > +/* > + * We could simply use migrate_disable()/enable() but as long as it's a > + * function call even on !PREEMPT_RT, use inline preempt_disable() there. > + */ > +#ifdef CONFIG_PREEMPT_RT > +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) > +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) After Mel's report and bisect pointing to this patch, I realized I got the #ifdef wrong and it should be #ifndef. Fixed patch follows: ----8<---- From e79e166414b8c22e3bcf73a9383bafe802fa64d2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka <vbabka@suse.cz> Date: Fri, 21 May 2021 14:03:23 +0200 Subject: [PATCH] mm, slub: use migrate_disable() on PREEMPT_RT We currently use preempt_disable() (directly or via get_cpu_ptr()) to stabilize the pointer to kmem_cache_cpu. On PREEMPT_RT this would be incompatible with the list_lock spinlock. We can use migrate_disable() instead, but that increases overhead on !PREEMPT_RT as it's an unconditional function call even though it's ultimately a migrate_disable() there. 
In order to get the best available mechanism on both PREEMPT_RT and !PREEMPT_RT, introduce private slub_get_cpu_ptr() and slub_put_cpu_ptr() wrappers and use them. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> --- mm/slub.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 12e966f07f19..f0359b0c8154 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -115,6 +115,26 @@ * the fast path and disables lockless freelists. */ +/* + * We could simply use migrate_disable()/enable() but as long as it's a + * function call even on !PREEMPT_RT, use inline preempt_disable() there. + */ +#ifndef CONFIG_PREEMPT_RT +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) +#else +#define slub_get_cpu_ptr(var) \ +({ \ + migrate_disable(); \ + this_cpu_ptr(var); \ +}) +#define slub_put_cpu_ptr(var) \ +do { \ + (void)(var); \ + migrate_enable(); \ +} while (0) +#endif + #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); @@ -2419,7 +2439,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) int pages; int pobjects; - preempt_disable(); + slub_get_cpu_ptr(s->cpu_slab); do { pages = 0; pobjects = 0; @@ -2450,7 +2470,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); - preempt_enable(); + slub_put_cpu_ptr(s->cpu_slab); #endif /* CONFIG_SLUB_CPU_PARTIAL */ } @@ -2759,7 +2779,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!pfmemalloc_match(page, gfpflags))) goto deactivate_slab; - /* must check again c->page in case IRQ handler changed it */ + /* must check again c->page in case we got preempted and it changed */ local_irq_save(flags); if (unlikely(page != c->page)) { local_irq_restore(flags); @@ -2818,7 +2838,8 @@ static void 
*___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, } if (unlikely(!slub_percpu_partial(c))) { local_irq_restore(flags); - goto new_objects; /* stolen by an IRQ handler */ + /* we were preempted and partial list got empty */ + goto new_objects; } page = c->page = slub_percpu_partial(c); @@ -2834,9 +2855,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (freelist) goto check_new_page; - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); page = new_slab(s, gfpflags, node); - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); if (unlikely(!page)) { slab_out_of_memory(s, gfpflags, node); @@ -2919,12 +2940,12 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * cpu before disabling preemption. Need to reload cpu area * pointer. */ - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); #endif p = ___slab_alloc(s, gfpflags, node, addr, c); #ifdef CONFIG_PREEMPTION - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); #endif return p; } @@ -3445,7 +3466,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, * IRQs, which protects against PREEMPT and interrupts * handlers invoking normal fastpath. */ - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); local_irq_disable(); for (i = 0; i < size; i++) { @@ -3491,7 +3512,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } c->tid = next_tid(c->tid); local_irq_enable(); - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); /* * memcg and kmem_cache debug support and memory initialization.
On 2021-06-14 13:07:14 [+0200], Vlastimil Babka wrote: > > +#ifdef CONFIG_PREEMPT_RT > > +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) > > +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) > > After Mel's report and bisect pointing to this patch, I realized I got the > #ifdef wrong and it should be #ifndef So if you got the ifdef wrong (and kept everything as-is) then you tested the RT version on !RT. migrate_disable() behaves on !RT as on RT. As per changelog you don't use migrate_disable() unconditionally because it increases the overhead on !RT. I haven't looked at the series and I have just this tiny question: why did migrate_disable() crash for Mel on !RT and why do you expect that it does not happen on PREEMPT_RT? Sebastian
On 6/14/21 1:16 PM, Sebastian Andrzej Siewior wrote: > On 2021-06-14 13:07:14 [+0200], Vlastimil Babka wrote: >> > +#ifdef CONFIG_PREEMPT_RT >> > +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) >> > +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) >> >> After Mel's report and bisect pointing to this patch, I realized I got the >> #ifdef wrong and it should be #ifndef > > So if you got the ifdef wrong (and kept everything as-is) then you > tested the RT version on !RT. migrate_disable() behaves on !RT as on RT. > As per changelog you don't use migrate_disable() unconditionally because > it increases the overhead on !RT. Correct. > I haven't looked at the series and I have just this tiny question: why > did migrate_disable() crash for Mel on !RT and why do you expect that it > does not happen on PREEMPT_RT? Right, so it's because __slab_alloc() has this optimization to avoid re-reading 'c' in case there is no preemption enabled at all (or it's just voluntary). #ifdef CONFIG_PREEMPTION /* * We may have been preempted and rescheduled on a different * cpu before disabling preemption. Need to reload cpu area * pointer. */ c = slub_get_cpu_ptr(s->cpu_slab); #endif Mel's config has CONFIG_PREEMPT_VOLUNTARY, which means CONFIG_PREEMPTION is not enabled. But then later in ___slab_alloc() we have slub_put_cpu_ptr(s->cpu_slab); page = new_slab(s, gfpflags, node); c = slub_get_cpu_ptr(s->cpu_slab); And this is not hidden under CONFIG_PREEMPTION, so with the #ifdef bug the slub_put_cpu_ptr did a migrate_enable() with Mel's config, without prior migrate_disable(). If there wasn't the #ifdef PREEMPT_RT bug: - this slub_put_cpu_ptr() would translate to put_cpu_ptr() thus preempt_enable(), which on this config is just a barrier(), so it doesn't matter that there was no matching preempt_disable() before. - with PREEMPT_RT the CONFIG_PREEMPTION would be enabled, so the slub_get_cpu_ptr() would do a migrate_disable() and there's no imbalance. 
But now that I dig into this in detail, I can see there might be another instance of this imbalance bug, if CONFIG_PREEMPTION is disabled, but CONFIG_PREEMPT_COUNT is enabled, which seems to be possible in some debug scenarios. Because then preempt_disable()/preempt_enable() still manipulate the preempt counter and compiling them out in __slab_alloc() will cause imbalance. So I think the guards in __slab_alloc() should be using CONFIG_PREEMPT_COUNT instead of CONFIG_PREEMPTION to be correct on all configs. I dare not remove them completely :)
On 6/14/21 1:33 PM, Vlastimil Babka wrote: > On 6/14/21 1:16 PM, Sebastian Andrzej Siewior wrote: > But now that I dig into this in detail, I can see there might be another > instance of this imbalance bug, if CONFIG_PREEMPTION is disabled, but > CONFIG_PREEMPT_COUNT is enabled, which seems to be possible in some debug > scenarios. Because then preempt_disable()/preempt_enable() still manipulate the > preempt counter and compiling them out in __slab_alloc() will cause imbalance. > > So I think the guards in __slab_alloc() should be using CONFIG_PREEMPT_COUNT > instead of CONFIG_PREEMPTION to be correct on all configs. I dare not remove them > completely :) Yep, it's possible to get such a scenario with PREEMPT_VOLUNTARY plus PROVE_LOCKING - CONFIG_PREEMPTION is disabled but CONFIG_PREEMPT_COUNT is enabled, and RCU then complains in the page allocator due to the unpaired preempt_disable() before entering it. I've pushed a new branch revision with this fixed: https://git.kernel.org/pub/scm/linux/kernel/git/vbabka/linux.git/log/?h=slub-local-lock-v2r3
On 2021-06-14 13:33:43 [+0200], Vlastimil Babka wrote: > On 6/14/21 1:16 PM, Sebastian Andrzej Siewior wrote: > > I haven't looked at the series and I have just this tiny question: why > > did migrate_disable() crash for Mel on !RT and why do you expect that it > > does not happen on PREEMPT_RT? > > Right, so it's because __slab_alloc() has this optimization to avoid re-reading > 'c' in case there is no preemption enabled at all (or it's just voluntary). > > #ifdef CONFIG_PREEMPTION > /* > * We may have been preempted and rescheduled on a different > * cpu before disabling preemption. Need to reload cpu area > * pointer. > */ > c = slub_get_cpu_ptr(s->cpu_slab); > #endif > > Mel's config has CONFIG_PREEMPT_VOLUNTARY, which means CONFIG_PREEMPTION is not > enabled. > > But then later in ___slab_alloc() we have > > slub_put_cpu_ptr(s->cpu_slab); > page = new_slab(s, gfpflags, node); > c = slub_get_cpu_ptr(s->cpu_slab); > > And this is not hidden under CONFIG_PREEMPTION, so with the #ifdef bug the > slub_put_cpu_ptr did a migrate_enable() with Mel's config, without prior > migrate_disable(). Ach, right. The update to this field is done with cmpxchg-double (if I remember correctly) but I don't remember if this is also re-entry safe. > If there wasn't the #ifdef PREEMPT_RT bug: > - this slub_put_cpu_ptr() would translate to put_cpu_ptr() thus > preempt_enable(), which on this config is just a barrier(), so it doesn't matter > that there was no matching preempt_disable() before. > - with PREEMPT_RT the CONFIG_PREEMPTION would be enabled, so the > slub_get_cpu_ptr() would do a migrate_disable() and there's no imbalance. > > But now that I dig into this in detail, I can see there might be another > instance of this imbalance bug, if CONFIG_PREEMPTION is disabled, but > CONFIG_PREEMPT_COUNT is enabled, which seems to be possible in some debug > scenarios. 
Because then preempt_disable()/preempt_enable() still manipulate the > preempt counter and compiling them out in __slab_alloc() will cause imbalance. > > So I think the guards in __slab_alloc() should be using CONFIG_PREEMPT_COUNT > instead of CONFIG_PREEMPTION to be correct on all configs. I dare not remove them > completely :) :) Sebastian
diff --git a/mm/slub.c b/mm/slub.c index 12e966f07f19..caa206213e72 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -115,6 +115,26 @@ * the fast path and disables lockless freelists. */ +/* + * We could simply use migrate_disable()/enable() but as long as it's a + * function call even on !PREEMPT_RT, use inline preempt_disable() there. + */ +#ifdef CONFIG_PREEMPT_RT +#define slub_get_cpu_ptr(var) get_cpu_ptr(var) +#define slub_put_cpu_ptr(var) put_cpu_ptr(var) +#else +#define slub_get_cpu_ptr(var) \ +({ \ + migrate_disable(); \ + this_cpu_ptr(var); \ +}) +#define slub_put_cpu_ptr(var) \ +do { \ + (void)(var); \ + migrate_enable(); \ +} while (0) +#endif + #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DEFINE_STATIC_KEY_TRUE(slub_debug_enabled); @@ -2419,7 +2439,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) int pages; int pobjects; - preempt_disable(); + slub_get_cpu_ptr(s->cpu_slab); do { pages = 0; pobjects = 0; @@ -2450,7 +2470,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); - preempt_enable(); + slub_put_cpu_ptr(s->cpu_slab); #endif /* CONFIG_SLUB_CPU_PARTIAL */ } @@ -2759,7 +2779,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!pfmemalloc_match(page, gfpflags))) goto deactivate_slab; - /* must check again c->page in case IRQ handler changed it */ + /* must check again c->page in case we got preempted and it changed */ local_irq_save(flags); if (unlikely(page != c->page)) { local_irq_restore(flags); @@ -2818,7 +2838,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, } if (unlikely(!slub_percpu_partial(c))) { local_irq_restore(flags); - goto new_objects; /* stolen by an IRQ handler */ + /* we were preempted and partial list got empty */ + goto new_objects; } page = c->page = slub_percpu_partial(c); @@ -2834,9 +2855,9 @@ static void 
*___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (freelist) goto check_new_page; - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); page = new_slab(s, gfpflags, node); - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); if (unlikely(!page)) { slab_out_of_memory(s, gfpflags, node); @@ -2919,12 +2940,12 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, * cpu before disabling preemption. Need to reload cpu area * pointer. */ - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); #endif p = ___slab_alloc(s, gfpflags, node, addr, c); #ifdef CONFIG_PREEMPTION - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); #endif return p; } @@ -3445,7 +3466,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, * IRQs, which protects against PREEMPT and interrupts * handlers invoking normal fastpath. */ - c = get_cpu_ptr(s->cpu_slab); + c = slub_get_cpu_ptr(s->cpu_slab); local_irq_disable(); for (i = 0; i < size; i++) { @@ -3491,7 +3512,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } c->tid = next_tid(c->tid); local_irq_enable(); - put_cpu_ptr(s->cpu_slab); + slub_put_cpu_ptr(s->cpu_slab); /* * memcg and kmem_cache debug support and memory initialization.
We currently use preempt_disable() (directly or via get_cpu_ptr()) to stabilize the pointer to kmem_cache_cpu. On PREEMPT_RT this would be incompatible with the list_lock spinlock. We can use migrate_disable() instead, but that increases overhead on !PREEMPT_RT as it's an unconditional function call even though it's ultimately a migrate_disable() there. In order to get the best available mechanism on both PREEMPT_RT and !PREEMPT_RT, introduce private slub_get_cpu_ptr() and slub_put_cpu_ptr() wrappers and use them. Signed-off-by: Vlastimil Babka <vbabka@suse.cz> --- mm/slub.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-)