
[RFC,v2,29/34] mm: slub: Move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context

Message ID 20210609113903.1421-30-vbabka@suse.cz (mailing list archive)
State New, archived
Series SLUB: reduce irq disabled scope and make it RT compatible

Commit Message

Vlastimil Babka June 9, 2021, 11:38 a.m. UTC
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

flush_all() flushes a specific slab cache on each CPU (where the cache
is present). The deactivate_slab()/__free_slab() invocations happen
within an IPI handler and are problematic for PREEMPT_RT.

The flush operation is neither frequent nor a hot path, so the
per-CPU flush can instead be performed from a workqueue.

[vbabka@suse.cz: adapt to new SLUB changes]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 8 deletions(-)
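
At a glance, the patch replaces the single on_each_cpu_cond() IPI with
per-CPU work items that are scheduled and then waited for. The sketch
below condenses the new flush_all() from the diff further down, with
comments added; names match mm/slub.c:

	/* Condensed from the diff below; only the control flow is shown. */
	struct slub_flush_work {
		struct work_struct work;
		struct kmem_cache *s;
		bool skip;
	};

	static DEFINE_MUTEX(flush_lock);
	static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);

	static void flush_all(struct kmem_cache *s)
	{
		struct slub_flush_work *sfw;
		unsigned int cpu;

		cpus_read_lock();	 /* keep the set of online CPUs stable */
		mutex_lock(&flush_lock); /* serialize users of the shared per-CPU slots */

		/* Pass 1: queue a work item on every CPU that has something to flush. */
		for_each_online_cpu(cpu) {
			sfw = &per_cpu(slub_flush, cpu);
			if (!has_cpu_slab(cpu, s)) {
				sfw->skip = true;
				continue;
			}
			INIT_WORK(&sfw->work, flush_cpu_slab);
			sfw->skip = false;
			sfw->s = s;
			schedule_work_on(cpu, &sfw->work);
		}

		/* Pass 2: wait for the queued work items to finish. */
		for_each_online_cpu(cpu) {
			sfw = &per_cpu(slub_flush, cpu);
			if (sfw->skip)
				continue;
			flush_work(&sfw->work);
		}

		mutex_unlock(&flush_lock);
		cpus_read_unlock();
	}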

Comments

Cyrill Gorcunov June 9, 2021, 10:29 p.m. UTC | #1
On Wed, Jun 09, 2021 at 01:38:58PM +0200, Vlastimil Babka wrote:
> +static DEFINE_MUTEX(flush_lock);
> +static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
> +
>  static void flush_all(struct kmem_cache *s)
>  {
> -	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
> +	struct slub_flush_work *sfw;
> +	unsigned int cpu;
> +
> +	cpus_read_lock();
> +	mutex_lock(&flush_lock);
> +

Hi, Vlastimil! Could you please point out why you lock the cpus first and
take the mutex only after? Why not mutex_lock + cpus_read_lock instead?
Vlastimil Babka June 10, 2021, 8:32 a.m. UTC | #2
On 6/10/21 12:29 AM, Cyrill Gorcunov wrote:
> On Wed, Jun 09, 2021 at 01:38:58PM +0200, Vlastimil Babka wrote:
>> +static DEFINE_MUTEX(flush_lock);
>> +static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
>> +
>>  static void flush_all(struct kmem_cache *s)
>>  {
>> -	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
>> +	struct slub_flush_work *sfw;
>> +	unsigned int cpu;
>> +
>> +	cpus_read_lock();
>> +	mutex_lock(&flush_lock);
>> +
> 
> Hi, Vlastimil! Could you please point out why you lock the cpus first and
> take the mutex only after? Why not mutex_lock + cpus_read_lock instead?

Good question! I must admit I didn't think about it much and just followed the
order that was in Sebastian's original patch [1].
But there was a good reason for this order, as some paths via
__kmem_cache_shutdown() and __kmem_cache_shrink() were already called under
cpus_read_lock. Meanwhile mainline (me, actually) removed those, so there no
longer seems to be a need to keep this order and we could switch it.

Thanks,
Vlastimil

[1]
https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git/tree/patches/0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch?h=linux-5.12.y-rt-patches
Cyrill Gorcunov June 10, 2021, 8:36 a.m. UTC | #3
On Thu, Jun 10, 2021 at 10:32:14AM +0200, Vlastimil Babka wrote:
> >>  static void flush_all(struct kmem_cache *s)
> >>  {
> >> -	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
> >> +	struct slub_flush_work *sfw;
> >> +	unsigned int cpu;
> >> +
> >> +	cpus_read_lock();
> >> +	mutex_lock(&flush_lock);
> >> +
> > 
> > Hi, Vlastimil! Could you please point out why you lock the cpus first and
> > take the mutex only after? Why not mutex_lock + cpus_read_lock instead?
> 
> Good question! I must admit I didn't think about it much and just followed the
> order that was in Sebastian's original patch [1].
> But there was a good reason for this order, as some paths via
> __kmem_cache_shutdown() and __kmem_cache_shrink() were already called under
> cpus_read_lock. Meanwhile mainline (me, actually) removed those, so there no
> longer seems to be a need to keep this order and we could switch it.

I bet we should switch :) But we can do that on top later, once the series
has settled down and been merged.
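
For reference, the swapped ordering under discussion would look roughly like
this (a sketch only, not part of the posted patch; it assumes no caller
reaches flush_all() while already holding cpus_read_lock()):

	static void flush_all(struct kmem_cache *s)
	{
		struct slub_flush_work *sfw;
		unsigned int cpu;

		mutex_lock(&flush_lock);	/* taken first under the swapped order */
		cpus_read_lock();

		/* ... queue and wait for the per-CPU work exactly as in the patch ... */

		cpus_read_unlock();
		mutex_unlock(&flush_lock);
	}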
Hillf Danton July 7, 2021, 6:33 a.m. UTC | #4
On Wed,  9 Jun 2021 13:38:58 +0200, Vlastimil Babka wrote:
>From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
>
>flush_all() flushes a specific slab cache on each CPU (where the cache
>is present). The deactivate_slab()/__free_slab() invocations happen
>within an IPI handler and are problematic for PREEMPT_RT.
>
>The flush operation is neither frequent nor a hot path, so the
>per-CPU flush can instead be performed from a workqueue.
>
>[vbabka@suse.cz: adapt to new SLUB changes]
>Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
>Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>---
> mm/slub.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 48 insertions(+), 8 deletions(-)
>
>diff --git a/mm/slub.c b/mm/slub.c
>index 5c70fc17e9be..6e747e0d7dcd 100644
>--- a/mm/slub.c
>+++ b/mm/slub.c
>@@ -2474,33 +2474,73 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
> 	unfreeze_partials_cpu(s, c);
> }
>
>+struct slub_flush_work {
>+	struct work_struct work;
>+	struct kmem_cache *s;
>+	bool skip;
>+};
>+
> /*
>  * Flush cpu slab.
>  *
>- * Called from IPI handler with interrupts disabled.
>+ * Called from CPU work handler with migration disabled.
>  */

The comment muddies the point more than it clears it up - kworkers do their
work without an eye on cpuhp. It is fine to just cut it, given the
cpus_read_lock() in flush_all().

>-static void flush_cpu_slab(void *d)
>+static void flush_cpu_slab(struct work_struct *w)
> {
>-	struct kmem_cache *s = d;
>-	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
>+	struct kmem_cache *s;
>+	struct kmem_cache_cpu *c;
>+	struct slub_flush_work *sfw;
>+
>+	sfw = container_of(w, struct slub_flush_work, work);
>+
>+	s = sfw->s;
>+	c = this_cpu_ptr(s->cpu_slab);
>
> 	if (c->page)
>-		flush_slab(s, c, false);
>+		flush_slab(s, c, true);
>
> 	unfreeze_partials(s);
> }
>
>-static bool has_cpu_slab(int cpu, void *info)
>+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
> {
>-	struct kmem_cache *s = info;
> 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
>
> 	return c->page || slub_percpu_partial(c);
> }
>
>+static DEFINE_MUTEX(flush_lock);
>+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
>+
> static void flush_all(struct kmem_cache *s)
> {
>-	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
>+	struct slub_flush_work *sfw;
>+	unsigned int cpu;
>+
>+	cpus_read_lock();
>+	mutex_lock(&flush_lock);
>+
>+	for_each_online_cpu(cpu) {
>+		sfw = &per_cpu(slub_flush, cpu);
>+		if (!has_cpu_slab(cpu, s)) {
>+			sfw->skip = true;
>+			continue;
>+		}
>+		INIT_WORK(&sfw->work, flush_cpu_slab);
>+		sfw->skip = false;
>+		sfw->s = s;
>+		schedule_work_on(cpu, &sfw->work);
>+	}
>+
>+	for_each_online_cpu(cpu) {
>+		sfw = &per_cpu(slub_flush, cpu);
>+		if (sfw->skip)
>+			continue;
>+		flush_work(&sfw->work);
>+	}
>+
>+	mutex_unlock(&flush_lock);
>+	cpus_read_unlock();
> }
>
> /*
>--
>2.31.1

Patch

diff --git a/mm/slub.c b/mm/slub.c
index 5c70fc17e9be..6e747e0d7dcd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2474,33 +2474,73 @@  static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 	unfreeze_partials_cpu(s, c);
 }
 
+struct slub_flush_work {
+	struct work_struct work;
+	struct kmem_cache *s;
+	bool skip;
+};
+
 /*
  * Flush cpu slab.
  *
- * Called from IPI handler with interrupts disabled.
+ * Called from CPU work handler with migration disabled.
  */
-static void flush_cpu_slab(void *d)
+static void flush_cpu_slab(struct work_struct *w)
 {
-	struct kmem_cache *s = d;
-	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
+	struct kmem_cache *s;
+	struct kmem_cache_cpu *c;
+	struct slub_flush_work *sfw;
+
+	sfw = container_of(w, struct slub_flush_work, work);
+
+	s = sfw->s;
+	c = this_cpu_ptr(s->cpu_slab);
 
 	if (c->page)
-		flush_slab(s, c, false);
+		flush_slab(s, c, true);
 
 	unfreeze_partials(s);
 }
 
-static bool has_cpu_slab(int cpu, void *info)
+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
 {
-	struct kmem_cache *s = info;
 	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 	return c->page || slub_percpu_partial(c);
 }
 
+static DEFINE_MUTEX(flush_lock);
+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
 static void flush_all(struct kmem_cache *s)
 {
-	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
+	struct slub_flush_work *sfw;
+	unsigned int cpu;
+
+	cpus_read_lock();
+	mutex_lock(&flush_lock);
+
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		if (!has_cpu_slab(cpu, s)) {
+			sfw->skip = true;
+			continue;
+		}
+		INIT_WORK(&sfw->work, flush_cpu_slab);
+		sfw->skip = false;
+		sfw->s = s;
+		schedule_work_on(cpu, &sfw->work);
+	}
+
+	for_each_online_cpu(cpu) {
+		sfw = &per_cpu(slub_flush, cpu);
+		if (sfw->skip)
+			continue;
+		flush_work(&sfw->work);
+	}
+
+	mutex_unlock(&flush_lock);
+	cpus_read_unlock();
 }
 
 /*