@@ -323,8 +323,6 @@ void arch_cpu_idle_dead(void)
{
unsigned int cpu = smp_processor_id();
- idle_task_exit();
-
local_irq_disable();
/*
@@ -366,8 +366,6 @@ void cpu_die(void)
unsigned int cpu = smp_processor_id();
const struct cpu_operations *ops = get_cpu_ops(cpu);
- idle_task_exit();
-
local_daif_mask();
/* Tell __cpu_die() that this CPU is now safe to dispose of */
@@ -309,8 +309,6 @@ void __cpu_die(unsigned int cpu)
void arch_cpu_idle_dead(void)
{
- idle_task_exit();
-
cpu_report_death();
while (!secondary_stack)
@@ -209,7 +209,6 @@ static inline void play_dead(void)
max_xtp();
local_irq_disable();
- idle_task_exit();
ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]);
/*
* The above is a point of no-return, the processor is
@@ -343,7 +343,6 @@ void play_dead(void)
{
int cpu = cpu_number_map(cvmx_get_core_num());
- idle_task_exit();
octeon_processor_boot = 0xff;
per_cpu(cpu_state, cpu) = CPU_DEAD;
@@ -388,8 +388,6 @@ static void bmips_cpu_die(unsigned int cpu)
void __ref play_dead(void)
{
- idle_task_exit();
-
/* flush data cache */
_dma_cache_wback_inv(0, ~0);
@@ -472,7 +472,6 @@ void play_dead(void)
unsigned int cpu;
local_irq_disable();
- idle_task_exit();
cpu = smp_processor_id();
cpu_death = CPU_DEATH_POWER;
@@ -788,8 +788,6 @@ void play_dead(void)
unsigned int cpu = smp_processor_id();
void (*play_dead_at_ckseg1)(int *);
- idle_task_exit();
-
prid_imp = read_c0_prid() & PRID_IMP_MASK;
prid_rev = read_c0_prid() & PRID_REV_MASK;
@@ -121,8 +121,6 @@ static void smp_85xx_cpu_offline_self(void)
/* mask all irqs to prevent cpu wakeup */
qoriq_pm_ops->irq_mask(cpu);
- idle_task_exit();
-
mtspr(SPRN_TCR, 0);
mtspr(SPRN_TSR, mfspr(SPRN_TSR));
@@ -924,7 +924,6 @@ static void pmac_cpu_offline_self(void)
int cpu = smp_processor_id();
local_irq_disable();
- idle_task_exit();
pr_debug("CPU%d offline\n", cpu);
generic_set_cpu_dead(cpu);
smp_wmb();
@@ -939,7 +938,6 @@ static void pmac_cpu_offline_self(void)
int cpu = smp_processor_id();
local_irq_disable();
- idle_task_exit();
/*
* turn off as much as possible, we'll be
@@ -169,7 +169,6 @@ static void pnv_cpu_offline_self(void)
/* Standard hot unplug procedure */
- idle_task_exit();
cpu = smp_processor_id();
DBG("CPU%d offline\n", cpu);
generic_set_cpu_dead(cpu);
@@ -19,7 +19,6 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/sched.h> /* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
@@ -63,7 +62,6 @@ static void pseries_cpu_offline_self(void)
unsigned int hwcpu = hard_smp_processor_id();
local_irq_disable();
- idle_task_exit();
if (xive_enabled())
xive_teardown_cpu();
else
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/sched.h> /* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
@@ -77,8 +77,6 @@ void __cpu_die(unsigned int cpu)
*/
void cpu_stop(void)
{
- idle_task_exit();
-
(void)cpu_report_death();
cpu_ops[smp_processor_id()]->cpu_stop();
@@ -987,7 +987,6 @@ void __cpu_die(unsigned int cpu)
void __noreturn cpu_die(void)
{
- idle_task_exit();
__bpon();
pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
for (;;) ;
@@ -106,7 +106,6 @@ int native_cpu_disable(unsigned int cpu)
void play_dead_common(void)
{
- idle_task_exit();
irq_ctx_exit(raw_smp_processor_id());
mb();
@@ -1301,8 +1301,6 @@ void cpu_play_dead(void)
int cpu = smp_processor_id();
unsigned long pstate;
- idle_task_exit();
-
if (tlb_type == hypervisor) {
struct trap_per_cpu *tb = &trap_block[cpu];
@@ -1656,8 +1656,6 @@ void native_cpu_die(unsigned int cpu)
void play_dead_common(void)
{
- idle_task_exit();
-
/* Ack it */
(void)cpu_report_death();
@@ -329,7 +329,6 @@ void arch_cpu_idle_dead(void)
*/
void __ref cpu_die(void)
{
- idle_task_exit();
local_irq_disable();
__asm__ __volatile__(
" movi a2, cpu_restart\n"
@@ -18,10 +18,4 @@ extern int sched_cpu_dying(unsigned int cpu);
# define sched_cpu_dying NULL
#endif
-#ifdef CONFIG_HOTPLUG_CPU
-extern void idle_task_exit(void);
-#else
-static inline void idle_task_exit(void) {}
-#endif
-
#endif /* _LINUX_SCHED_HOTPLUG_H */
@@ -3,7 +3,6 @@
*
* This code is licenced under the GPL.
*/
-#include <linux/sched/mm.h>
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
@@ -605,21 +604,6 @@ static int bringup_cpu(unsigned int cpu)
return bringup_wait_for_ap(cpu);
}
-static int finish_cpu(unsigned int cpu)
-{
- struct task_struct *idle = idle_thread_get(cpu);
- struct mm_struct *mm = idle->active_mm;
-
- /*
- * idle_task_exit() will have switched to &init_mm, now
- * clean up any remaining active_mm state.
- */
- if (mm != &init_mm)
- idle->active_mm = &init_mm;
- mmdrop(mm);
- return 0;
-}
-
/*
* Hotplug state machine related functions
*/
@@ -1699,7 +1683,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
- .teardown.single = finish_cpu,
+ .teardown.single = NULL,
.cant_stop = true,
},
/* Final state before CPU kills itself */
@@ -8678,6 +8678,30 @@ void __init init_idle(struct task_struct *idle, int cpu)
#endif
}
+/*
+ * Release the lazy active_mm and switch current->active_mm to &init_mm.
+ * The caller must have IRQs off and must have current->mm == NULL
+ * (i.e. must be in a kernel thread).
+ */
+void unlazy_mm_irqs_off(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+
+ if (WARN_ON_ONCE(current->mm))
+ return;
+
+ if (mm == &init_mm)
+ return;
+
+ switch_mm_irqs_off(mm, &init_mm, current);
+ mmgrab(&init_mm);
+ current->active_mm = &init_mm;
+ finish_arch_post_lock_switch();
+ mmdrop(mm);
+}
+
#ifdef CONFIG_SMP
int cpuset_cpumask_can_shrink(const struct cpumask *cur,
@@ -8771,25 +8795,6 @@ void sched_setnuma(struct task_struct *p, int nid)
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Ensure that the idle task is using init_mm right before its CPU goes
- * offline.
- */
-void idle_task_exit(void)
-{
- struct mm_struct *mm = current->active_mm;
-
- BUG_ON(cpu_online(smp_processor_id()));
- BUG_ON(current != this_rq()->idle);
-
- if (mm != &init_mm) {
- switch_mm(mm, &init_mm, current);
- finish_arch_post_lock_switch();
- }
-
- /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
-}
-
static int __balance_push_cpu_stop(void *arg)
{
struct task_struct *p = arg;
@@ -285,6 +285,7 @@ static void do_idle(void)
local_irq_disable();
if (cpu_is_offline(cpu)) {
+ unlazy_mm_irqs_off();
tick_nohz_idle_stop_tick();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
@@ -3064,3 +3064,4 @@ extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
+extern void unlazy_mm_irqs_off(void);
This reworks commit bf2c59fce4074e55d622089b34be3a6bc95484fb. The problem
solved by that commit was that mmdrop() after cpuhp_report_idle_dead() is
an illegal use of RCU, so, with that commit applied, mmdrop() of the last
lazy mm on an offlined CPU was done by the BSP. With the upcoming rework
of lazy mm references, retaining that design would involve the CPU
hotplug code poking into internal scheduler details.

Rework the fix. Add a helper, unlazy_mm_irqs_off(), to fully switch a CPU
to init_mm, releasing any previous lazy active_mm, and do this before
cpuhp_report_idle_dead().

Note that the actual refcounting of init_mm is inconsistent both before
and after this patch. Most (all?) arches mmgrab(&init_mm) when booting an
AP and set current->active_mm = &init_mm on that AP. This is consistent
with the current ->active_mm refcounting rules, but architectures don't
do a corresponding mmdrop() when a CPU goes offline. The result is that
each offline/online cycle leaks one init_mm reference. This seems fairly
harmless.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/arm/kernel/smp.c                        |  2 -
 arch/arm64/kernel/smp.c                      |  2 -
 arch/csky/kernel/smp.c                       |  2 -
 arch/ia64/kernel/process.c                   |  1 -
 arch/mips/cavium-octeon/smp.c                |  1 -
 arch/mips/kernel/smp-bmips.c                 |  2 -
 arch/mips/kernel/smp-cps.c                   |  1 -
 arch/mips/loongson64/smp.c                   |  2 -
 arch/powerpc/platforms/85xx/smp.c            |  2 -
 arch/powerpc/platforms/powermac/smp.c        |  2 -
 arch/powerpc/platforms/powernv/smp.c         |  1 -
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  2 -
 arch/powerpc/platforms/pseries/pmem.c        |  1 -
 arch/riscv/kernel/cpu-hotplug.c              |  2 -
 arch/s390/kernel/smp.c                       |  1 -
 arch/sh/kernel/smp.c                         |  1 -
 arch/sparc/kernel/smp_64.c                   |  2 -
 arch/x86/kernel/smpboot.c                    |  2 -
 arch/xtensa/kernel/smp.c                     |  1 -
 include/linux/sched/hotplug.h                |  6 ---
 kernel/cpu.c                                 | 18 +-------
 kernel/sched/core.c                          | 43 +++++++++++---------
 kernel/sched/idle.c                          |  1 +
 kernel/sched/sched.h                         |  1 +
 24 files changed, 27 insertions(+), 72 deletions(-)
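For reviewers, a condensed sketch of the new teardown ordering on the
offline path follows. This is an illustration only, not literal kernel
code: offline_path_sketch() is a made-up name, and the real change is the
one-line addition to do_idle() shown in the kernel/sched/idle.c hunk
above.

/*
 * Illustration only: a condensed view of do_idle()'s offline branch
 * after this patch. offline_path_sketch() is a hypothetical name; see
 * the kernel/sched/idle.c hunk for the actual change.
 */
static void offline_path_sketch(unsigned int cpu)
{
	local_irq_disable();
	if (cpu_is_offline(cpu)) {
		/*
		 * New: switch to init_mm and mmdrop() any lazy active_mm
		 * now, while IRQs are off and RCU is still watching this
		 * CPU.
		 */
		unlazy_mm_irqs_off();
		tick_nohz_idle_stop_tick();
		/*
		 * After this point an mmdrop() would be an illegal use
		 * of RCU on this CPU, which is why the old code deferred
		 * the drop to finish_cpu() on the BSP.
		 */
		cpuhp_report_idle_dead();
		/* Arch hooks no longer need to call idle_task_exit(). */
		arch_cpu_idle_dead();
	}
}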