diff mbox series

[19/37] x86/smpboot: Switch to hotplug core state synchronization

Message ID 20230414232310.382005483@linutronix.de (mailing list archive)
State Not Applicable
Headers show
Series cpu/hotplug, x86: Reworked parallel CPU bringup | expand

Commit Message

Thomas Gleixner April 14, 2023, 11:44 p.m. UTC
The new AP state tracking and synchronization mechanism in the CPU hotplug
core code allows to remove quite some x86 specific code:

  1) The AP alive synchronization based on cpumasks

  2) The decision whether an AP can be brought up again

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: xen-devel@lists.xenproject.org
---
 arch/x86/Kconfig           |    1 
 arch/x86/include/asm/smp.h |    7 +
 arch/x86/kernel/smp.c      |    1 
 arch/x86/kernel/smpboot.c  |  159 ++++++++++-----------------------------------
 arch/x86/xen/smp_hvm.c     |   16 +---
 arch/x86/xen/smp_pv.c      |   39 ++++++-----
 6 files changed, 72 insertions(+), 151 deletions(-)

Comments

Brian Gerst April 15, 2023, 12:58 p.m. UTC | #1
On Fri, Apr 14, 2023 at 7:44 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> The new AP state tracking and synchronization mechanism in the CPU hotplug
> core code allows to remove quite some x86 specific code:
>
>   1) The AP alive synchronization based on cpumasks
>
>   2) The decision whether an AP can be brought up again
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Juergen Gross <jgross@suse.com>
> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> Cc: xen-devel@lists.xenproject.org
> ---
>  arch/x86/Kconfig           |    1
>  arch/x86/include/asm/smp.h |    7 +
>  arch/x86/kernel/smp.c      |    1
>  arch/x86/kernel/smpboot.c  |  159 ++++++++++-----------------------------------
>  arch/x86/xen/smp_hvm.c     |   16 +---
>  arch/x86/xen/smp_pv.c      |   39 ++++++-----
>  6 files changed, 72 insertions(+), 151 deletions(-)
>
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -272,6 +272,7 @@ config X86
>         select HAVE_UNSTABLE_SCHED_CLOCK
>         select HAVE_USER_RETURN_NOTIFIER
>         select HAVE_GENERIC_VDSO
> +       select HOTPLUG_CORE_SYNC_FULL           if SMP
>         select HOTPLUG_SMT                      if SMP
>         select IRQ_FORCED_THREADING
>         select NEED_PER_CPU_EMBED_FIRST_CHUNK
> --- a/arch/x86/include/asm/smp.h
> +++ b/arch/x86/include/asm/smp.h
> @@ -38,6 +38,8 @@ struct smp_ops {
>         void (*crash_stop_other_cpus)(void);
>         void (*smp_send_reschedule)(int cpu);
>
> +       void (*cleanup_dead_cpu)(unsigned cpu);
> +       void (*poll_sync_state)(void);
>         int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
>         int (*cpu_disable)(void);
>         void (*cpu_die)(unsigned int cpu);
> @@ -90,7 +92,8 @@ static inline int __cpu_disable(void)
>
>  static inline void __cpu_die(unsigned int cpu)
>  {
> -       smp_ops.cpu_die(cpu);
> +       if (smp_ops.cpu_die)
> +               smp_ops.cpu_die(cpu);
>  }
>
>  static inline void play_dead(void)
> @@ -122,8 +125,6 @@ void native_smp_cpus_done(unsigned int m
>  int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
>  int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
>  int native_cpu_disable(void);
> -int common_cpu_die(unsigned int cpu);
> -void native_cpu_die(unsigned int cpu);
>  void hlt_play_dead(void);
>  void native_play_dead(void);
>  void play_dead_common(void);
> --- a/arch/x86/kernel/smp.c
> +++ b/arch/x86/kernel/smp.c
> @@ -269,7 +269,6 @@ struct smp_ops smp_ops = {
>         .smp_send_reschedule    = native_smp_send_reschedule,
>
>         .cpu_up                 = native_cpu_up,
> -       .cpu_die                = native_cpu_die,
>         .cpu_disable            = native_cpu_disable,
>         .play_dead              = native_play_dead,
>
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -57,6 +57,7 @@
>  #include <linux/pgtable.h>
>  #include <linux/overflow.h>
>  #include <linux/stackprotector.h>
> +#include <linux/cpuhotplug.h>
>
>  #include <asm/acpi.h>
>  #include <asm/cacheinfo.h>
> @@ -101,9 +102,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
>  DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
>  EXPORT_PER_CPU_SYMBOL(cpu_info);
>
> -/* All of these masks are initialized in setup_cpu_local_masks() */
> -static cpumask_var_t cpu_initialized_mask;
> -static cpumask_var_t cpu_callout_mask;
>  /* Representing CPUs for which sibling maps can be computed */
>  static cpumask_var_t cpu_sibling_setup_mask;
>
> @@ -169,8 +167,8 @@ static void smp_callin(void)
>         int cpuid = smp_processor_id();
>
>         /*
> -        * If waken up by an INIT in an 82489DX configuration
> -        * cpu_callout_mask guarantees we don't get here before an
> +        * If waken up by an INIT in an 82489DX configuration the alive
> +        * synchronization guarantees we don't get here before an
>          * INIT_deassert IPI reaches our local APIC, so it is now safe to
>          * touch our local APIC.
>          *
> @@ -216,17 +214,6 @@ static void ap_calibrate_delay(void)
>         cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
>  }
>
> -static void wait_for_master_cpu(int cpu)
> -{
> -       /*
> -        * Wait for release by control CPU before continuing with AP
> -        * initialization.
> -        */
> -       WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
> -       while (!cpumask_test_cpu(cpu, cpu_callout_mask))
> -               cpu_relax();
> -}
> -
>  /*
>   * Activate a secondary processor.
>   */
> @@ -247,11 +234,10 @@ static void notrace start_secondary(void
>         cpu_init_exception_handling();
>
>         /*
> -        * Sync point with wait_cpu_initialized(). Sets AP in
> -        * cpu_initialized_mask and then waits for the control CPU
> -        * to release it.
> +        * Sync point with the hotplug core. Sets the sync state to ALIVE
> +        * and waits for the control CPU to release it.
>          */
> -       wait_for_master_cpu(raw_smp_processor_id());
> +       cpuhp_ap_sync_alive();
>
>         cpu_init();
>         rcu_cpu_starting(raw_smp_processor_id());
> @@ -285,7 +271,6 @@ static void notrace start_secondary(void
>         set_cpu_online(smp_processor_id(), true);
>         lapic_online();
>         unlock_vector_lock();
> -       cpu_set_state_online(smp_processor_id());
>         x86_platform.nmi_init();
>
>         /* enable local interrupts */
> @@ -736,9 +721,10 @@ static void impress_friends(void)
>          * Allow the user to impress friends.
>          */
>         pr_debug("Before bogomips\n");
> -       for_each_possible_cpu(cpu)
> -               if (cpumask_test_cpu(cpu, cpu_callout_mask))
> +       for_each_possible_cpu(cpu) {
> +               if (cpumask_test_cpu(cpu, cpu_online_mask))
>                         bogosum += cpu_data(cpu).loops_per_jiffy;

This should be the same as for_each_online_cpu().

--
Brian Gerst
Thomas Gleixner April 15, 2023, 9:04 p.m. UTC | #2
On Sat, Apr 15 2023 at 08:58, Brian Gerst wrote:
> On Fri, Apr 14, 2023 at 7:44 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>>         pr_debug("Before bogomips\n");
>> -       for_each_possible_cpu(cpu)
>> -               if (cpumask_test_cpu(cpu, cpu_callout_mask))
>> +       for_each_possible_cpu(cpu) {
>> +               if (cpumask_test_cpu(cpu, cpu_online_mask))
>>                         bogosum += cpu_data(cpu).loops_per_jiffy;
>
> This should be the same as for_each_online_cpu().

Duh, yes. Obviously...
diff mbox series

Patch

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -272,6 +272,7 @@  config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_GENERIC_VDSO
+	select HOTPLUG_CORE_SYNC_FULL		if SMP
 	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,8 @@  struct smp_ops {
 	void (*crash_stop_other_cpus)(void);
 	void (*smp_send_reschedule)(int cpu);
 
+	void (*cleanup_dead_cpu)(unsigned cpu);
+	void (*poll_sync_state)(void);
 	int (*cpu_up)(unsigned cpu, struct task_struct *tidle);
 	int (*cpu_disable)(void);
 	void (*cpu_die)(unsigned int cpu);
@@ -90,7 +92,8 @@  static inline int __cpu_disable(void)
 
 static inline void __cpu_die(unsigned int cpu)
 {
-	smp_ops.cpu_die(cpu);
+	if (smp_ops.cpu_die)
+		smp_ops.cpu_die(cpu);
 }
 
 static inline void play_dead(void)
@@ -122,8 +125,6 @@  void native_smp_cpus_done(unsigned int m
 int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
-int common_cpu_die(unsigned int cpu);
-void native_cpu_die(unsigned int cpu);
 void hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -269,7 +269,6 @@  struct smp_ops smp_ops = {
 	.smp_send_reschedule	= native_smp_send_reschedule,
 
 	.cpu_up			= native_cpu_up,
-	.cpu_die		= native_cpu_die,
 	.cpu_disable		= native_cpu_disable,
 	.play_dead		= native_play_dead,
 
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -57,6 +57,7 @@ 
 #include <linux/pgtable.h>
 #include <linux/overflow.h>
 #include <linux/stackprotector.h>
+#include <linux/cpuhotplug.h>
 
 #include <asm/acpi.h>
 #include <asm/cacheinfo.h>
@@ -101,9 +102,6 @@  EXPORT_PER_CPU_SYMBOL(cpu_die_map);
 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-/* All of these masks are initialized in setup_cpu_local_masks() */
-static cpumask_var_t cpu_initialized_mask;
-static cpumask_var_t cpu_callout_mask;
 /* Representing CPUs for which sibling maps can be computed */
 static cpumask_var_t cpu_sibling_setup_mask;
 
@@ -169,8 +167,8 @@  static void smp_callin(void)
 	int cpuid = smp_processor_id();
 
 	/*
-	 * If waken up by an INIT in an 82489DX configuration
-	 * cpu_callout_mask guarantees we don't get here before an
+	 * If waken up by an INIT in an 82489DX configuration the alive
+	 * synchronization guarantees we don't get here before an
 	 * INIT_deassert IPI reaches our local APIC, so it is now safe to
 	 * touch our local APIC.
 	 *
@@ -216,17 +214,6 @@  static void ap_calibrate_delay(void)
 	cpu_data(smp_processor_id()).loops_per_jiffy = loops_per_jiffy;
 }
 
-static void wait_for_master_cpu(int cpu)
-{
-	/*
-	 * Wait for release by control CPU before continuing with AP
-	 * initialization.
-	 */
-	WARN_ON(cpumask_test_and_set_cpu(cpu, cpu_initialized_mask));
-	while (!cpumask_test_cpu(cpu, cpu_callout_mask))
-		cpu_relax();
-}
-
 /*
  * Activate a secondary processor.
  */
@@ -247,11 +234,10 @@  static void notrace start_secondary(void
 	cpu_init_exception_handling();
 
 	/*
-	 * Sync point with wait_cpu_initialized(). Sets AP in
-	 * cpu_initialized_mask and then waits for the control CPU
-	 * to release it.
+	 * Sync point with the hotplug core. Sets the sync state to ALIVE
+	 * and waits for the control CPU to release it.
 	 */
-	wait_for_master_cpu(raw_smp_processor_id());
+	cpuhp_ap_sync_alive();
 
 	cpu_init();
 	rcu_cpu_starting(raw_smp_processor_id());
@@ -285,7 +271,6 @@  static void notrace start_secondary(void
 	set_cpu_online(smp_processor_id(), true);
 	lapic_online();
 	unlock_vector_lock();
-	cpu_set_state_online(smp_processor_id());
 	x86_platform.nmi_init();
 
 	/* enable local interrupts */
@@ -736,9 +721,10 @@  static void impress_friends(void)
 	 * Allow the user to impress friends.
 	 */
 	pr_debug("Before bogomips\n");
-	for_each_possible_cpu(cpu)
-		if (cpumask_test_cpu(cpu, cpu_callout_mask))
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
 			bogosum += cpu_data(cpu).loops_per_jiffy;
+	}
 	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
 		num_online_cpus(),
 		bogosum/(500000/HZ),
@@ -1010,6 +996,7 @@  int common_cpu_up(unsigned int cpu, stru
 static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long start_ip = real_mode_header->trampoline_start;
+	int ret;
 
 #ifdef CONFIG_X86_64
 	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
@@ -1050,13 +1037,6 @@  static int do_boot_cpu(int apicid, int c
 		}
 	}
 
-	/*
-	 * AP might wait on cpu_callout_mask in cpu_init() with
-	 * cpu_initialized_mask set if previous attempt to online
-	 * it timed-out. Clear cpu_initialized_mask so that after
-	 * INIT/SIPI it could start with a clean state.
-	 */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	smp_mb();
 
 	/*
@@ -1067,47 +1047,16 @@  static int do_boot_cpu(int apicid, int c
 	 * - Use an INIT boot APIC message
 	 */
 	if (apic->wakeup_secondary_cpu_64)
-		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
+		ret = apic->wakeup_secondary_cpu_64(apicid, start_ip);
 	else if (apic->wakeup_secondary_cpu)
-		return apic->wakeup_secondary_cpu(apicid, start_ip);
-
-	return wakeup_secondary_cpu_via_init(apicid, start_ip);
-}
-
-static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
-{
-	unsigned long timeout;
-
-	/*
-	 * Wait up to 10s for the CPU to report in.
-	 */
-	timeout = jiffies + 10*HZ;
-	while (time_before(jiffies, timeout)) {
-		if (cpumask_test_cpu(cpu, mask))
-			return 0;
-
-		schedule();
-	}
-	return -1;
-}
-
-/*
- * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
- * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
- * to proceed.  The AP will then proceed past setting its 'callin' bit
- * and end up waiting in check_tsc_sync_target() until we reach
- * wait_cpu_online() to tend to it.
- */
-static int wait_cpu_initialized(unsigned int cpu)
-{
-	/*
-	 * Wait for first sign of life from AP.
-	 */
-	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
-		return -1;
+		ret = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		ret = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
-	cpumask_set_cpu(cpu, cpu_callout_mask);
-	return 0;
+	/* If the wakeup mechanism failed, cleanup the warm reset vector */
+	if (ret)
+		arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
 }
 
 static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
@@ -1132,11 +1081,6 @@  static int native_kick_ap(unsigned int c
 	 */
 	mtrr_save_state();
 
-	/* x86 CPUs take themselves offline, so delayed offline is OK. */
-	err = cpu_check_up_prepare(cpu);
-	if (err && err != -EBUSY)
-		return err;
-
 	/* the FPU context is blank, nobody can own it */
 	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
 
@@ -1153,17 +1097,29 @@  static int native_kick_ap(unsigned int c
 
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	int ret;
-
-	ret = native_kick_ap(cpu, tidle);
-	if (!ret)
-		ret = wait_cpu_initialized(cpu);
+	return native_kick_ap(cpu, tidle);
+}
 
+void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu)
+{
 	/* Cleanup possible dangling ends... */
-	if (x86_platform.legacy.warm_reset)
+	if (smp_ops.cpu_up == native_cpu_up && x86_platform.legacy.warm_reset)
 		smpboot_restore_warm_reset_vector();
+}
 
-	return ret;
+void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+{
+	if (smp_ops.cleanup_dead_cpu)
+		smp_ops.cleanup_dead_cpu(cpu);
+
+	if (system_state == SYSTEM_RUNNING)
+		pr_info("CPU %u is now offline\n", cpu);
+}
+
+void arch_cpuhp_sync_state_poll(void)
+{
+	if (smp_ops.poll_sync_state)
+		smp_ops.poll_sync_state();
 }
 
 /**
@@ -1355,9 +1311,6 @@  void __init native_smp_prepare_boot_cpu(
 	if (!IS_ENABLED(CONFIG_SMP))
 		switch_gdt_and_percpu_base(me);
 
-	/* already set me in cpu_online_mask in boot_cpu_init() */
-	cpumask_set_cpu(me, cpu_callout_mask);
-	cpu_set_state_online(me);
 	native_pv_lock_init();
 }
 
@@ -1484,8 +1437,6 @@  early_param("possible_cpus", _setup_poss
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
-	alloc_bootmem_cpumask_var(&cpu_initialized_mask);
-	alloc_bootmem_cpumask_var(&cpu_callout_mask);
 	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
@@ -1547,9 +1498,6 @@  static void remove_siblinginfo(int cpu)
 static void remove_cpu_from_maps(int cpu)
 {
 	set_cpu_online(cpu, false);
-	cpumask_clear_cpu(cpu, cpu_callout_mask);
-	/* was set by cpu_init() */
-	cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	numa_remove_cpu(cpu);
 }
 
@@ -1600,36 +1548,11 @@  int native_cpu_disable(void)
 	return 0;
 }
 
-int common_cpu_die(unsigned int cpu)
-{
-	int ret = 0;
-
-	/* We don't do anything here: idle task is faking death itself. */
-
-	/* They ack this in play_dead() by setting CPU_DEAD */
-	if (cpu_wait_death(cpu, 5)) {
-		if (system_state == SYSTEM_RUNNING)
-			pr_info("CPU %u is now offline\n", cpu);
-	} else {
-		pr_err("CPU %u didn't die...\n", cpu);
-		ret = -1;
-	}
-
-	return ret;
-}
-
-void native_cpu_die(unsigned int cpu)
-{
-	common_cpu_die(cpu);
-}
-
 void play_dead_common(void)
 {
 	idle_task_exit();
 
-	/* Ack it */
-	(void)cpu_report_death();
-
+	cpuhp_ap_report_dead();
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
 	 */
@@ -1731,12 +1654,6 @@  int native_cpu_disable(void)
 	return -ENOSYS;
 }
 
-void native_cpu_die(unsigned int cpu)
-{
-	/* We said "no" in __cpu_disable */
-	BUG();
-}
-
 void native_play_dead(void)
 {
 	BUG();
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -55,18 +55,16 @@  static void __init xen_hvm_smp_prepare_c
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
-	if (common_cpu_die(cpu) == 0) {
-		if (xen_have_vector_callback) {
-			xen_smp_intr_free(cpu);
-			xen_uninit_lock_cpu(cpu);
-			xen_teardown_timer(cpu);
-		}
+	if (xen_have_vector_callback) {
+		xen_smp_intr_free(cpu);
+		xen_uninit_lock_cpu(cpu);
+		xen_teardown_timer(cpu);
 	}
 }
 #else
-static void xen_hvm_cpu_die(unsigned int cpu)
+static void xen_hvm_cleanup_dead_cpu(unsigned int cpu)
 {
 	BUG();
 }
@@ -77,7 +75,7 @@  void __init xen_hvm_smp_init(void)
 	smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
 	smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
 	smp_ops.smp_cpus_done = xen_smp_cpus_done;
-	smp_ops.cpu_die = xen_hvm_cpu_die;
+	smp_ops.cleanup_dead_cpu = xen_hvm_cleanup_dead_cpu;
 
 	if (!xen_have_vector_callback) {
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -62,6 +62,7 @@  static void cpu_bringup(void)
 	int cpu;
 
 	cr4_init();
+	cpuhp_ap_sync_alive();
 	cpu_init();
 	touch_softlockup_watchdog();
 
@@ -83,7 +84,7 @@  static void cpu_bringup(void)
 
 	set_cpu_online(cpu, true);
 
-	cpu_set_state_online(cpu);  /* Implies full memory barrier. */
+	smp_mb();
 
 	/* We can take interrupts now: we're officially "up". */
 	local_irq_enable();
@@ -323,14 +324,6 @@  static int xen_pv_cpu_up(unsigned int cp
 
 	xen_setup_runstate_info(cpu);
 
-	/*
-	 * PV VCPUs are always successfully taken down (see 'while' loop
-	 * in xen_cpu_die()), so -EBUSY is an error.
-	 */
-	rc = cpu_check_up_prepare(cpu);
-	if (rc)
-		return rc;
-
 	/* make sure interrupts start blocked */
 	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
 
@@ -349,6 +342,11 @@  static int xen_pv_cpu_up(unsigned int cp
 	return 0;
 }
 
+static void xen_pv_poll_sync_state(void)
+{
+	HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int xen_pv_cpu_disable(void)
 {
@@ -364,18 +362,18 @@  static int xen_pv_cpu_disable(void)
 
 static void xen_pv_cpu_die(unsigned int cpu)
 {
-	while (HYPERVISOR_vcpu_op(VCPUOP_is_up,
-				  xen_vcpu_nr(cpu), NULL)) {
+	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
 		__set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_timeout(HZ/10);
 	}
+}
 
-	if (common_cpu_die(cpu) == 0) {
-		xen_smp_intr_free(cpu);
-		xen_uninit_lock_cpu(cpu);
-		xen_teardown_timer(cpu);
-		xen_pmu_finish(cpu);
-	}
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+	xen_smp_intr_free(cpu);
+	xen_uninit_lock_cpu(cpu);
+	xen_teardown_timer(cpu);
+	xen_pmu_finish(cpu);
 }
 
 static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
@@ -397,6 +395,11 @@  static void xen_pv_cpu_die(unsigned int
 	BUG();
 }
 
+static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
+{
+	BUG();
+}
+
 static void __noreturn xen_pv_play_dead(void)
 {
 	BUG();
@@ -437,6 +440,8 @@  static const struct smp_ops xen_smp_ops
 
 	.cpu_up = xen_pv_cpu_up,
 	.cpu_die = xen_pv_cpu_die,
+	.cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
+	.poll_sync_state = xen_pv_poll_sync_state,
 	.cpu_disable = xen_pv_cpu_disable,
 	.play_dead = xen_pv_play_dead,