diff mbox

[v4,2/3] acpi : prevent cpu from becoming online

Message ID 4FFFE182.2090601@jp.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yasuaki Ishimatsu July 13, 2012, 8:51 a.m. UTC
Even if acpi_processor_handle_eject() offlines cpu, there is a chance
to online the cpu after that. So the patch closes the window by using
get/put_online_cpus().

Why does the patch change _cpu_up() logic?

The patch cares the race of hot-remove cpu and _cpu_up(). If the patch
does not change it, there is the following race.

hot-remove cpu                         |  _cpu_up()
------------------------------------- ------------------------------------
call acpi_processor_handle_eject()     |
     call cpu_down()                   |
     call get_online_cpus()            |
                                       | call cpu_hotplug_begin() and stop here
     call arch_unregister_cpu()        |
     call acpi_unmap_lsapic()          |
     call put_online_cpus()            |
                                       | start and continue _cpu_up()
     return acpi_processor_remove()    |
continue hot-remove the cpu            |

So _cpu_up() can continue to itself. And hot-remove cpu can also continue
itself. If the patch changes _cpu_up() logic, the race disappears as below:

hot-remove cpu                         | _cpu_up()
-----------------------------------------------------------------------
call acpi_processor_handle_eject()     |
     call cpu_down()                   |
     call get_online_cpus()            |
                                       | call cpu_hotplug_begin() and stop here
     call arch_unregister_cpu()        |
     call acpi_unmap_lsapic()          |
          cpu's cpu_present is set     |
          to false by set_cpu_present()|
     call put_online_cpus()            |
                                       | start _cpu_up()
                                       | check cpu_present() and return -EINVAL
     return acpi_processor_remove()    |
continue hot-remove the cpu            |

Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 drivers/acpi/processor_driver.c |   14 ++++++++++++++
 kernel/cpu.c                    |    8 +++++---
 2 files changed, 19 insertions(+), 3 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Toshi Kani July 13, 2012, 3:25 p.m. UTC | #1
On Fri, 2012-07-13 at 17:51 +0900, Yasuaki Ishimatsu wrote:
> Even if acpi_processor_handle_eject() offlines cpu, there is a chance
> to online the cpu after that. So the patch closes the window by using
> get/put_online_cpus().
> 
> Why does the patch change _cpu_up() logic?
> 
> The patch cares the race of hot-remove cpu and _cpu_up(). If the patch
> does not change it, there is the following race.
> 
> hot-remove cpu                         |  _cpu_up()
> ------------------------------------- ------------------------------------
> call acpi_processor_handle_eject()     |
>      call cpu_down()                   |
>      call get_online_cpus()            |
>                                        | call cpu_hotplug_begin() and stop here
>      call arch_unregister_cpu()        |
>      call acpi_unmap_lsapic()          |
>      call put_online_cpus()            |
>                                        | start and continue _cpu_up()
>      return acpi_processor_remove()    |
> continue hot-remove the cpu            |
> 
> So _cpu_up() can continue to itself. And hot-remove cpu can also continue
> itself. If the patch changes _cpu_up() logic, the race disappears as below:
> 
> hot-remove cpu                         | _cpu_up()
> -----------------------------------------------------------------------
> call acpi_processor_handle_eject()     |
>      call cpu_down()                   |
>      call get_online_cpus()            |
>                                        | call cpu_hotplug_begin() and stop here
>      call arch_unregister_cpu()        |
>      call acpi_unmap_lsapic()          |
>           cpu's cpu_present is set     |
>           to false by set_cpu_present()|
>      call put_online_cpus()            |
>                                        | start _cpu_up()
>                                        | check cpu_present() and return -EINVAL
>      return acpi_processor_remove()    |
> continue hot-remove the cpu            |
> 
> Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> ---
>  drivers/acpi/processor_driver.c |   14 ++++++++++++++
>  kernel/cpu.c                    |    8 +++++---
>  2 files changed, 19 insertions(+), 3 deletions(-)
> 
> Index: linux-3.5-rc6/drivers/acpi/processor_driver.c
> ===================================================================
> --- linux-3.5-rc6.orig/drivers/acpi/processor_driver.c	2012-07-13 17:31:37.799130100 +0900
> +++ linux-3.5-rc6/drivers/acpi/processor_driver.c	2012-07-13 17:39:47.727006338 +0900
> @@ -850,8 +850,22 @@ static int acpi_processor_handle_eject(s
>  			return ret;
>  	}
> 
> +	get_online_cpus();
> +	/*
> +	 * The cpu might become online again at this point. So we check whether
> +	 * the cpu has been onlined or not. If the cpu became online, it means
> +	 * that someone wants to use the cpu. So acpi_processor_handle_eject()
> +	 * returns -EAGAIN.
> +	 */
> +	if (unlikely(cpu_online(pr->id))) {
> +		put_online_cpus();
> +		pr_warn("Failed to remove CPU %d, ", pr->id);
> +		pr_warn("because other task brought the CPU back online\n");

Just a nit-pick, but you can simply do it like:

		pr_warn("Failed to remove CPU %d, because other task "
			"brought the CPU back online\n", pr->id);

Other than that,

Reviewed-by: Toshi Kani <toshi.kani@hp.com>

Thanks,
-Toshi

> +		return -EAGAIN;
> +	}
>  	arch_unregister_cpu(pr->id);
>  	acpi_unmap_lsapic(pr->id);
> +	put_online_cpus();
>  	return ret;
>  }
>  #else
> Index: linux-3.5-rc6/kernel/cpu.c
> ===================================================================
> --- linux-3.5-rc6.orig/kernel/cpu.c	2012-07-13 17:31:37.800130087 +0900
> +++ linux-3.5-rc6/kernel/cpu.c	2012-07-13 17:31:39.661106874 +0900
> @@ -343,11 +343,13 @@ static int __cpuinit _cpu_up(unsigned in
>  	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
>  	struct task_struct *idle;
> 
> -	if (cpu_online(cpu) || !cpu_present(cpu))
> -		return -EINVAL;
> -
>  	cpu_hotplug_begin();
> 
> +	if (cpu_online(cpu) || !cpu_present(cpu)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
>  	idle = idle_thread_get(cpu);
>  	if (IS_ERR(idle)) {
>  		ret = PTR_ERR(idle);
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-3.5-rc6/drivers/acpi/processor_driver.c
===================================================================
--- linux-3.5-rc6.orig/drivers/acpi/processor_driver.c	2012-07-13 17:31:37.799130100 +0900
+++ linux-3.5-rc6/drivers/acpi/processor_driver.c	2012-07-13 17:39:47.727006338 +0900
@@ -850,8 +850,22 @@  static int acpi_processor_handle_eject(s
 			return ret;
 	}

+	get_online_cpus();
+	/*
+	 * The cpu might become online again at this point. So we check whether
+	 * the cpu has been onlined or not. If the cpu became online, it means
+	 * that someone wants to use the cpu. So acpi_processor_handle_eject()
+	 * returns -EAGAIN.
+	 */
+	if (unlikely(cpu_online(pr->id))) {
+		put_online_cpus();
+		pr_warn("Failed to remove CPU %d, ", pr->id);
+		pr_warn("because other task brought the CPU back online\n");
+		return -EAGAIN;
+	}
 	arch_unregister_cpu(pr->id);
 	acpi_unmap_lsapic(pr->id);
+	put_online_cpus();
 	return ret;
 }
 #else
Index: linux-3.5-rc6/kernel/cpu.c
===================================================================
--- linux-3.5-rc6.orig/kernel/cpu.c	2012-07-13 17:31:37.800130087 +0900
+++ linux-3.5-rc6/kernel/cpu.c	2012-07-13 17:31:39.661106874 +0900
@@ -343,11 +343,13 @@  static int __cpuinit _cpu_up(unsigned in
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct task_struct *idle;

-	if (cpu_online(cpu) || !cpu_present(cpu))
-		return -EINVAL;
-
 	cpu_hotplug_begin();

+	if (cpu_online(cpu) || !cpu_present(cpu)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	idle = idle_thread_get(cpu);
 	if (IS_ERR(idle)) {
 		ret = PTR_ERR(idle);