diff mbox

acpi : acpi_bus_trim() stops removing devices when failing to remove the device

Message ID 5073E4CC.3090600@jp.fujitsu.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Yasuaki Ishimatsu Oct. 9, 2012, 8:48 a.m. UTC
acpi_bus_trim() stops removing devices when acpi_bus_remove() returns an error
number. But acpi_bus_remove() cannot return an error number correctly:
it only returns -EINVAL when the dev argument is NULL. Thus, even if a
device cannot be removed correctly, acpi_bus_trim() ignores the failure and
continues to remove devices. acpi_bus_hot_remove_device() uses acpi_bus_trim()
for removing devices. Therefore acpi_bus_hot_remove_device() can send "_EJ0"
to firmware even if the device is still running on the system. In this case,
the system cannot work well.

Vasilis hit this bug with memory hotplug and reported it as follows:
https://lkml.org/lkml/2012/9/26/318

So acpi_bus_trim() should check whether the device was removed correctly.
The patch adds error checks to the functions involved in removing the device.

With this patch applied, acpi_bus_trim() stops removing devices when it fails
to remove a device. But I think there is no impact except on the CPU and
memory hotplug paths, because removal of other devices can also fail, but
only in irregular cases such as the device being NULL.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

---
 drivers/acpi/scan.c    |   15 ++++++++++++---
 drivers/base/dd.c      |   22 +++++++++++++++++-----
 include/linux/device.h |    2 +-
 3 files changed, 30 insertions(+), 9 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Toshi Kani Oct. 9, 2012, 4:36 p.m. UTC | #1
On Tue, 2012-10-09 at 17:48 +0900, Yasuaki Ishimatsu wrote:
> acpi_bus_trim() stops removing devices, when acpi_bus_remove() return error
> number. But acpi_bus_remove() cannot return error number correctly.
> acpi_bus_remove() only return -EINVAL, when dev argument is NULL. Thus even if
> device cannot be removed correctly, acpi_bus_trim() ignores and continues to
> remove devices. acpi_bus_hot_remove_device() uses acpi_bus_trim() for removing
> devices. Therefore acpi_bus_hot_remove_device() can send "_EJ0" to firmware,
> even if the device is running on the system. In this case, the system cannot
> work well.
> 
> Vasilis hit the bug at memory hotplug and reported it as follow:
> https://lkml.org/lkml/2012/9/26/318
> 
> So acpi_bus_trim() should check whether device was removed or not correctly.
> The patch adds error check into some functions to remove the device.
> 
> Applying the patch, acpi_bus_trim() stops removing devices when failing
> to remove the device. But I think there is no impact with the
> exceptionof CPU and Memory hotplug path. Because other device also fails
> but the fail is an irregular case like device is NULL.
> 
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> ---
>  drivers/acpi/scan.c    |   15 ++++++++++++---
>  drivers/base/dd.c      |   22 +++++++++++++++++-----
>  include/linux/device.h |    2 +-
>  3 files changed, 30 insertions(+), 9 deletions(-)
> 
> Index: linux-3.6/drivers/acpi/scan.c
> ===================================================================
> --- linux-3.6.orig/drivers/acpi/scan.c	2012-10-09 17:25:40.956496325 +0900
> +++ linux-3.6/drivers/acpi/scan.c	2012-10-09 17:25:55.405497800 +0900
> @@ -445,12 +445,17 @@ static int acpi_device_remove(struct dev
>  {
>  	struct acpi_device *acpi_dev = to_acpi_device(dev);
>  	struct acpi_driver *acpi_drv = acpi_dev->driver;
> +	int ret;
>  
>  	if (acpi_drv) {
>  		if (acpi_drv->ops.notify)
>  			acpi_device_remove_notify_handler(acpi_dev);
> -		if (acpi_drv->ops.remove)
> -			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
> +		if (acpi_drv->ops.remove) {
> +			ret = acpi_drv->ops.remove(acpi_dev,
> +						   acpi_dev->removal_type);
> +			if (ret)

Hi Yasuaki,

Shouldn't the notify handler be reinstalled here if it was removed by
the acpi_device_remove_notify_handler() above?

Thanks,
-Toshi

> +				return ret;
> +		}
>  	}
>  	acpi_dev->driver = NULL;
>  	acpi_dev->driver_data = NULL;
> @@ -1226,11 +1231,15 @@ static int acpi_device_set_context(struc
>  
>  static int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
>  {
> +	int ret;
> +
>  	if (!dev)
>  		return -EINVAL;
>  
>  	dev->removal_type = ACPI_BUS_REMOVAL_EJECT;
> -	device_release_driver(&dev->dev);
> +	ret = device_release_driver(&dev->dev);
> +	if (ret)
> +		return ret;
>  
>  	if (!rmdevice)
>  		return 0;
> Index: linux-3.6/drivers/base/dd.c
> ===================================================================
> --- linux-3.6.orig/drivers/base/dd.c	2012-10-01 08:47:46.000000000 +0900
> +++ linux-3.6/drivers/base/dd.c	2012-10-09 17:25:55.442497825 +0900
> @@ -475,9 +475,10 @@ EXPORT_SYMBOL_GPL(driver_attach);
>   * __device_release_driver() must be called with @dev lock held.
>   * When called for a USB interface, @dev->parent lock must be held as well.
>   */
> -static void __device_release_driver(struct device *dev)
> +static int __device_release_driver(struct device *dev)
>  {
>  	struct device_driver *drv;
> +	int ret = 0;
>  
>  	drv = dev->driver;
>  	if (drv) {
> @@ -493,9 +494,11 @@ static void __device_release_driver(stru
>  		pm_runtime_put_sync(dev);
>  
>  		if (dev->bus && dev->bus->remove)
> -			dev->bus->remove(dev);
> +			ret = dev->bus->remove(dev);
>  		else if (drv->remove)
> -			drv->remove(dev);
> +			ret = drv->remove(dev);
> +		if (ret)
> +			goto rollback;
>  		devres_release_all(dev);
>  		dev->driver = NULL;
>  		dev_set_drvdata(dev, NULL);
> @@ -506,6 +509,12 @@ static void __device_release_driver(stru
>  						     dev);
>  
>  	}
> +
> +	return ret;
> +
> +rollback:
> +	driver_sysfs_add(dev);
> +	return ret;
>  }
>  
>  /**
> @@ -515,16 +524,19 @@ static void __device_release_driver(stru
>   * Manually detach device from driver.
>   * When called for a USB interface, @dev->parent lock must be held.
>   */
> -void device_release_driver(struct device *dev)
> +int device_release_driver(struct device *dev)
>  {
> +	int ret;
>  	/*
>  	 * If anyone calls device_release_driver() recursively from
>  	 * within their ->remove callback for the same device, they
>  	 * will deadlock right here.
>  	 */
>  	device_lock(dev);
> -	__device_release_driver(dev);
> +	ret = __device_release_driver(dev);
>  	device_unlock(dev);
> +
> +	return ret;
>  }
>  EXPORT_SYMBOL_GPL(device_release_driver);
>  
> Index: linux-3.6/include/linux/device.h
> ===================================================================
> --- linux-3.6.orig/include/linux/device.h	2012-10-01 08:47:46.000000000 +0900
> +++ linux-3.6/include/linux/device.h	2012-10-09 17:25:55.479497852 +0900
> @@ -834,7 +834,7 @@ static inline void *dev_get_platdata(con
>   * for information on use.
>   */
>  extern int __must_check device_bind_driver(struct device *dev);
> -extern void device_release_driver(struct device *dev);
> +extern int device_release_driver(struct device *dev);
>  extern int  __must_check device_attach(struct device *dev);
>  extern int __must_check driver_attach(struct device_driver *drv);
>  extern int __must_check device_reprobe(struct device *dev);
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yasuaki Ishimatsu Oct. 10, 2012, 1:07 a.m. UTC | #2
Hi Toshi,

2012/10/10 1:36, Toshi Kani wrote:
> On Tue, 2012-10-09 at 17:48 +0900, Yasuaki Ishimatsu wrote:
>> acpi_bus_trim() stops removing devices, when acpi_bus_remove() return error
>> number. But acpi_bus_remove() cannot return error number correctly.
>> acpi_bus_remove() only return -EINVAL, when dev argument is NULL. Thus even if
>> device cannot be removed correctly, acpi_bus_trim() ignores and continues to
>> remove devices. acpi_bus_hot_remove_device() uses acpi_bus_trim() for removing
>> devices. Therefore acpi_bus_hot_remove_device() can send "_EJ0" to firmware,
>> even if the device is running on the system. In this case, the system cannot
>> work well.
>>
>> Vasilis hit the bug at memory hotplug and reported it as follow:
>> https://lkml.org/lkml/2012/9/26/318
>>
>> So acpi_bus_trim() should check whether device was removed or not correctly.
>> The patch adds error check into some functions to remove the device.
>>
>> Applying the patch, acpi_bus_trim() stops removing devices when failing
>> to remove the device. But I think there is no impact with the
>> exceptionof CPU and Memory hotplug path. Because other device also fails
>> but the fail is an irregular case like device is NULL.
>>
>> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
>>
>> ---
>>   drivers/acpi/scan.c    |   15 ++++++++++++---
>>   drivers/base/dd.c      |   22 +++++++++++++++++-----
>>   include/linux/device.h |    2 +-
>>   3 files changed, 30 insertions(+), 9 deletions(-)
>>
>> Index: linux-3.6/drivers/acpi/scan.c
>> ===================================================================
>> --- linux-3.6.orig/drivers/acpi/scan.c	2012-10-09 17:25:40.956496325 +0900
>> +++ linux-3.6/drivers/acpi/scan.c	2012-10-09 17:25:55.405497800 +0900
>> @@ -445,12 +445,17 @@ static int acpi_device_remove(struct dev
>>   {
>>   	struct acpi_device *acpi_dev = to_acpi_device(dev);
>>   	struct acpi_driver *acpi_drv = acpi_dev->driver;
>> +	int ret;
>>
>>   	if (acpi_drv) {
>>   		if (acpi_drv->ops.notify)
>>   			acpi_device_remove_notify_handler(acpi_dev);
>> -		if (acpi_drv->ops.remove)
>> -			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
>> +		if (acpi_drv->ops.remove) {
>> +			ret = acpi_drv->ops.remove(acpi_dev,
>> +						   acpi_dev->removal_type);
>> +			if (ret)
>
> Hi Yasuaki,
>
> Shouldn't the notify handler be reinstalled here if it was removed by
> the acpi_device_remove_notify_handler() above?

I do not reinstall the notify handler.
The function has not been removed in linux-3.6, and the patch was created
against linux-3.6, so the function remains in the patch.

Thanks,
Yasuaki Ishimatsu

>
> Thanks,
> -Toshi
>
>> +				return ret;
>> +		}
>>   	}
>>   	acpi_dev->driver = NULL;
>>   	acpi_dev->driver_data = NULL;
>> @@ -1226,11 +1231,15 @@ static int acpi_device_set_context(struc
>>
>>   static int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
>>   {
>> +	int ret;
>> +
>>   	if (!dev)
>>   		return -EINVAL;
>>
>>   	dev->removal_type = ACPI_BUS_REMOVAL_EJECT;
>> -	device_release_driver(&dev->dev);
>> +	ret = device_release_driver(&dev->dev);
>> +	if (ret)
>> +		return ret;
>>
>>   	if (!rmdevice)
>>   		return 0;
>> Index: linux-3.6/drivers/base/dd.c
>> ===================================================================
>> --- linux-3.6.orig/drivers/base/dd.c	2012-10-01 08:47:46.000000000 +0900
>> +++ linux-3.6/drivers/base/dd.c	2012-10-09 17:25:55.442497825 +0900
>> @@ -475,9 +475,10 @@ EXPORT_SYMBOL_GPL(driver_attach);
>>    * __device_release_driver() must be called with @dev lock held.
>>    * When called for a USB interface, @dev->parent lock must be held as well.
>>    */
>> -static void __device_release_driver(struct device *dev)
>> +static int __device_release_driver(struct device *dev)
>>   {
>>   	struct device_driver *drv;
>> +	int ret = 0;
>>
>>   	drv = dev->driver;
>>   	if (drv) {
>> @@ -493,9 +494,11 @@ static void __device_release_driver(stru
>>   		pm_runtime_put_sync(dev);
>>
>>   		if (dev->bus && dev->bus->remove)
>> -			dev->bus->remove(dev);
>> +			ret = dev->bus->remove(dev);
>>   		else if (drv->remove)
>> -			drv->remove(dev);
>> +			ret = drv->remove(dev);
>> +		if (ret)
>> +			goto rollback;
>>   		devres_release_all(dev);
>>   		dev->driver = NULL;
>>   		dev_set_drvdata(dev, NULL);
>> @@ -506,6 +509,12 @@ static void __device_release_driver(stru
>>   						     dev);
>>
>>   	}
>> +
>> +	return ret;
>> +
>> +rollback:
>> +	driver_sysfs_add(dev);
>> +	return ret;
>>   }
>>
>>   /**
>> @@ -515,16 +524,19 @@ static void __device_release_driver(stru
>>    * Manually detach device from driver.
>>    * When called for a USB interface, @dev->parent lock must be held.
>>    */
>> -void device_release_driver(struct device *dev)
>> +int device_release_driver(struct device *dev)
>>   {
>> +	int ret;
>>   	/*
>>   	 * If anyone calls device_release_driver() recursively from
>>   	 * within their ->remove callback for the same device, they
>>   	 * will deadlock right here.
>>   	 */
>>   	device_lock(dev);
>> -	__device_release_driver(dev);
>> +	ret = __device_release_driver(dev);
>>   	device_unlock(dev);
>> +
>> +	return ret;
>>   }
>>   EXPORT_SYMBOL_GPL(device_release_driver);
>>
>> Index: linux-3.6/include/linux/device.h
>> ===================================================================
>> --- linux-3.6.orig/include/linux/device.h	2012-10-01 08:47:46.000000000 +0900
>> +++ linux-3.6/include/linux/device.h	2012-10-09 17:25:55.479497852 +0900
>> @@ -834,7 +834,7 @@ static inline void *dev_get_platdata(con
>>    * for information on use.
>>    */
>>   extern int __must_check device_bind_driver(struct device *dev);
>> -extern void device_release_driver(struct device *dev);
>> +extern int device_release_driver(struct device *dev);
>>   extern int  __must_check device_attach(struct device *dev);
>>   extern int __must_check driver_attach(struct device_driver *drv);
>>   extern int __must_check device_reprobe(struct device *dev);
>>
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Toshi Kani Oct. 10, 2012, 1:01 p.m. UTC | #3
On Wed, 2012-10-10 at 10:07 +0900, Yasuaki Ishimatsu wrote:
 :
> >>   	if (acpi_drv) {
> >>   		if (acpi_drv->ops.notify)
> >>   			acpi_device_remove_notify_handler(acpi_dev);

THIS CALL

> >> -		if (acpi_drv->ops.remove)
> >> -			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
> >> +		if (acpi_drv->ops.remove) {
> >> +			ret = acpi_drv->ops.remove(acpi_dev,
> >> +						   acpi_dev->removal_type);
> >> +			if (ret)
> >
> > Hi Yasuaki,
> >
> > Shouldn't the notify handler be reinstalled here if it was removed by
> > the acpi_device_remove_notify_handler() above?
> 
> I do not reinstall the notify handler.
> The function has not been removed on linux-3.6. And the patch is created
> on linux-3.6. So the function remains in the patch.

Umm... I am not sure what you meant.  Let me clarify my comment.  When
acpi_drv->ops.remove() fails, I think we need to roll back the work
done by the acpi_device_remove_notify_handler() call, which I
indicated as "THIS CALL" above.  So, in this error path, don't we need
something like below?

if (acpi_drv->ops.notify)
	acpi_device_install_notify_handler(acpi_dev)

Thanks,
-Toshi




--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yasuaki Ishimatsu Oct. 11, 2012, 5:40 a.m. UTC | #4
Hi Toshi,

2012/10/10 22:01, Toshi Kani wrote:
> On Wed, 2012-10-10 at 10:07 +0900, Yasuaki Ishimatsu wrote:
>   :
>>>>    	if (acpi_drv) {
>>>>    		if (acpi_drv->ops.notify)
>>>>    			acpi_device_remove_notify_handler(acpi_dev);
>
> THIS CALL
>
>>>> -		if (acpi_drv->ops.remove)
>>>> -			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
>>>> +		if (acpi_drv->ops.remove) {
>>>> +			ret = acpi_drv->ops.remove(acpi_dev,
>>>> +						   acpi_dev->removal_type);
>>>> +			if (ret)
>>>
>>> Hi Yasuaki,
>>>
>>> Shouldn't the notify handler be reinstalled here if it was removed by
>>> the acpi_device_remove_notify_handler() above?
>>
>> I do not reinstall the notify handler.
>> The function has not been removed on linux-3.6. And the patch is created
>> on linux-3.6. So the function remains in the patch.
>
> Umm... I am not sure what you meant.  Let me clarify my comment.  When
> acpi_drv->ops.remove() failed, I thought we would need to roll-back the
> procedure done by the acpi_device_remove_notify_handler() call, which I
> indicated as "THIS CALL" above.  So, in this error path, don't we need
> something like below?
>
> if (acpi_drv->ops.notify)
> 	acpi_device_install_notify_handler(acpi_dev)

I understood what you said.  I'll update it.

Thanks,
Yasuaki Ishimatsu

>
> Thanks,
> -Toshi
>
>
>
>


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-3.6/drivers/acpi/scan.c
===================================================================
--- linux-3.6.orig/drivers/acpi/scan.c	2012-10-09 17:25:40.956496325 +0900
+++ linux-3.6/drivers/acpi/scan.c	2012-10-09 17:25:55.405497800 +0900
@@ -445,12 +445,17 @@  static int acpi_device_remove(struct dev
 {
 	struct acpi_device *acpi_dev = to_acpi_device(dev);
 	struct acpi_driver *acpi_drv = acpi_dev->driver;
+	int ret;
 
 	if (acpi_drv) {
 		if (acpi_drv->ops.notify)
 			acpi_device_remove_notify_handler(acpi_dev);
-		if (acpi_drv->ops.remove)
-			acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type);
+		if (acpi_drv->ops.remove) {
+			ret = acpi_drv->ops.remove(acpi_dev,
+						   acpi_dev->removal_type);
+			if (ret)
+				return ret;
+		}
 	}
 	acpi_dev->driver = NULL;
 	acpi_dev->driver_data = NULL;
@@ -1226,11 +1231,15 @@  static int acpi_device_set_context(struc
 
 static int acpi_bus_remove(struct acpi_device *dev, int rmdevice)
 {
+	int ret;
+
 	if (!dev)
 		return -EINVAL;
 
 	dev->removal_type = ACPI_BUS_REMOVAL_EJECT;
-	device_release_driver(&dev->dev);
+	ret = device_release_driver(&dev->dev);
+	if (ret)
+		return ret;
 
 	if (!rmdevice)
 		return 0;
Index: linux-3.6/drivers/base/dd.c
===================================================================
--- linux-3.6.orig/drivers/base/dd.c	2012-10-01 08:47:46.000000000 +0900
+++ linux-3.6/drivers/base/dd.c	2012-10-09 17:25:55.442497825 +0900
@@ -475,9 +475,10 @@  EXPORT_SYMBOL_GPL(driver_attach);
  * __device_release_driver() must be called with @dev lock held.
  * When called for a USB interface, @dev->parent lock must be held as well.
  */
-static void __device_release_driver(struct device *dev)
+static int __device_release_driver(struct device *dev)
 {
 	struct device_driver *drv;
+	int ret = 0;
 
 	drv = dev->driver;
 	if (drv) {
@@ -493,9 +494,11 @@  static void __device_release_driver(stru
 		pm_runtime_put_sync(dev);
 
 		if (dev->bus && dev->bus->remove)
-			dev->bus->remove(dev);
+			ret = dev->bus->remove(dev);
 		else if (drv->remove)
-			drv->remove(dev);
+			ret = drv->remove(dev);
+		if (ret)
+			goto rollback;
 		devres_release_all(dev);
 		dev->driver = NULL;
 		dev_set_drvdata(dev, NULL);
@@ -506,6 +509,12 @@  static void __device_release_driver(stru
 						     dev);
 
 	}
+
+	return ret;
+
+rollback:
+	driver_sysfs_add(dev);
+	return ret;
 }
 
 /**
@@ -515,16 +524,19 @@  static void __device_release_driver(stru
  * Manually detach device from driver.
  * When called for a USB interface, @dev->parent lock must be held.
  */
-void device_release_driver(struct device *dev)
+int device_release_driver(struct device *dev)
 {
+	int ret;
 	/*
 	 * If anyone calls device_release_driver() recursively from
 	 * within their ->remove callback for the same device, they
 	 * will deadlock right here.
 	 */
 	device_lock(dev);
-	__device_release_driver(dev);
+	ret = __device_release_driver(dev);
 	device_unlock(dev);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(device_release_driver);
 
Index: linux-3.6/include/linux/device.h
===================================================================
--- linux-3.6.orig/include/linux/device.h	2012-10-01 08:47:46.000000000 +0900
+++ linux-3.6/include/linux/device.h	2012-10-09 17:25:55.479497852 +0900
@@ -834,7 +834,7 @@  static inline void *dev_get_platdata(con
  * for information on use.
  */
 extern int __must_check device_bind_driver(struct device *dev);
-extern void device_release_driver(struct device *dev);
+extern int device_release_driver(struct device *dev);
 extern int  __must_check device_attach(struct device *dev);
 extern int __must_check driver_attach(struct device_driver *drv);
 extern int __must_check device_reprobe(struct device *dev);