diff mbox series

drivers: thermal: Add NULL pointer check before using cooling device stats

Message ID 1607108991-31948-1-git-send-email-manafm@codeaurora.org (mailing list archive)
State Changes Requested, archived
Delegated to: Zhang Rui
Headers show
Series drivers: thermal: Add NULL pointer check before using cooling device stats | expand

Commit Message

Manaf Meethalavalappu Pallikunhi Dec. 4, 2020, 7:09 p.m. UTC
The stats buffer allocation for a cooling device can fail when the
device has a very high max state value. A later cooling device state
update, or an access to the cooling stats sysfs, will still try to
use the stats data for that cooling device, which leads to a NULL
pointer dereference.

Add a NULL pointer check before accessing the thermal cooling device
stats data. This fixes the following bug:

[ 26.812833] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
[ 27.122960] Call trace:
[ 27.122963] do_raw_spin_lock+0x18/0xe8
[ 27.122966] _raw_spin_lock+0x24/0x30
[ 27.128157] thermal_cooling_device_stats_update+0x24/0x98
[ 27.128162] cur_state_store+0x88/0xb8
[ 27.128166] dev_attr_store+0x40/0x58
[ 27.128169] sysfs_kf_write+0x50/0x68
[ 27.133358] kernfs_fop_write+0x12c/0x1c8
[ 27.133362] __vfs_write+0x54/0x160
[ 27.152297] vfs_write+0xcc/0x188
[ 27.157132] ksys_write+0x78/0x108
[ 27.162050] ksys_write+0xf8/0x108
[ 27.166968] __arm_smccc_hvc+0x158/0x4b0
[ 27.166973] __arm_smccc_hvc+0x9c/0x4b0
[ 27.186005] el0_svc+0x8/0xc

Signed-off-by: Manaf Meethalavalappu Pallikunhi <manafm@codeaurora.org>
---
 drivers/thermal/thermal_sysfs.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

Comments

Daniel Lezcano Dec. 4, 2020, 7:21 p.m. UTC | #1
On 04/12/2020 20:09, Manaf Meethalavalappu Pallikunhi wrote:
> There is a possible chance that some cooling device stats buffer
> allocation fails due to very high cooling device max state value.
> Later cooling device update or cooling stats sysfs will try to
> access stats data for the same cooling device. It will lead to
> NULL pointer dereference issue.
> 
> Add a NULL pointer check before accessing thermal cooling device
> stats data. It fixes the following bug

Why not skip creating the 'stats' dir if the setup fails?

> [ 26.812833] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
> [ 27.122960] Call trace:
> [ 27.122963] do_raw_spin_lock+0x18/0xe8
> [ 27.122966] _raw_spin_lock+0x24/0x30
> [ 27.128157] thermal_cooling_device_stats_update+0x24/0x98
> [ 27.128162] cur_state_store+0x88/0xb8
> [ 27.128166] dev_attr_store+0x40/0x58
> [ 27.128169] sysfs_kf_write+0x50/0x68
> [ 27.133358] kernfs_fop_write+0x12c/0x1c8
> [ 27.133362] __vfs_write+0x54/0x160
> [ 27.152297] vfs_write+0xcc/0x188
> [ 27.157132] ksys_write+0x78/0x108
> [ 27.162050] ksys_write+0xf8/0x108
> [ 27.166968] __arm_smccc_hvc+0x158/0x4b0
> [ 27.166973] __arm_smccc_hvc+0x9c/0x4b0
> [ 27.186005] el0_svc+0x8/0xc
> 
> Signed-off-by: Manaf Meethalavalappu Pallikunhi <manafm@codeaurora.org>
> ---
>  drivers/thermal/thermal_sysfs.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index 473449b..a5e4855 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -827,6 +827,9 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
>  {
>  	struct cooling_dev_stats *stats = cdev->stats;
>  
> +	if (!stats)
> +		return;
> +
>  	spin_lock(&stats->lock);
>  
>  	if (stats->state == new_state)
> @@ -848,6 +851,9 @@ static ssize_t total_trans_show(struct device *dev,
>  	struct cooling_dev_stats *stats = cdev->stats;
>  	int ret;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	spin_lock(&stats->lock);
>  	ret = sprintf(buf, "%u\n", stats->total_trans);
>  	spin_unlock(&stats->lock);
> @@ -864,6 +870,9 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr,
>  	ssize_t len = 0;
>  	int i;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	spin_lock(&stats->lock);
>  	update_time_in_state(stats);
>  
> @@ -882,8 +891,12 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf,
>  {
>  	struct thermal_cooling_device *cdev = to_cooling_device(dev);
>  	struct cooling_dev_stats *stats = cdev->stats;
> -	int i, states = stats->max_states;
> +	int i, states;
> +
> +	if (!stats)
> +		return -ENODEV;
>  
> +	states = stats->max_states;
>  	spin_lock(&stats->lock);
>  
>  	stats->total_trans = 0;
> @@ -907,6 +920,9 @@ static ssize_t trans_table_show(struct device *dev,
>  	ssize_t len = 0;
>  	int i, j;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	len += snprintf(buf + len, PAGE_SIZE - len, " From  :    To\n");
>  	len += snprintf(buf + len, PAGE_SIZE - len, "       : ");
>  	for (i = 0; i < stats->max_states; i++) {
>
Daniel Lezcano Dec. 5, 2020, 11:12 a.m. UTC | #2
On 04/12/2020 20:09, Manaf Meethalavalappu Pallikunhi wrote:
> There is a possible chance that some cooling device stats buffer
> allocation fails due to very high cooling device max state value.
> Later cooling device update or cooling stats sysfs will try to
> access stats data for the same cooling device. It will lead to
> NULL pointer dereference issue.
> 
> Add a NULL pointer check before accessing thermal cooling device
> stats data. It fixes the following bug
> 
> [ 26.812833] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
> [ 27.122960] Call trace:
> [ 27.122963] do_raw_spin_lock+0x18/0xe8
> [ 27.122966] _raw_spin_lock+0x24/0x30
> [ 27.128157] thermal_cooling_device_stats_update+0x24/0x98
> [ 27.128162] cur_state_store+0x88/0xb8
> [ 27.128166] dev_attr_store+0x40/0x58
> [ 27.128169] sysfs_kf_write+0x50/0x68
> [ 27.133358] kernfs_fop_write+0x12c/0x1c8
> [ 27.133362] __vfs_write+0x54/0x160
> [ 27.152297] vfs_write+0xcc/0x188
> [ 27.157132] ksys_write+0x78/0x108
> [ 27.162050] ksys_write+0xf8/0x108
> [ 27.166968] __arm_smccc_hvc+0x158/0x4b0
> [ 27.166973] __arm_smccc_hvc+0x9c/0x4b0
> [ 27.186005] el0_svc+0x8/0xc
> 
> Signed-off-by: Manaf Meethalavalappu Pallikunhi <manafm@codeaurora.org>

The only place where it can crash is when the
thermal_cooling_device_stats_update() function is called.

The other places, the show*/store* callbacks in the stats directory, are
inaccessible because the sysfs entries do not show up when
thermal_cooling_device_stats_setup() fails.

It would have been nice if thermal_cooling_device_stats_update() was
not called at all, but I don't see how we can do that without static
keys, which are overkill for a degraded mode.

I guess having the kzalloc failure warning in the console output is
enough to warn the user that the system is working without the stats for
the cooling device. That should not prevent the system from functioning.

Can you resend with the check in thermal_cooling_device_stats_update() only?

Thanks

  -- Daniel


> ---
>  drivers/thermal/thermal_sysfs.c | 18 +++++++++++++++++-
>  1 file changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
> index 473449b..a5e4855 100644
> --- a/drivers/thermal/thermal_sysfs.c
> +++ b/drivers/thermal/thermal_sysfs.c
> @@ -827,6 +827,9 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
>  {
>  	struct cooling_dev_stats *stats = cdev->stats;
>  
> +	if (!stats)
> +		return;
> +
>  	spin_lock(&stats->lock);
>  
>  	if (stats->state == new_state)
> @@ -848,6 +851,9 @@ static ssize_t total_trans_show(struct device *dev,
>  	struct cooling_dev_stats *stats = cdev->stats;
>  	int ret;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	spin_lock(&stats->lock);
>  	ret = sprintf(buf, "%u\n", stats->total_trans);
>  	spin_unlock(&stats->lock);
> @@ -864,6 +870,9 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr,
>  	ssize_t len = 0;
>  	int i;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	spin_lock(&stats->lock);
>  	update_time_in_state(stats);
>  
> @@ -882,8 +891,12 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf,
>  {
>  	struct thermal_cooling_device *cdev = to_cooling_device(dev);
>  	struct cooling_dev_stats *stats = cdev->stats;
> -	int i, states = stats->max_states;
> +	int i, states;
> +
> +	if (!stats)
> +		return -ENODEV;
>  
> +	states = stats->max_states;
>  	spin_lock(&stats->lock);
>  
>  	stats->total_trans = 0;
> @@ -907,6 +920,9 @@ static ssize_t trans_table_show(struct device *dev,
>  	ssize_t len = 0;
>  	int i, j;
>  
> +	if (!stats)
> +		return -ENODEV;
> +
>  	len += snprintf(buf + len, PAGE_SIZE - len, " From  :    To\n");
>  	len += snprintf(buf + len, PAGE_SIZE - len, "       : ");
>  	for (i = 0; i < stats->max_states; i++) {
>
Manaf Meethalavalappu Pallikunhi Dec. 7, 2020, 6:52 p.m. UTC | #3
On 2020-12-05 16:42, Daniel Lezcano wrote:
> On 04/12/2020 20:09, Manaf Meethalavalappu Pallikunhi wrote:
>> There is a possible chance that some cooling device stats buffer
>> allocation fails due to very high cooling device max state value.
>> Later cooling device update or cooling stats sysfs will try to
>> access stats data for the same cooling device. It will lead to
>> NULL pointer dereference issue.
>> 
>> Add a NULL pointer check before accessing thermal cooling device
>> stats data. It fixes the following bug
>> 
>> [ 26.812833] Unable to handle kernel NULL pointer dereference at 
>> virtual address 0000000000000004
>> [ 27.122960] Call trace:
>> [ 27.122963] do_raw_spin_lock+0x18/0xe8
>> [ 27.122966] _raw_spin_lock+0x24/0x30
>> [ 27.128157] thermal_cooling_device_stats_update+0x24/0x98
>> [ 27.128162] cur_state_store+0x88/0xb8
>> [ 27.128166] dev_attr_store+0x40/0x58
>> [ 27.128169] sysfs_kf_write+0x50/0x68
>> [ 27.133358] kernfs_fop_write+0x12c/0x1c8
>> [ 27.133362] __vfs_write+0x54/0x160
>> [ 27.152297] vfs_write+0xcc/0x188
>> [ 27.157132] ksys_write+0x78/0x108
>> [ 27.162050] ksys_write+0xf8/0x108
>> [ 27.166968] __arm_smccc_hvc+0x158/0x4b0
>> [ 27.166973] __arm_smccc_hvc+0x9c/0x4b0
>> [ 27.186005] el0_svc+0x8/0xc
>> 
>> Signed-off-by: Manaf Meethalavalappu Pallikunhi 
>> <manafm@codeaurora.org>
> 
> The only place where it can crash is when the
> thermal_cooling_device_stats_update() function is called.
> 
> The other places in show*/store* in the stats directory are 
> inaccessible
> as the sysfs entry is not showed up due to the
> thermal_cooling_device_stats_setup() failing.
> 
> It would have been nice if the thermal_cooling_device_stats_update() 
> was
> not called at all but I don't see how we can do that without static 
> keys
> which is overkill for a degraded mode.
> 
> I guess having the kzallocation warning in the console output is enough
> to warn the user the system is working without the stats for the 
> cooling
> device. That should not prevent the system functioning.
> 
> Can you resend with the check in thermal_cooling_device_stats_update() 
> only?
> 
> Thanks
> 
>   -- Daniel
Thanks for your review. I double-checked it and, yes, there is no need
to add the NULL check in the stats sysfs show*/store* functions.
I will update this in V2.

Thanks
Manaf
> 
> 
>> ---
>>  drivers/thermal/thermal_sysfs.c | 18 +++++++++++++++++-
>>  1 file changed, 17 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/thermal/thermal_sysfs.c 
>> b/drivers/thermal/thermal_sysfs.c
>> index 473449b..a5e4855 100644
>> --- a/drivers/thermal/thermal_sysfs.c
>> +++ b/drivers/thermal/thermal_sysfs.c
>> @@ -827,6 +827,9 @@ void thermal_cooling_device_stats_update(struct 
>> thermal_cooling_device *cdev,
>>  {
>>  	struct cooling_dev_stats *stats = cdev->stats;
>> 
>> +	if (!stats)
>> +		return;
>> +
>>  	spin_lock(&stats->lock);
>> 
>>  	if (stats->state == new_state)
>> @@ -848,6 +851,9 @@ static ssize_t total_trans_show(struct device 
>> *dev,
>>  	struct cooling_dev_stats *stats = cdev->stats;
>>  	int ret;
>> 
>> +	if (!stats)
>> +		return -ENODEV;
>> +
>>  	spin_lock(&stats->lock);
>>  	ret = sprintf(buf, "%u\n", stats->total_trans);
>>  	spin_unlock(&stats->lock);
>> @@ -864,6 +870,9 @@ time_in_state_ms_show(struct device *dev, struct 
>> device_attribute *attr,
>>  	ssize_t len = 0;
>>  	int i;
>> 
>> +	if (!stats)
>> +		return -ENODEV;
>> +
>>  	spin_lock(&stats->lock);
>>  	update_time_in_state(stats);
>> 
>> @@ -882,8 +891,12 @@ reset_store(struct device *dev, struct 
>> device_attribute *attr, const char *buf,
>>  {
>>  	struct thermal_cooling_device *cdev = to_cooling_device(dev);
>>  	struct cooling_dev_stats *stats = cdev->stats;
>> -	int i, states = stats->max_states;
>> +	int i, states;
>> +
>> +	if (!stats)
>> +		return -ENODEV;
>> 
>> +	states = stats->max_states;
>>  	spin_lock(&stats->lock);
>> 
>>  	stats->total_trans = 0;
>> @@ -907,6 +920,9 @@ static ssize_t trans_table_show(struct device 
>> *dev,
>>  	ssize_t len = 0;
>>  	int i, j;
>> 
>> +	if (!stats)
>> +		return -ENODEV;
>> +
>>  	len += snprintf(buf + len, PAGE_SIZE - len, " From  :    To\n");
>>  	len += snprintf(buf + len, PAGE_SIZE - len, "       : ");
>>  	for (i = 0; i < stats->max_states; i++) {
>>
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index 473449b..a5e4855 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -827,6 +827,9 @@  void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
 {
 	struct cooling_dev_stats *stats = cdev->stats;
 
+	if (!stats)
+		return;
+
 	spin_lock(&stats->lock);
 
 	if (stats->state == new_state)
@@ -848,6 +851,9 @@  static ssize_t total_trans_show(struct device *dev,
 	struct cooling_dev_stats *stats = cdev->stats;
 	int ret;
 
+	if (!stats)
+		return -ENODEV;
+
 	spin_lock(&stats->lock);
 	ret = sprintf(buf, "%u\n", stats->total_trans);
 	spin_unlock(&stats->lock);
@@ -864,6 +870,9 @@  time_in_state_ms_show(struct device *dev, struct device_attribute *attr,
 	ssize_t len = 0;
 	int i;
 
+	if (!stats)
+		return -ENODEV;
+
 	spin_lock(&stats->lock);
 	update_time_in_state(stats);
 
@@ -882,8 +891,12 @@  reset_store(struct device *dev, struct device_attribute *attr, const char *buf,
 {
 	struct thermal_cooling_device *cdev = to_cooling_device(dev);
 	struct cooling_dev_stats *stats = cdev->stats;
-	int i, states = stats->max_states;
+	int i, states;
+
+	if (!stats)
+		return -ENODEV;
 
+	states = stats->max_states;
 	spin_lock(&stats->lock);
 
 	stats->total_trans = 0;
@@ -907,6 +920,9 @@  static ssize_t trans_table_show(struct device *dev,
 	ssize_t len = 0;
 	int i, j;
 
+	if (!stats)
+		return -ENODEV;
+
 	len += snprintf(buf + len, PAGE_SIZE - len, " From  :    To\n");
 	len += snprintf(buf + len, PAGE_SIZE - len, "       : ");
 	for (i = 0; i < stats->max_states; i++) {