diff mbox series

[v2,1/5] thermal/drivers/core: Use a char pointer for the cooling device name

Message ID 20210312170316.3138-1-daniel.lezcano@linaro.org (mailing list archive)
State New, archived
Delegated to: Daniel Lezcano
Headers show
Series [v2,1/5] thermal/drivers/core: Use a char pointer for the cooling device name | expand

Commit Message

Daniel Lezcano March 12, 2021, 5:03 p.m. UTC
We want to have any kind of name for the cooling devices as we do no
longer want to rely on auto-numbering. Let's replace the cooling
device's fixed array by a char pointer to be allocated dynamically
when registering the cooling device, so we don't limit the length of
the name.

Rework the error path at the same time as we have to rollback the
allocations in case of error.

Tested with a dummy device having the name:
 "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"

A village on the island of Anglesey (Wales), known to have the longest
name in Europe.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../ethernet/mellanox/mlxsw/core_thermal.c    |  2 +-
 drivers/thermal/thermal_core.c                | 38 +++++++++++--------
 include/linux/thermal.h                       |  2 +-
 3 files changed, 24 insertions(+), 18 deletions(-)

Comments

Lukasz Luba March 12, 2021, 6:49 p.m. UTC | #1
On 3/12/21 5:03 PM, Daniel Lezcano wrote:
> We want to have any kind of name for the cooling devices as we do no
> longer want to rely on auto-numbering. Let's replace the cooling
> device's fixed array by a char pointer to be allocated dynamically
> when registering the cooling device, so we don't limit the length of
> the name.
> 
> Rework the error path at the same time as we have to rollback the
> allocations in case of error.
> 
> Tested with a dummy device having the name:
>   "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
> 
> A village on the island of Anglesey (Wales), known to have the longest
> name in Europe.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>   .../ethernet/mellanox/mlxsw/core_thermal.c    |  2 +-
>   drivers/thermal/thermal_core.c                | 38 +++++++++++--------
>   include/linux/thermal.h                       |  2 +-
>   3 files changed, 24 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> index bf85ce9835d7..7447c2a73cbd 100644
> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
> @@ -141,7 +141,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
>   	/* Allow mlxsw thermal zone binding to an external cooling device */
>   	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
>   		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
> -			    sizeof(cdev->type)))
> +			    strlen(cdev->type)))
>   			return 0;
>   	}
>   
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 996c038f83a4..9ef8090eb645 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct device_node *np,
>   {
>   	struct thermal_cooling_device *cdev;
>   	struct thermal_zone_device *pos = NULL;
> -	int result;
> -
> -	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
> -		return ERR_PTR(-EINVAL);
> +	int ret;
>   
>   	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
>   	    !ops->set_cur_state)
> @@ -973,14 +970,17 @@ __thermal_cooling_device_register(struct device_node *np,
>   	if (!cdev)
>   		return ERR_PTR(-ENOMEM);
>   
> -	result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
> -	if (result < 0) {
> -		kfree(cdev);
> -		return ERR_PTR(result);
> +	ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
> +	if (ret < 0)
> +		goto out_kfree_cdev;
> +	cdev->id = ret;
> +
> +	cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
> +	if (!cdev->type) {
> +		ret = -ENOMEM;

Since we haven't called the device_register() yet, I would call here:
kfree(cdev);
and then jump

> +		goto out_ida_remove;
>   	}
>   

Other than that, LGTM

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>

Regards,
Lukasz
Daniel Lezcano March 12, 2021, 9:01 p.m. UTC | #2
On 12/03/2021 19:49, Lukasz Luba wrote:
> 
> 
> On 3/12/21 5:03 PM, Daniel Lezcano wrote:
>> We want to have any kind of name for the cooling devices as we do no
>> longer want to rely on auto-numbering. Let's replace the cooling
>> device's fixed array by a char pointer to be allocated dynamically
>> when registering the cooling device, so we don't limit the length of
>> the name.
>>
>> Rework the error path at the same time as we have to rollback the
>> allocations in case of error.
>>
>> Tested with a dummy device having the name:
>>   "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
>>
>> A village on the island of Anglesey (Wales), known to have the longest
>> name in Europe.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>> ---
>>   .../ethernet/mellanox/mlxsw/core_thermal.c    |  2 +-
>>   drivers/thermal/thermal_core.c                | 38 +++++++++++--------
>>   include/linux/thermal.h                       |  2 +-
>>   3 files changed, 24 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>> b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>> index bf85ce9835d7..7447c2a73cbd 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>> @@ -141,7 +141,7 @@ static int mlxsw_get_cooling_device_idx(struct
>> mlxsw_thermal *thermal,
>>       /* Allow mlxsw thermal zone binding to an external cooling
>> device */
>>       for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev);
>> i++) {
>>           if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
>> -                sizeof(cdev->type)))
>> +                strlen(cdev->type)))
>>               return 0;
>>       }
>>   diff --git a/drivers/thermal/thermal_core.c
>> b/drivers/thermal/thermal_core.c
>> index 996c038f83a4..9ef8090eb645 100644
>> --- a/drivers/thermal/thermal_core.c
>> +++ b/drivers/thermal/thermal_core.c
>> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct
>> device_node *np,
>>   {
>>       struct thermal_cooling_device *cdev;
>>       struct thermal_zone_device *pos = NULL;
>> -    int result;
>> -
>> -    if (type && strlen(type) >= THERMAL_NAME_LENGTH)
>> -        return ERR_PTR(-EINVAL);
>> +    int ret;
>>         if (!ops || !ops->get_max_state || !ops->get_cur_state ||
>>           !ops->set_cur_state)
>> @@ -973,14 +970,17 @@ __thermal_cooling_device_register(struct
>> device_node *np,
>>       if (!cdev)
>>           return ERR_PTR(-ENOMEM);
>>   -    result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>> -    if (result < 0) {
>> -        kfree(cdev);
>> -        return ERR_PTR(result);
>> +    ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>> +    if (ret < 0)
>> +        goto out_kfree_cdev;
>> +    cdev->id = ret;
>> +
>> +    cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
>> +    if (!cdev->type) {
>> +        ret = -ENOMEM;
> 
> Since we haven't called the device_register() yet, I would call here:
> kfree(cdev);
> and then jump

I'm not sure to understand, we have to remove the ida, no ?

> Other than that, LGTM
> 
> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
> 
> Regards,
> Lukasz
Ido Schimmel March 14, 2021, 9:53 a.m. UTC | #3
On Fri, Mar 12, 2021 at 06:03:12PM +0100, Daniel Lezcano wrote:
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 996c038f83a4..9ef8090eb645 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct device_node *np,
>  {
>  	struct thermal_cooling_device *cdev;
>  	struct thermal_zone_device *pos = NULL;
> -	int result;
> -
> -	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
> -		return ERR_PTR(-EINVAL);
> +	int ret;
>  
>  	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
>  	    !ops->set_cur_state)
> @@ -973,14 +970,17 @@ __thermal_cooling_device_register(struct device_node *np,
>  	if (!cdev)
>  		return ERR_PTR(-ENOMEM);
>  
> -	result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
> -	if (result < 0) {
> -		kfree(cdev);
> -		return ERR_PTR(result);
> +	ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
> +	if (ret < 0)
> +		goto out_kfree_cdev;
> +	cdev->id = ret;
> +
> +	cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
> +	if (!cdev->type) {
> +		ret = -ENOMEM;
> +		goto out_ida_remove;
>  	}
>  
> -	cdev->id = result;
> -	strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
>  	mutex_init(&cdev->lock);
>  	INIT_LIST_HEAD(&cdev->thermal_instances);
>  	cdev->np = np;
> @@ -990,12 +990,9 @@ __thermal_cooling_device_register(struct device_node *np,
>  	cdev->devdata = devdata;
>  	thermal_cooling_device_setup_sysfs(cdev);
>  	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
> -	result = device_register(&cdev->device);
> -	if (result) {
> -		ida_simple_remove(&thermal_cdev_ida, cdev->id);
> -		put_device(&cdev->device);
> -		return ERR_PTR(result);
> -	}
> +	ret = device_register(&cdev->device);
> +	if (ret)
> +		goto out_kfree_type;
>  
>  	/* Add 'this' new cdev to the global cdev list */
>  	mutex_lock(&thermal_list_lock);
> @@ -1013,6 +1010,14 @@ __thermal_cooling_device_register(struct device_node *np,
>  	mutex_unlock(&thermal_list_lock);
>  
>  	return cdev;
> +
> +out_kfree_type:
> +	kfree(cdev->type);
> +	put_device(&cdev->device);
> +out_ida_remove:
> +	ida_simple_remove(&thermal_cdev_ida, cdev->id);
> +out_kfree_cdev:
> +	return ERR_PTR(ret);
>  }
>  
>  /**
> @@ -1172,6 +1177,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>  	device_del(&cdev->device);
>  	thermal_cooling_device_destroy_sysfs(cdev);
>  	put_device(&cdev->device);
> +	kfree(cdev->type);
>  }
>  EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);

I'm getting the following user-after-free with this patch [1]. Fixed by:

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 9ef8090eb645..c8d4010940ef 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1176,8 +1176,8 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
        ida_simple_remove(&thermal_cdev_ida, cdev->id);
        device_del(&cdev->device);
        thermal_cooling_device_destroy_sysfs(cdev);
-       put_device(&cdev->device);
        kfree(cdev->type);
+       put_device(&cdev->device);
 }
 EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);

[1]
[  148.601815] ==================================================================
[  148.610260] BUG: KASAN: use-after-free in thermal_cooling_device_unregister+0x6ca/0x6e0
[  148.619304] Read of size 8 at addr ffff8881510a0808 by task devlink/574
[  148.626768]
[  148.628477] CPU: 2 PID: 574 Comm: devlink Not tainted 5.12.0-rc2-custom-12525-g7ba8a2feee15 #3301
[  148.638463] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016
[  148.648625] Call Trace:
[  148.651408]  dump_stack+0xfa/0x151
[  148.661701]  print_address_description.constprop.0+0x18/0x130
[  148.681014]  kasan_report.cold+0x7f/0x111
[  148.692003]  thermal_cooling_device_unregister+0x6ca/0x6e0
[  148.703984]  mlxsw_thermal_fini+0xd2/0x1f0
[  148.708664]  mlxsw_core_bus_device_unregister+0x158/0x8d0
[  148.714794]  mlxsw_devlink_core_bus_device_reload_down+0x93/0xc0
[  148.721594]  devlink_reload+0x15f/0x5e0
[  148.749669]  devlink_nl_cmd_reload+0x7fc/0x1210
[  148.775992]  genl_family_rcv_msg_doit+0x22a/0x320
[  148.799789]  genl_rcv_msg+0x341/0x5a0
[  148.818789]  netlink_rcv_skb+0x14d/0x430
[  148.836450]  genl_rcv+0x29/0x40
[  148.840034]  netlink_unicast+0x539/0x7e0
[  148.859219]  netlink_sendmsg+0x8d7/0xe10
[  148.879271]  __sys_sendto+0x23f/0x350
[  148.904178]  __x64_sys_sendto+0xe2/0x1b0
[  148.919297]  do_syscall_64+0x2d/0x40
[  148.923365]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  148.929081] RIP: 0033:0x7f17c0dbaefa
[  148.933139] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76
 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
[  148.954190] RSP: 002b:00007ffd879c5e18 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[  148.962723] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f17c0dbaefa
[  148.970751] RDX: 0000000000000030 RSI: 0000000000ad0ad0 RDI: 0000000000000003
[  148.978776] RBP: 0000000000ad0aa0 R08: 00007f17c0e8d000 R09: 000000000000000c
[  148.986803] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000408d70
[  148.994834] R13: 0000000000ad0910 R14: 0000000000000000 R15: 0000000000000001
[  149.002978]
[  149.004687] Allocated by task 1:
[  149.008345]  kasan_save_stack+0x1b/0x40
[  149.012711]  __kasan_kmalloc+0x7a/0x90
[  149.016974]  __thermal_cooling_device_register.part.0+0x59/0x9e0
[  149.023753]  thermal_cooling_device_register+0xb3/0x100
[  149.029671]  mlxsw_thermal_init+0x78b/0xa10
[  149.034427]  __mlxsw_core_bus_device_register+0xd05/0x1a30
[  149.040634]  mlxsw_core_bus_device_register+0x56/0xb0
[  149.046349]  mlxsw_pci_probe+0x53b/0x750
[  149.050800]  local_pci_probe+0xc6/0x170
[  149.055144]  pci_device_probe+0x2a3/0x4a0
[  149.059683]  really_probe+0x2b6/0xec0
[  149.063840]  driver_probe_device+0x1e2/0x330
[  149.068673]  device_driver_attach+0x282/0x2f0
[  149.073605]  __driver_attach+0x160/0x2f0
[  149.078050]  bus_for_each_dev+0x14c/0x1d0
[  149.082589]  bus_add_driver+0x3ac/0x650
[  149.086935]  driver_register+0x225/0x3a0
[  149.091381]  mlxsw_sp_module_init+0xa2/0x174
[  149.096216]  do_one_initcall+0x108/0x690
[  149.100660]  kernel_init_freeable+0x3ec/0x46b
[  149.105595]  kernel_init+0x13/0x1eb
[  149.109559]  ret_from_fork+0x1f/0x30
[  149.113613]
[  149.115311] Freed by task 574:
[  149.118765]  kasan_save_stack+0x1b/0x40
[  149.123116]  kasan_set_track+0x1c/0x30
[  149.127373]  kasan_set_free_info+0x20/0x30
[  149.132021]  __kasan_slab_free+0xe5/0x110
[  149.136556]  slab_free_freelist_hook+0x59/0x150
[  149.141681]  kfree+0xd5/0x3b0
[  149.145055]  thermal_release+0xa0/0x110
[  149.149414]  device_release+0xa4/0x240
[  149.153680]  kobject_put+0x1c8/0x540
[  149.157747]  put_device+0x20/0x30
[  149.161530]  thermal_cooling_device_unregister+0x578/0x6e0
[  149.167751]  mlxsw_thermal_fini+0xd2/0x1f0
[  149.172414]  mlxsw_core_bus_device_unregister+0x158/0x8d0
[  149.178529]  mlxsw_devlink_core_bus_device_reload_down+0x93/0xc0
[  149.185327]  devlink_reload+0x15f/0x5e0
[  149.189695]  devlink_nl_cmd_reload+0x7fc/0x1210
[  149.194838]  genl_family_rcv_msg_doit+0x22a/0x320
[  149.200182]  genl_rcv_msg+0x341/0x5a0
[  149.204337]  netlink_rcv_skb+0x14d/0x430
[  149.208799]  genl_rcv+0x29/0x40
[  149.212381]  netlink_unicast+0x539/0x7e0
[  149.216825]  netlink_sendmsg+0x8d7/0xe10
[  149.221274]  __sys_sendto+0x23f/0x350
[  149.225423]  __x64_sys_sendto+0xe2/0x1b0
[  149.229864]  do_syscall_64+0x2d/0x40
[  149.233912]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  149.239624]
[  149.241322] The buggy address belongs to the object at ffff8881510a0800
[  149.241322]  which belongs to the cache kmalloc-2k of size 2048
[  149.255372] The buggy address is located 8 bytes inside of
[  149.255372]  2048-byte region [ffff8881510a0800, ffff8881510a1000)
[  149.268456] The buggy address belongs to the page:
[  149.273850] page:000000006ec87a73 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1510a0
[  149.284416] head:000000006ec87a73 order:3 compound_mapcount:0 compound_pincount:0
[  149.292838] flags: 0x200000000010200(slab|head)
[  149.297981] raw: 0200000000010200 ffffea000426e208 ffffea000544b808 ffff88810004de40
[  149.306707] raw: 0000000000000000 0000000000050005 00000001ffffffff 0000000000000000
[  149.315417] page dumped because: kasan: bad access detected
[  149.321697]
[  149.323403] Memory state around the buggy address:
[  149.328811]  ffff8881510a0700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  149.336939]  ffff8881510a0780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  149.345075] >ffff8881510a0800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  149.353202]                       ^
[  149.357159]  ffff8881510a0880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  149.365298]  ffff8881510a0900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  149.373424] ==================================================================
Daniel Lezcano March 14, 2021, 10:48 a.m. UTC | #4
Hi Ido,

On 14/03/2021 10:53, Ido Schimmel wrote:
> On Fri, Mar 12, 2021 at 06:03:12PM +0100, Daniel Lezcano wrote:
>> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
>> index 996c038f83a4..9ef8090eb645 100644
>> --- a/drivers/thermal/thermal_core.c
>> +++ b/drivers/thermal/thermal_core.c
>> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct device_node *np,

[ ... ]

>>  /**
>> @@ -1172,6 +1177,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>>  	device_del(&cdev->device);
>>  	thermal_cooling_device_destroy_sysfs(cdev);
>>  	put_device(&cdev->device);
>> +	kfree(cdev->type);
>>  }
>>  EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
> 
> I'm getting the following user-after-free with this patch [1]. Fixed by:
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 9ef8090eb645..c8d4010940ef 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -1176,8 +1176,8 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
>         ida_simple_remove(&thermal_cdev_ida, cdev->id);
>         device_del(&cdev->device);
>         thermal_cooling_device_destroy_sysfs(cdev);
> -       put_device(&cdev->device);
>         kfree(cdev->type);
> +       put_device(&cdev->device);

Indeed 'thermal_release' frees the cdev pointer and is called by
put_device, then kfree use the pointer right after.

Thanks for the fix

  -- Daniel
Lukasz Luba March 15, 2021, 9:40 a.m. UTC | #5
On 3/12/21 9:01 PM, Daniel Lezcano wrote:
> On 12/03/2021 19:49, Lukasz Luba wrote:
>>
>>
>> On 3/12/21 5:03 PM, Daniel Lezcano wrote:
>>> We want to have any kind of name for the cooling devices as we do no
>>> longer want to rely on auto-numbering. Let's replace the cooling
>>> device's fixed array by a char pointer to be allocated dynamically
>>> when registering the cooling device, so we don't limit the length of
>>> the name.
>>>
>>> Rework the error path at the same time as we have to rollback the
>>> allocations in case of error.
>>>
>>> Tested with a dummy device having the name:
>>>    "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
>>>
>>> A village on the island of Anglesey (Wales), known to have the longest
>>> name in Europe.
>>>
>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>> ---
>>>    .../ethernet/mellanox/mlxsw/core_thermal.c    |  2 +-
>>>    drivers/thermal/thermal_core.c                | 38 +++++++++++--------
>>>    include/linux/thermal.h                       |  2 +-
>>>    3 files changed, 24 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>> b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>> index bf85ce9835d7..7447c2a73cbd 100644
>>> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>> @@ -141,7 +141,7 @@ static int mlxsw_get_cooling_device_idx(struct
>>> mlxsw_thermal *thermal,
>>>        /* Allow mlxsw thermal zone binding to an external cooling
>>> device */
>>>        for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev);
>>> i++) {
>>>            if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
>>> -                sizeof(cdev->type)))
>>> +                strlen(cdev->type)))
>>>                return 0;
>>>        }
>>>    diff --git a/drivers/thermal/thermal_core.c
>>> b/drivers/thermal/thermal_core.c
>>> index 996c038f83a4..9ef8090eb645 100644
>>> --- a/drivers/thermal/thermal_core.c
>>> +++ b/drivers/thermal/thermal_core.c
>>> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct
>>> device_node *np,
>>>    {
>>>        struct thermal_cooling_device *cdev;
>>>        struct thermal_zone_device *pos = NULL;
>>> -    int result;
>>> -
>>> -    if (type && strlen(type) >= THERMAL_NAME_LENGTH)
>>> -        return ERR_PTR(-EINVAL);
>>> +    int ret;
>>>          if (!ops || !ops->get_max_state || !ops->get_cur_state ||
>>>            !ops->set_cur_state)
>>> @@ -973,14 +970,17 @@ __thermal_cooling_device_register(struct
>>> device_node *np,
>>>        if (!cdev)
>>>            return ERR_PTR(-ENOMEM);
>>>    -    result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>>> -    if (result < 0) {
>>> -        kfree(cdev);
>>> -        return ERR_PTR(result);
>>> +    ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>>> +    if (ret < 0)
>>> +        goto out_kfree_cdev;
>>> +    cdev->id = ret;
>>> +
>>> +    cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
>>> +    if (!cdev->type) {
>>> +        ret = -ENOMEM;
>>
>> Since we haven't called the device_register() yet, I would call here:
>> kfree(cdev);
>> and then jump
> 
> I'm not sure to understand, we have to remove the ida, no ?

Yes, we have to remove 'ida' and you jump to that label:
goto out_ida_remove;
but under that label, there is no 'put_device()'.
We could have here, before the 'goto', a simple kfree, which
should be safe, since we haven't called the device_register() yet.
Something like:

--------8<------------------------------
cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
if (!cdev->type) {
	ret = -ENOMEM;
	kfree(cdev);
	goto out_ida_remove;
}

-------->8------------------------------


> 
>> Other than that, LGTM
>>
>> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
>>
>> Regards,
>> Lukasz
> 
>
Lukasz Luba March 15, 2021, 9:58 a.m. UTC | #6
On 3/15/21 9:40 AM, Lukasz Luba wrote:
> 
> 
> On 3/12/21 9:01 PM, Daniel Lezcano wrote:
>> On 12/03/2021 19:49, Lukasz Luba wrote:
>>>
>>>
>>> On 3/12/21 5:03 PM, Daniel Lezcano wrote:
>>>> We want to have any kind of name for the cooling devices as we do no
>>>> longer want to rely on auto-numbering. Let's replace the cooling
>>>> device's fixed array by a char pointer to be allocated dynamically
>>>> when registering the cooling device, so we don't limit the length of
>>>> the name.
>>>>
>>>> Rework the error path at the same time as we have to rollback the
>>>> allocations in case of error.
>>>>
>>>> Tested with a dummy device having the name:
>>>>    "Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch"
>>>>
>>>> A village on the island of Anglesey (Wales), known to have the longest
>>>> name in Europe.
>>>>
>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>> ---
>>>>    .../ethernet/mellanox/mlxsw/core_thermal.c    |  2 +-
>>>>    drivers/thermal/thermal_core.c                | 38 
>>>> +++++++++++--------
>>>>    include/linux/thermal.h                       |  2 +-
>>>>    3 files changed, 24 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>>> b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>>> index bf85ce9835d7..7447c2a73cbd 100644
>>>> --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>>> +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
>>>> @@ -141,7 +141,7 @@ static int mlxsw_get_cooling_device_idx(struct
>>>> mlxsw_thermal *thermal,
>>>>        /* Allow mlxsw thermal zone binding to an external cooling
>>>> device */
>>>>        for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev);
>>>> i++) {
>>>>            if (strnstr(cdev->type, 
>>>> mlxsw_thermal_external_allowed_cdev[i],
>>>> -                sizeof(cdev->type)))
>>>> +                strlen(cdev->type)))
>>>>                return 0;
>>>>        }
>>>>    diff --git a/drivers/thermal/thermal_core.c
>>>> b/drivers/thermal/thermal_core.c
>>>> index 996c038f83a4..9ef8090eb645 100644
>>>> --- a/drivers/thermal/thermal_core.c
>>>> +++ b/drivers/thermal/thermal_core.c
>>>> @@ -960,10 +960,7 @@ __thermal_cooling_device_register(struct
>>>> device_node *np,
>>>>    {
>>>>        struct thermal_cooling_device *cdev;
>>>>        struct thermal_zone_device *pos = NULL;
>>>> -    int result;
>>>> -
>>>> -    if (type && strlen(type) >= THERMAL_NAME_LENGTH)
>>>> -        return ERR_PTR(-EINVAL);
>>>> +    int ret;
>>>>          if (!ops || !ops->get_max_state || !ops->get_cur_state ||
>>>>            !ops->set_cur_state)
>>>> @@ -973,14 +970,17 @@ __thermal_cooling_device_register(struct
>>>> device_node *np,
>>>>        if (!cdev)
>>>>            return ERR_PTR(-ENOMEM);
>>>>    -    result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>>>> -    if (result < 0) {
>>>> -        kfree(cdev);
>>>> -        return ERR_PTR(result);
>>>> +    ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
>>>> +    if (ret < 0)
>>>> +        goto out_kfree_cdev;
>>>> +    cdev->id = ret;
>>>> +
>>>> +    cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
>>>> +    if (!cdev->type) {
>>>> +        ret = -ENOMEM;
>>>
>>> Since we haven't called the device_register() yet, I would call here:
>>> kfree(cdev);
>>> and then jump
>>
>> I'm not sure to understand, we have to remove the ida, no ?
> 
> Yes, we have to remove 'ida' and you jump to that label:
> goto out_ida_remove;
> but under that label, there is no 'put_device()'.
> We could have here, before the 'goto', a simple kfree, which
> should be safe, since we haven't called the device_register() yet.
> Something like:
> 
> --------8<------------------------------
> cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
> if (!cdev->type) {
>      ret = -ENOMEM;
>      kfree(cdev);
>      goto out_ida_remove;
> }
> 
> -------->8------------------------------
> 

I've check that label and probably not easy to modify it
and put conditional there. So probably you would have to
call everything here (not jumping to label):

ida_simple_remove(&thermal_cdev_ida, cdev->id);
kfree(cdev);
return ERR_PTR(-_ENOMEM);


> 
>>
>>> Other than that, LGTM
>>>
>>> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
>>>
>>> Regards,
>>> Lukasz
>>
>>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index bf85ce9835d7..7447c2a73cbd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -141,7 +141,7 @@  static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
 	/* Allow mlxsw thermal zone binding to an external cooling device */
 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
-			    sizeof(cdev->type)))
+			    strlen(cdev->type)))
 			return 0;
 	}
 
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 996c038f83a4..9ef8090eb645 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -960,10 +960,7 @@  __thermal_cooling_device_register(struct device_node *np,
 {
 	struct thermal_cooling_device *cdev;
 	struct thermal_zone_device *pos = NULL;
-	int result;
-
-	if (type && strlen(type) >= THERMAL_NAME_LENGTH)
-		return ERR_PTR(-EINVAL);
+	int ret;
 
 	if (!ops || !ops->get_max_state || !ops->get_cur_state ||
 	    !ops->set_cur_state)
@@ -973,14 +970,17 @@  __thermal_cooling_device_register(struct device_node *np,
 	if (!cdev)
 		return ERR_PTR(-ENOMEM);
 
-	result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
-	if (result < 0) {
-		kfree(cdev);
-		return ERR_PTR(result);
+	ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
+		goto out_kfree_cdev;
+	cdev->id = ret;
+
+	cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
+	if (!cdev->type) {
+		ret = -ENOMEM;
+		goto out_ida_remove;
 	}
 
-	cdev->id = result;
-	strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
 	mutex_init(&cdev->lock);
 	INIT_LIST_HEAD(&cdev->thermal_instances);
 	cdev->np = np;
@@ -990,12 +990,9 @@  __thermal_cooling_device_register(struct device_node *np,
 	cdev->devdata = devdata;
 	thermal_cooling_device_setup_sysfs(cdev);
 	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
-	result = device_register(&cdev->device);
-	if (result) {
-		ida_simple_remove(&thermal_cdev_ida, cdev->id);
-		put_device(&cdev->device);
-		return ERR_PTR(result);
-	}
+	ret = device_register(&cdev->device);
+	if (ret)
+		goto out_kfree_type;
 
 	/* Add 'this' new cdev to the global cdev list */
 	mutex_lock(&thermal_list_lock);
@@ -1013,6 +1010,14 @@  __thermal_cooling_device_register(struct device_node *np,
 	mutex_unlock(&thermal_list_lock);
 
 	return cdev;
+
+out_kfree_type:
+	kfree(cdev->type);
+	put_device(&cdev->device);
+out_ida_remove:
+	ida_simple_remove(&thermal_cdev_ida, cdev->id);
+out_kfree_cdev:
+	return ERR_PTR(ret);
 }
 
 /**
@@ -1172,6 +1177,7 @@  void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 	device_del(&cdev->device);
 	thermal_cooling_device_destroy_sysfs(cdev);
 	put_device(&cdev->device);
+	kfree(cdev->type);
 }
 EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 6ac7bb1d2b1f..169502164364 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -91,7 +91,7 @@  struct thermal_cooling_device_ops {
 
 struct thermal_cooling_device {
 	int id;
-	char type[THERMAL_NAME_LENGTH];
+	char *type;
 	struct device device;
 	struct device_node *np;
 	void *devdata;