diff mbox series

[v2,1/2] thermal: core: Reference count the zone in thermal_zone_get_by_id()

Message ID 6112242.lOV4Wx5bFT@rjwysocki.net (mailing list archive)
State In Next
Delegated to: Rafael Wysocki
Headers show
Series thermal: core: Fix potential use-after-free issues | expand

Commit Message

Rafael J. Wysocki Oct. 3, 2024, 12:25 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

There are places in the thermal netlink code where nothing prevents
the thermal zone object from going away while being accessed after it
has been returned by thermal_zone_get_by_id().

To address this, make thermal_zone_get_by_id() get a reference on the
thermal zone device object to be returned with the help of get_device(),
under thermal_list_lock, and adjust all of its callers to this change
with the help of the cleanup.h infrastructure.

Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.

---
 drivers/thermal/thermal_core.c    |    1 +
 drivers/thermal/thermal_core.h    |    3 +++
 drivers/thermal/thermal_netlink.c |    9 +++------
 3 files changed, 7 insertions(+), 6 deletions(-)

Comments

Lukasz Luba Oct. 4, 2024, 8:02 a.m. UTC | #1
Hi Rafael,

On 10/3/24 13:25, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> There are places in the thermal netlink code where nothing prevents
> the thermal zone object from going away while being accessed after it
> has been returned by thermal_zone_get_by_id().
> 
> To address this, make thermal_zone_get_by_id() get a reference on the
> thermal zone device object to be returned with the help of get_device(),
> under thermal_list_lock, and adjust all of its callers to this change
> with the help of the cleanup.h infrastructure.
> 
> Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
> Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
> 
> v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
> 
> ---
>   drivers/thermal/thermal_core.c    |    1 +
>   drivers/thermal/thermal_core.h    |    3 +++
>   drivers/thermal/thermal_netlink.c |    9 +++------
>   3 files changed, 7 insertions(+), 6 deletions(-)
> 
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
>   	mutex_lock(&thermal_list_lock);
>   	list_for_each_entry(tz, &thermal_tz_list, node) {
>   		if (tz->id == id) {
> +			get_device(&tz->device);
>   			match = tz;
>   			break;
>   		}
> Index: linux-pm/drivers/thermal/thermal_core.h
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.h
> +++ linux-pm/drivers/thermal/thermal_core.h
> @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
>   
>   struct thermal_zone_device *thermal_zone_get_by_id(int id);
>   
> +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
> +	     if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
> +
>   static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
>   {
>   	return cdev->ops->get_requested_power && cdev->ops->state2power &&
> Index: linux-pm/drivers/thermal/thermal_netlink.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_netlink.c
> +++ linux-pm/drivers/thermal/thermal_netlink.c
> @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
>   {
>   	struct sk_buff *msg = p->msg;
>   	const struct thermal_trip_desc *td;
> -	struct thermal_zone_device *tz;
>   	struct nlattr *start_trip;
>   	int id;
>   
> @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
>   
>   	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>   
> -	tz = thermal_zone_get_by_id(id);
> +	CLASS(thermal_zone_get_by_id, tz)(id);
>   	if (!tz)
>   		return -EINVAL;
>   
> @@ -488,7 +487,6 @@ out_cancel_nest:
>   static int thermal_genl_cmd_tz_get_temp(struct param *p)
>   {
>   	struct sk_buff *msg = p->msg;
> -	struct thermal_zone_device *tz;
>   	int temp, ret, id;
>   
>   	if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
>   
>   	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>   
> -	tz = thermal_zone_get_by_id(id);
> +	CLASS(thermal_zone_get_by_id, tz)(id);
>   	if (!tz)
>   		return -EINVAL;
>   
> @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
>   static int thermal_genl_cmd_tz_get_gov(struct param *p)
>   {
>   	struct sk_buff *msg = p->msg;
> -	struct thermal_zone_device *tz;
>   	int id, ret = 0;
>   
>   	if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
>   
>   	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>   
> -	tz = thermal_zone_get_by_id(id);
> +	CLASS(thermal_zone_get_by_id, tz)(id);
>   	if (!tz)
>   		return -EINVAL;
>   
> 
> 
> 

I wasn't aware of that helpers in cleanup.h.

Could you help me to understand when this this
'if (_T) put_device((&_T->device)' will be called?

Regards,
Lukasz
Rafael J. Wysocki Oct. 4, 2024, 1:25 p.m. UTC | #2
Hi Łukasz,

On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
> Hi Rafael,
>
> On 10/3/24 13:25, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > There are places in the thermal netlink code where nothing prevents
> > the thermal zone object from going away while being accessed after it
> > has been returned by thermal_zone_get_by_id().
> >
> > To address this, make thermal_zone_get_by_id() get a reference on the
> > thermal zone device object to be returned with the help of get_device(),
> > under thermal_list_lock, and adjust all of its callers to this change
> > with the help of the cleanup.h infrastructure.
> >
> > Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
> > Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >
> > v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
> >
> > ---
> >   drivers/thermal/thermal_core.c    |    1 +
> >   drivers/thermal/thermal_core.h    |    3 +++
> >   drivers/thermal/thermal_netlink.c |    9 +++------
> >   3 files changed, 7 insertions(+), 6 deletions(-)
> >
> > Index: linux-pm/drivers/thermal/thermal_core.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_core.c
> > +++ linux-pm/drivers/thermal/thermal_core.c
> > @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
> >       mutex_lock(&thermal_list_lock);
> >       list_for_each_entry(tz, &thermal_tz_list, node) {
> >               if (tz->id == id) {
> > +                     get_device(&tz->device);
> >                       match = tz;
> >                       break;
> >               }
> > Index: linux-pm/drivers/thermal/thermal_core.h
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_core.h
> > +++ linux-pm/drivers/thermal/thermal_core.h
> > @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
> >
> >   struct thermal_zone_device *thermal_zone_get_by_id(int id);
> >
> > +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
> > +          if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
> > +
> >   static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
> >   {
> >       return cdev->ops->get_requested_power && cdev->ops->state2power &&
> > Index: linux-pm/drivers/thermal/thermal_netlink.c
> > ===================================================================
> > --- linux-pm.orig/drivers/thermal/thermal_netlink.c
> > +++ linux-pm/drivers/thermal/thermal_netlink.c
> > @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
> >   {
> >       struct sk_buff *msg = p->msg;
> >       const struct thermal_trip_desc *td;
> > -     struct thermal_zone_device *tz;
> >       struct nlattr *start_trip;
> >       int id;
> >
> > @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
> >
> >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >
> > -     tz = thermal_zone_get_by_id(id);
> > +     CLASS(thermal_zone_get_by_id, tz)(id);
> >       if (!tz)
> >               return -EINVAL;
> >
> > @@ -488,7 +487,6 @@ out_cancel_nest:
> >   static int thermal_genl_cmd_tz_get_temp(struct param *p)
> >   {
> >       struct sk_buff *msg = p->msg;
> > -     struct thermal_zone_device *tz;
> >       int temp, ret, id;
> >
> >       if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
> >
> >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >
> > -     tz = thermal_zone_get_by_id(id);
> > +     CLASS(thermal_zone_get_by_id, tz)(id);
> >       if (!tz)
> >               return -EINVAL;
> >
> > @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
> >   static int thermal_genl_cmd_tz_get_gov(struct param *p)
> >   {
> >       struct sk_buff *msg = p->msg;
> > -     struct thermal_zone_device *tz;
> >       int id, ret = 0;
> >
> >       if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
> >
> >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >
> > -     tz = thermal_zone_get_by_id(id);
> > +     CLASS(thermal_zone_get_by_id, tz)(id);
> >       if (!tz)
> >               return -EINVAL;
> >
> >
> >
> >
>
> I wasn't aware of that helpers in cleanup.h.
>
> Could you help me to understand when this this
> 'if (_T) put_device((&_T->device)' will be called?

When the pointer variable initialized via the CLASS() macro goes out
of scope (that is, before freeing the memory occupied by the pointer
itself).
Lukasz Luba Oct. 4, 2024, 1:31 p.m. UTC | #3
On 10/4/24 14:25, Rafael J. Wysocki wrote:
> Hi Łukasz,
> 
> On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>
>> Hi Rafael,
>>
>> On 10/3/24 13:25, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> There are places in the thermal netlink code where nothing prevents
>>> the thermal zone object from going away while being accessed after it
>>> has been returned by thermal_zone_get_by_id().
>>>
>>> To address this, make thermal_zone_get_by_id() get a reference on the
>>> thermal zone device object to be returned with the help of get_device(),
>>> under thermal_list_lock, and adjust all of its callers to this change
>>> with the help of the cleanup.h infrastructure.
>>>
>>> Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
>>> Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>
>>> v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
>>>
>>> ---
>>>    drivers/thermal/thermal_core.c    |    1 +
>>>    drivers/thermal/thermal_core.h    |    3 +++
>>>    drivers/thermal/thermal_netlink.c |    9 +++------
>>>    3 files changed, 7 insertions(+), 6 deletions(-)
>>>
>>> Index: linux-pm/drivers/thermal/thermal_core.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/thermal/thermal_core.c
>>> +++ linux-pm/drivers/thermal/thermal_core.c
>>> @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
>>>        mutex_lock(&thermal_list_lock);
>>>        list_for_each_entry(tz, &thermal_tz_list, node) {
>>>                if (tz->id == id) {
>>> +                     get_device(&tz->device);
>>>                        match = tz;
>>>                        break;
>>>                }
>>> Index: linux-pm/drivers/thermal/thermal_core.h
>>> ===================================================================
>>> --- linux-pm.orig/drivers/thermal/thermal_core.h
>>> +++ linux-pm/drivers/thermal/thermal_core.h
>>> @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
>>>
>>>    struct thermal_zone_device *thermal_zone_get_by_id(int id);
>>>
>>> +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
>>> +          if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
>>> +
>>>    static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
>>>    {
>>>        return cdev->ops->get_requested_power && cdev->ops->state2power &&
>>> Index: linux-pm/drivers/thermal/thermal_netlink.c
>>> ===================================================================
>>> --- linux-pm.orig/drivers/thermal/thermal_netlink.c
>>> +++ linux-pm/drivers/thermal/thermal_netlink.c
>>> @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
>>>    {
>>>        struct sk_buff *msg = p->msg;
>>>        const struct thermal_trip_desc *td;
>>> -     struct thermal_zone_device *tz;
>>>        struct nlattr *start_trip;
>>>        int id;
>>>
>>> @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
>>>
>>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>>>
>>> -     tz = thermal_zone_get_by_id(id);
>>> +     CLASS(thermal_zone_get_by_id, tz)(id);
>>>        if (!tz)
>>>                return -EINVAL;
>>>
>>> @@ -488,7 +487,6 @@ out_cancel_nest:
>>>    static int thermal_genl_cmd_tz_get_temp(struct param *p)
>>>    {
>>>        struct sk_buff *msg = p->msg;
>>> -     struct thermal_zone_device *tz;
>>>        int temp, ret, id;
>>>
>>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
>>> @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
>>>
>>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>>>
>>> -     tz = thermal_zone_get_by_id(id);
>>> +     CLASS(thermal_zone_get_by_id, tz)(id);
>>>        if (!tz)
>>>                return -EINVAL;
>>>
>>> @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
>>>    static int thermal_genl_cmd_tz_get_gov(struct param *p)
>>>    {
>>>        struct sk_buff *msg = p->msg;
>>> -     struct thermal_zone_device *tz;
>>>        int id, ret = 0;
>>>
>>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
>>> @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
>>>
>>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
>>>
>>> -     tz = thermal_zone_get_by_id(id);
>>> +     CLASS(thermal_zone_get_by_id, tz)(id);
>>>        if (!tz)
>>>                return -EINVAL;
>>>
>>>
>>>
>>>
>>
>> I wasn't aware of that helpers in cleanup.h.
>>
>> Could you help me to understand when this this
>> 'if (_T) put_device((&_T->device)' will be called?
> 
> When the pointer variable initialized via the CLASS() macro goes out
> of scope (that is, before freeing the memory occupied by the pointer
> itself).


OK, so do we still need the old code in
thermal_zone_device_unregister(), which calls
put_device(&tz->device) ?
Maybe that code can go away?
Rafael J. Wysocki Oct. 4, 2024, 1:33 p.m. UTC | #4
On Fri, Oct 4, 2024 at 3:25 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> Hi Łukasz,
>
> On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >
> > Hi Rafael,
> >
> > On 10/3/24 13:25, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >
> > > There are places in the thermal netlink code where nothing prevents
> > > the thermal zone object from going away while being accessed after it
> > > has been returned by thermal_zone_get_by_id().
> > >
> > > To address this, make thermal_zone_get_by_id() get a reference on the
> > > thermal zone device object to be returned with the help of get_device(),
> > > under thermal_list_lock, and adjust all of its callers to this change
> > > with the help of the cleanup.h infrastructure.
> > >
> > > Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
> > > Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > ---
> > >
> > > v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
> > >
> > > ---
> > >   drivers/thermal/thermal_core.c    |    1 +
> > >   drivers/thermal/thermal_core.h    |    3 +++
> > >   drivers/thermal/thermal_netlink.c |    9 +++------
> > >   3 files changed, 7 insertions(+), 6 deletions(-)
> > >
> > > Index: linux-pm/drivers/thermal/thermal_core.c
> > > ===================================================================
> > > --- linux-pm.orig/drivers/thermal/thermal_core.c
> > > +++ linux-pm/drivers/thermal/thermal_core.c
> > > @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
> > >       mutex_lock(&thermal_list_lock);
> > >       list_for_each_entry(tz, &thermal_tz_list, node) {
> > >               if (tz->id == id) {
> > > +                     get_device(&tz->device);
> > >                       match = tz;
> > >                       break;
> > >               }
> > > Index: linux-pm/drivers/thermal/thermal_core.h
> > > ===================================================================
> > > --- linux-pm.orig/drivers/thermal/thermal_core.h
> > > +++ linux-pm/drivers/thermal/thermal_core.h
> > > @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
> > >
> > >   struct thermal_zone_device *thermal_zone_get_by_id(int id);
> > >
> > > +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
> > > +          if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
> > > +
> > >   static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
> > >   {
> > >       return cdev->ops->get_requested_power && cdev->ops->state2power &&
> > > Index: linux-pm/drivers/thermal/thermal_netlink.c
> > > ===================================================================
> > > --- linux-pm.orig/drivers/thermal/thermal_netlink.c
> > > +++ linux-pm/drivers/thermal/thermal_netlink.c
> > > @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
> > >   {
> > >       struct sk_buff *msg = p->msg;
> > >       const struct thermal_trip_desc *td;
> > > -     struct thermal_zone_device *tz;
> > >       struct nlattr *start_trip;
> > >       int id;
> > >
> > > @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
> > >
> > >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >
> > > -     tz = thermal_zone_get_by_id(id);
> > > +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >       if (!tz)
> > >               return -EINVAL;
> > >
> > > @@ -488,7 +487,6 @@ out_cancel_nest:
> > >   static int thermal_genl_cmd_tz_get_temp(struct param *p)
> > >   {
> > >       struct sk_buff *msg = p->msg;
> > > -     struct thermal_zone_device *tz;
> > >       int temp, ret, id;
> > >
> > >       if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > > @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
> > >
> > >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >
> > > -     tz = thermal_zone_get_by_id(id);
> > > +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >       if (!tz)
> > >               return -EINVAL;
> > >
> > > @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
> > >   static int thermal_genl_cmd_tz_get_gov(struct param *p)
> > >   {
> > >       struct sk_buff *msg = p->msg;
> > > -     struct thermal_zone_device *tz;
> > >       int id, ret = 0;
> > >
> > >       if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > > @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
> > >
> > >       id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >
> > > -     tz = thermal_zone_get_by_id(id);
> > > +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >       if (!tz)
> > >               return -EINVAL;
> > >
> > >
> > >
> > >
> >
> > I wasn't aware of that helpers in cleanup.h.
> >
> > Could you help me to understand when this this
> > 'if (_T) put_device((&_T->device)' will be called?
>
> When the pointer variable initialized via the CLASS() macro goes out
> of scope (that is, before freeing the memory occupied by the pointer
> itself).

Note that DEFINE_GUARD() is based on DEFINE_CLASS() and the guard()
macro is based on the CLASS() one, so it's basically the same
mechanism.
Rafael J. Wysocki Oct. 4, 2024, 1:43 p.m. UTC | #5
On Fri, Oct 4, 2024 at 3:37 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 10/4/24 14:25, Rafael J. Wysocki wrote:
> > Hi Łukasz,
> >
> > On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>
> >> Hi Rafael,
> >>
> >> On 10/3/24 13:25, Rafael J. Wysocki wrote:
> >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> There are places in the thermal netlink code where nothing prevents
> >>> the thermal zone object from going away while being accessed after it
> >>> has been returned by thermal_zone_get_by_id().
> >>>
> >>> To address this, make thermal_zone_get_by_id() get a reference on the
> >>> thermal zone device object to be returned with the help of get_device(),
> >>> under thermal_list_lock, and adjust all of its callers to this change
> >>> with the help of the cleanup.h infrastructure.
> >>>
> >>> Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
> >>> Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
> >>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>> ---
> >>>
> >>> v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
> >>>
> >>> ---
> >>>    drivers/thermal/thermal_core.c    |    1 +
> >>>    drivers/thermal/thermal_core.h    |    3 +++
> >>>    drivers/thermal/thermal_netlink.c |    9 +++------
> >>>    3 files changed, 7 insertions(+), 6 deletions(-)
> >>>
> >>> Index: linux-pm/drivers/thermal/thermal_core.c
> >>> ===================================================================
> >>> --- linux-pm.orig/drivers/thermal/thermal_core.c
> >>> +++ linux-pm/drivers/thermal/thermal_core.c
> >>> @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
> >>>        mutex_lock(&thermal_list_lock);
> >>>        list_for_each_entry(tz, &thermal_tz_list, node) {
> >>>                if (tz->id == id) {
> >>> +                     get_device(&tz->device);
> >>>                        match = tz;
> >>>                        break;
> >>>                }
> >>> Index: linux-pm/drivers/thermal/thermal_core.h
> >>> ===================================================================
> >>> --- linux-pm.orig/drivers/thermal/thermal_core.h
> >>> +++ linux-pm/drivers/thermal/thermal_core.h
> >>> @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
> >>>
> >>>    struct thermal_zone_device *thermal_zone_get_by_id(int id);
> >>>
> >>> +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
> >>> +          if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
> >>> +
> >>>    static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
> >>>    {
> >>>        return cdev->ops->get_requested_power && cdev->ops->state2power &&
> >>> Index: linux-pm/drivers/thermal/thermal_netlink.c
> >>> ===================================================================
> >>> --- linux-pm.orig/drivers/thermal/thermal_netlink.c
> >>> +++ linux-pm/drivers/thermal/thermal_netlink.c
> >>> @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
> >>>    {
> >>>        struct sk_buff *msg = p->msg;
> >>>        const struct thermal_trip_desc *td;
> >>> -     struct thermal_zone_device *tz;
> >>>        struct nlattr *start_trip;
> >>>        int id;
> >>>
> >>> @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
> >>>
> >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >>>
> >>> -     tz = thermal_zone_get_by_id(id);
> >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> >>>        if (!tz)
> >>>                return -EINVAL;
> >>>
> >>> @@ -488,7 +487,6 @@ out_cancel_nest:
> >>>    static int thermal_genl_cmd_tz_get_temp(struct param *p)
> >>>    {
> >>>        struct sk_buff *msg = p->msg;
> >>> -     struct thermal_zone_device *tz;
> >>>        int temp, ret, id;
> >>>
> >>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> >>> @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
> >>>
> >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >>>
> >>> -     tz = thermal_zone_get_by_id(id);
> >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> >>>        if (!tz)
> >>>                return -EINVAL;
> >>>
> >>> @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
> >>>    static int thermal_genl_cmd_tz_get_gov(struct param *p)
> >>>    {
> >>>        struct sk_buff *msg = p->msg;
> >>> -     struct thermal_zone_device *tz;
> >>>        int id, ret = 0;
> >>>
> >>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> >>> @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
> >>>
> >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> >>>
> >>> -     tz = thermal_zone_get_by_id(id);
> >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> >>>        if (!tz)
> >>>                return -EINVAL;
> >>>
> >>>
> >>>
> >>>
> >>
> >> I wasn't aware of that helpers in cleanup.h.
> >>
> >> Could you help me to understand when this this
> >> 'if (_T) put_device((&_T->device)' will be called?
> >
> > When the pointer variable initialized via the CLASS() macro goes out
> > of scope (that is, before freeing the memory occupied by the pointer
> > itself).
>
>
> OK, so do we still need the old code in
> thermal_zone_device_unregister(), which calls
> put_device(&tz->device) ?

Yes, we do.

> Maybe that code can go away?

That particular one drops the reference acquired by device_register()
and I don't see an alternative clean way to drop it.
Rafael J. Wysocki Oct. 4, 2024, 1:48 p.m. UTC | #6
On Fri, Oct 4, 2024 at 3:43 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Fri, Oct 4, 2024 at 3:37 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >
> >
> >
> > On 10/4/24 14:25, Rafael J. Wysocki wrote:
> > > Hi Łukasz,
> > >
> > > On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> > >>
> > >> Hi Rafael,
> > >>
> > >> On 10/3/24 13:25, Rafael J. Wysocki wrote:
> > >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >>>
> > >>> There are places in the thermal netlink code where nothing prevents
> > >>> the thermal zone object from going away while being accessed after it
> > >>> has been returned by thermal_zone_get_by_id().
> > >>>
> > >>> To address this, make thermal_zone_get_by_id() get a reference on the
> > >>> thermal zone device object to be returned with the help of get_device(),
> > >>> under thermal_list_lock, and adjust all of its callers to this change
> > >>> with the help of the cleanup.h infrastructure.
> > >>>
> > >>> Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling")
> > >>> Cc: 6.8+ <stable@vger.kernel.org> # 6.8+
> > >>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > >>> ---
> > >>>
> > >>> v1 -> v2: Use the cleanup.h infrastructure to reduce the amount of code changes.
> > >>>
> > >>> ---
> > >>>    drivers/thermal/thermal_core.c    |    1 +
> > >>>    drivers/thermal/thermal_core.h    |    3 +++
> > >>>    drivers/thermal/thermal_netlink.c |    9 +++------
> > >>>    3 files changed, 7 insertions(+), 6 deletions(-)
> > >>>
> > >>> Index: linux-pm/drivers/thermal/thermal_core.c
> > >>> ===================================================================
> > >>> --- linux-pm.orig/drivers/thermal/thermal_core.c
> > >>> +++ linux-pm/drivers/thermal/thermal_core.c
> > >>> @@ -728,6 +728,7 @@ struct thermal_zone_device *thermal_zone
> > >>>        mutex_lock(&thermal_list_lock);
> > >>>        list_for_each_entry(tz, &thermal_tz_list, node) {
> > >>>                if (tz->id == id) {
> > >>> +                     get_device(&tz->device);
> > >>>                        match = tz;
> > >>>                        break;
> > >>>                }
> > >>> Index: linux-pm/drivers/thermal/thermal_core.h
> > >>> ===================================================================
> > >>> --- linux-pm.orig/drivers/thermal/thermal_core.h
> > >>> +++ linux-pm/drivers/thermal/thermal_core.h
> > >>> @@ -194,6 +194,9 @@ int for_each_thermal_governor(int (*cb)(
> > >>>
> > >>>    struct thermal_zone_device *thermal_zone_get_by_id(int id);
> > >>>
> > >>> +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
> > >>> +          if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
> > >>> +
> > >>>    static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
> > >>>    {
> > >>>        return cdev->ops->get_requested_power && cdev->ops->state2power &&
> > >>> Index: linux-pm/drivers/thermal/thermal_netlink.c
> > >>> ===================================================================
> > >>> --- linux-pm.orig/drivers/thermal/thermal_netlink.c
> > >>> +++ linux-pm/drivers/thermal/thermal_netlink.c
> > >>> @@ -443,7 +443,6 @@ static int thermal_genl_cmd_tz_get_trip(
> > >>>    {
> > >>>        struct sk_buff *msg = p->msg;
> > >>>        const struct thermal_trip_desc *td;
> > >>> -     struct thermal_zone_device *tz;
> > >>>        struct nlattr *start_trip;
> > >>>        int id;
> > >>>
> > >>> @@ -452,7 +451,7 @@ static int thermal_genl_cmd_tz_get_trip(
> > >>>
> > >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >>>
> > >>> -     tz = thermal_zone_get_by_id(id);
> > >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >>>        if (!tz)
> > >>>                return -EINVAL;
> > >>>
> > >>> @@ -488,7 +487,6 @@ out_cancel_nest:
> > >>>    static int thermal_genl_cmd_tz_get_temp(struct param *p)
> > >>>    {
> > >>>        struct sk_buff *msg = p->msg;
> > >>> -     struct thermal_zone_device *tz;
> > >>>        int temp, ret, id;
> > >>>
> > >>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > >>> @@ -496,7 +494,7 @@ static int thermal_genl_cmd_tz_get_temp(
> > >>>
> > >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >>>
> > >>> -     tz = thermal_zone_get_by_id(id);
> > >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >>>        if (!tz)
> > >>>                return -EINVAL;
> > >>>
> > >>> @@ -514,7 +512,6 @@ static int thermal_genl_cmd_tz_get_temp(
> > >>>    static int thermal_genl_cmd_tz_get_gov(struct param *p)
> > >>>    {
> > >>>        struct sk_buff *msg = p->msg;
> > >>> -     struct thermal_zone_device *tz;
> > >>>        int id, ret = 0;
> > >>>
> > >>>        if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
> > >>> @@ -522,7 +519,7 @@ static int thermal_genl_cmd_tz_get_gov(s
> > >>>
> > >>>        id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
> > >>>
> > >>> -     tz = thermal_zone_get_by_id(id);
> > >>> +     CLASS(thermal_zone_get_by_id, tz)(id);
> > >>>        if (!tz)
> > >>>                return -EINVAL;
> > >>>
> > >>>
> > >>>
> > >>>
> > >>
> > >> I wasn't aware of that helpers in cleanup.h.
> > >>
> > >> Could you help me to understand when this this
> > >> 'if (_T) put_device((&_T->device)' will be called?
> > >
> > > When the pointer variable initialized via the CLASS() macro goes out
> > > of scope (that is, before freeing the memory occupied by the pointer
> > > itself).
> >
> >
> > OK, so do we still need the old code in
> > thermal_zone_device_unregister(), which calls
> > put_device(&tz->device) ?
>
> Yes, we do.
>
> > Maybe that code can go away?
>
> That particular one drops the reference acquired by device_register()
> and I don't see an alternative clean way to drop it.

The problem there is that local variable tz goes out of scope at the
end of the function (at least formally) and put_device(&tz->device)
needs to be called before the wait_for_completion(&tz->removal) which
definitely needs tz to be still around.
Lukasz Luba Oct. 4, 2024, 1:53 p.m. UTC | #7
On 10/4/24 14:48, Rafael J. Wysocki wrote:
> On Fri, Oct 4, 2024 at 3:43 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>>
>> On Fri, Oct 4, 2024 at 3:37 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>>
>>>
>>>
>>> On 10/4/24 14:25, Rafael J. Wysocki wrote:
>>>> Hi Łukasz,
>>>>
>>>> On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
>>>>>
>>>>> Hi Rafael,
>>>>>
>>>>> On 10/3/24 13:25, Rafael J. Wysocki wrote:
>>>>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>>>>

[snip]

>>>>>>
>>>>>
>>>>> I wasn't aware of that helpers in cleanup.h.
>>>>>
>>>>> Could you help me to understand when this this
>>>>> 'if (_T) put_device((&_T->device)' will be called?
>>>>
>>>> When the pointer variable initialized via the CLASS() macro goes out
>>>> of scope (that is, before freeing the memory occupied by the pointer
>>>> itself).
>>>
>>>
>>> OK, so do we still need the old code in
>>> thermal_zone_device_unregister(), which calls
>>> put_device(&tz->device) ?
>>
>> Yes, we do.
>>
>>> Maybe that code can go away?
>>
>> That particular one drops the reference acquired by device_register()
>> and I don't see an alternative clean way to drop it.
> 
> The problem there is that local variable tz goes out of scope at the
> end of the function (at least formally) and put_device(&tz->device)
> needs to be called before the wait_for_completion(&tz->removal) which
> definitely needs tz to be still around.

OK, I see now. That makes sense. With that feel free to add:

Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Rafael J. Wysocki Oct. 4, 2024, 4:28 p.m. UTC | #8
On Fri, Oct 4, 2024 at 3:52 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
>
>
> On 10/4/24 14:48, Rafael J. Wysocki wrote:
> > On Fri, Oct 4, 2024 at 3:43 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
> >>
> >> On Fri, Oct 4, 2024 at 3:37 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>>
> >>>
> >>>
> >>> On 10/4/24 14:25, Rafael J. Wysocki wrote:
> >>>> Hi Łukasz,
> >>>>
> >>>> On Fri, Oct 4, 2024 at 10:01 AM Lukasz Luba <lukasz.luba@arm.com> wrote:
> >>>>>
> >>>>> Hi Rafael,
> >>>>>
> >>>>> On 10/3/24 13:25, Rafael J. Wysocki wrote:
> >>>>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>>>>
>
> [snip]
>
> >>>>>>
> >>>>>
> >>>>> I wasn't aware of that helpers in cleanup.h.
> >>>>>
> >>>>> Could you help me to understand when this this
> >>>>> 'if (_T) put_device((&_T->device)' will be called?
> >>>>
> >>>> When the pointer variable initialized via the CLASS() macro goes out
> >>>> of scope (that is, before freeing the memory occupied by the pointer
> >>>> itself).
> >>>
> >>>
> >>> OK, so do we still need the old code in
> >>> thermal_zone_device_unregister(), which calls
> >>> put_device(&tz->device) ?
> >>
> >> Yes, we do.
> >>
> >>> Maybe that code can go away?
> >>
> >> That particular one drops the reference acquired by device_register()
> >> and I don't see an alternative clean way to drop it.
> >
> > The problem there is that local variable tz goes out of scope at the
> > end of the function (at least formally) and put_device(&tz->device)
> > needs to be called before the wait_for_completion(&tz->removal) which
> > definitely needs tz to be still around.
>
> OK, I see now. That makes sense. With that feel free to add:
>
> Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>

Thank you!
diff mbox series

Patch

Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -728,6 +728,7 @@  struct thermal_zone_device *thermal_zone
 	mutex_lock(&thermal_list_lock);
 	list_for_each_entry(tz, &thermal_tz_list, node) {
 		if (tz->id == id) {
+			get_device(&tz->device);
 			match = tz;
 			break;
 		}
Index: linux-pm/drivers/thermal/thermal_core.h
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.h
+++ linux-pm/drivers/thermal/thermal_core.h
@@ -194,6 +194,9 @@  int for_each_thermal_governor(int (*cb)(
 
 struct thermal_zone_device *thermal_zone_get_by_id(int id);
 
+DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *,
+	     if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id)
+
 static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev)
 {
 	return cdev->ops->get_requested_power && cdev->ops->state2power &&
Index: linux-pm/drivers/thermal/thermal_netlink.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_netlink.c
+++ linux-pm/drivers/thermal/thermal_netlink.c
@@ -443,7 +443,6 @@  static int thermal_genl_cmd_tz_get_trip(
 {
 	struct sk_buff *msg = p->msg;
 	const struct thermal_trip_desc *td;
-	struct thermal_zone_device *tz;
 	struct nlattr *start_trip;
 	int id;
 
@@ -452,7 +451,7 @@  static int thermal_genl_cmd_tz_get_trip(
 
 	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
 
-	tz = thermal_zone_get_by_id(id);
+	CLASS(thermal_zone_get_by_id, tz)(id);
 	if (!tz)
 		return -EINVAL;
 
@@ -488,7 +487,6 @@  out_cancel_nest:
 static int thermal_genl_cmd_tz_get_temp(struct param *p)
 {
 	struct sk_buff *msg = p->msg;
-	struct thermal_zone_device *tz;
 	int temp, ret, id;
 
 	if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
@@ -496,7 +494,7 @@  static int thermal_genl_cmd_tz_get_temp(
 
 	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
 
-	tz = thermal_zone_get_by_id(id);
+	CLASS(thermal_zone_get_by_id, tz)(id);
 	if (!tz)
 		return -EINVAL;
 
@@ -514,7 +512,6 @@  static int thermal_genl_cmd_tz_get_temp(
 static int thermal_genl_cmd_tz_get_gov(struct param *p)
 {
 	struct sk_buff *msg = p->msg;
-	struct thermal_zone_device *tz;
 	int id, ret = 0;
 
 	if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
@@ -522,7 +519,7 @@  static int thermal_genl_cmd_tz_get_gov(s
 
 	id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]);
 
-	tz = thermal_zone_get_by_id(id);
+	CLASS(thermal_zone_get_by_id, tz)(id);
 	if (!tz)
 		return -EINVAL;