diff mbox series

[v4,3/3] thermal: intel: hfi: Enable interface only when required

Message ID 20240212161615.161935-4-stanislaw.gruszka@linux.intel.com (mailing list archive)
State Not Applicable
Headers show
Series thermal/netlink/intel_hfi: Enable HFI feature only when required | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 991 this patch: 991
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 2 maintainers not CCed: lukasz.luba@arm.com rui.zhang@intel.com
netdev/build_clang success Errors and warnings before: 1007 this patch: 1007
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1008 this patch: 1008
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-02-13--00-00 (tests: 1436)

Commit Message

Stanislaw Gruszka Feb. 12, 2024, 4:16 p.m. UTC
Enable and disable hardware feedback interface (HFI) when user space
handler is present. For example, enable HFI, when intel-speed-select or
Intel Low Power daemon is running and subscribing to thermal netlink
events. When user space handlers exit or remove subscription for
thermal netlink events, disable HFI.

Summary of changes:

- Register a thermal genetlink notifier

- In the notifier, process THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
reason codes to count number of thermal event group netlink multicast
clients. If thermal netlink group has any listener enable HFI on all
packages. If there are no listener disable HFI on all packages.

- When CPU is online, instead of blindly enabling HFI, check if
the thermal netlink group has any listener. This will make sure that
HFI is not enabled by default during boot time.

- Actual processing to enable/disable matches what is done in
suspend/resume callbacks. Create two functions hfi_do_enable()
and hfi_do_disable(), which can be called from  the netlink notifier
callback and suspend/resume callbacks.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
---
 drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
 1 file changed, 85 insertions(+), 10 deletions(-)

Comments

Rafael J. Wysocki Feb. 13, 2024, 1:59 p.m. UTC | #1
On Mon, Feb 12, 2024 at 5:16 PM Stanislaw Gruszka
<stanislaw.gruszka@linux.intel.com> wrote:
>
> Enable and disable hardware feedback interface (HFI) when user space
> handler is present. For example, enable HFI, when intel-speed-select or
> Intel Low Power daemon is running and subscribing to thermal netlink
> events. When user space handlers exit or remove subscription for
> thermal netlink events, disable HFI.
>
> Summary of changes:
>
> - Register a thermal genetlink notifier
>
> - In the notifier, process THERMAL_NOTIFY_BIND and THERMAL_NOTIFY_UNBIND
> reason codes to count number of thermal event group netlink multicast
> clients. If thermal netlink group has any listener enable HFI on all
> packages. If there are no listener disable HFI on all packages.
>
> - When CPU is online, instead of blindly enabling HFI, check if
> the thermal netlink group has any listener. This will make sure that
> HFI is not enabled by default during boot time.
>
> - Actual processing to enable/disable matches what is done in
> suspend/resume callbacks. Create two functions hfi_do_enable()
> and hfi_do_disable(), which can be called from  the netlink notifier
> callback and suspend/resume callbacks.
>
> Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
> ---
>  drivers/thermal/intel/intel_hfi.c | 95 +++++++++++++++++++++++++++----
>  1 file changed, 85 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index 3b04c6ec4fca..5e1e2b5269b7 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -159,6 +159,7 @@ struct hfi_cpu_info {
>  static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
>
>  static int max_hfi_instances;
> +static int hfi_thermal_clients_num;
>  static struct hfi_instance *hfi_instances;
>
>  static struct hfi_features hfi_features;
> @@ -477,8 +478,11 @@ void intel_hfi_online(unsigned int cpu)
>  enable:
>         cpumask_set_cpu(cpu, hfi_instance->cpus);
>
> -       /* Enable this HFI instance if this is its first online CPU. */
> -       if (cpumask_weight(hfi_instance->cpus) == 1) {
> +       /*
> +        * Enable this HFI instance if this is its first online CPU and
> +        * there are user-space clients of thermal events.
> +        */
> +       if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
>                 hfi_set_hw_table(hfi_instance);
>                 hfi_enable();
>         }
> @@ -573,28 +577,93 @@ static __init int hfi_parse_features(void)
>         return 0;
>  }
>
> -static void hfi_do_enable(void)
> +/*
> + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> + * callbacks or under protection of hfi_instance_lock.
> + */

In the comment above I would say "If concurrency is not prevented by
other means, the HFI enable/disable routines must be called under
hfi_instance_lock." and I would retain the comments below (they don't
hurt IMO).

> +static void hfi_do_enable(void *ptr)

I would call this hfi_enable_instance().

> +{
> +       struct hfi_instance *hfi_instance = ptr;

Why is this variable needed ro even useful?  prt can be passed
directly to hfi_set_hw_table().

> +
> +       hfi_set_hw_table(hfi_instance);
> +       hfi_enable();
> +}
> +
> +static void hfi_do_disable(void *ptr)

And I'd call this hfi_disable_instance().

> +{
> +       hfi_disable();
> +}
> +
> +static void hfi_syscore_resume(void)
>  {
>         /* This code runs only on the boot CPU. */
>         struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
>         struct hfi_instance *hfi_instance = info->hfi_instance;
>
> -       /* No locking needed. There is no concurrency with CPU online. */
> -       hfi_set_hw_table(hfi_instance);
> -       hfi_enable();
> +       if (hfi_thermal_clients_num > 0)
> +               hfi_do_enable(hfi_instance);
>  }
>
> -static int hfi_do_disable(void)
> +static int hfi_syscore_suspend(void)
>  {
> -       /* No locking needed. There is no concurrency with CPU offline. */
>         hfi_disable();
>
>         return 0;
>  }
>
>  static struct syscore_ops hfi_pm_ops = {
> -       .resume = hfi_do_enable,
> -       .suspend = hfi_do_disable,
> +       .resume = hfi_syscore_resume,
> +       .suspend = hfi_syscore_suspend,
> +};
> +
> +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> +                             void *_notify)
> +{
> +       struct thermal_genl_notify *notify = _notify;
> +       struct hfi_instance *hfi_instance;
> +       smp_call_func_t func;
> +       unsigned int cpu;
> +       int i;
> +
> +       if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> +               return NOTIFY_DONE;
> +
> +       if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> +               return NOTIFY_DONE;
> +
> +       mutex_lock(&hfi_instance_lock);
> +
> +       switch (state) {
> +       case THERMAL_NOTIFY_BIND:
> +               hfi_thermal_clients_num++;
> +               break;
> +
> +       case THERMAL_NOTIFY_UNBIND:
> +               hfi_thermal_clients_num--;
> +               break;
> +       }
> +
> +       if (hfi_thermal_clients_num > 0)
> +               func = hfi_do_enable;
> +       else
> +               func = hfi_do_disable;
> +
> +       for (i = 0; i < max_hfi_instances; i++) {
> +               hfi_instance = &hfi_instances[i];
> +               if (cpumask_empty(hfi_instance->cpus))
> +                       continue;
> +
> +               cpu = cpumask_any(hfi_instance->cpus);
> +               smp_call_function_single(cpu, func, hfi_instance, true);
> +       }
> +
> +       mutex_unlock(&hfi_instance_lock);

So AFAICS, one instance can be enabled multiple times because of this.
  I guess that's OK?  In any case, it would be kind of nice to leave a
note regarding it somewhere here.

> +
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block hfi_thermal_nb = {
> +       .notifier_call = hfi_thermal_notify,
>  };
>
>  void __init intel_hfi_init(void)
> @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
>         if (!hfi_updates_wq)
>                 goto err_nomem;
>
> +       if (thermal_genl_register_notifier(&hfi_thermal_nb))
> +               goto err_nl_notif;

Is it possible for any clients to be there before the notifier is
registered?  If not, it would be good to add a comment about it.

> +
>         register_syscore_ops(&hfi_pm_ops);
>
>         return;
>
> +err_nl_notif:
> +       destroy_workqueue(hfi_updates_wq);
> +
>  err_nomem:
>         for (j = 0; j < i; ++j) {
>                 hfi_instance = &hfi_instances[j];
> --
Stanislaw Gruszka Feb. 22, 2024, 4:53 p.m. UTC | #2
On Tue, Feb 13, 2024 at 02:59:10PM +0100, Rafael J. Wysocki wrote:
> > -static void hfi_do_enable(void)
> > +/*
> > + * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
> > + * callbacks or under protection of hfi_instance_lock.
> > + */
> 
> In the comment above I would say "If concurrency is not prevented by
> other means, the HFI enable/disable routines must be called under
> hfi_instance_lock." 

Ok. Will reword this way.

> and I would retain the comments below (they don't
> hurt IMO).

I found those comments somewhat confusing. FWICT at worst
what can happen when enable/resume race CPU online and
disable/suspend race with CPU offline is enable twice
or disable twice. What I think is fine, though plan to
check this (see below).

> > +static void hfi_do_enable(void *ptr)
> 
> I would call this hfi_enable_instance().
> 
> > +{
> > +       struct hfi_instance *hfi_instance = ptr;
> 
> Why is this variable needed ro even useful?  prt can be passed
> directly to hfi_set_hw_table().

Ok, will remove it.

> > +
> > +       hfi_set_hw_table(hfi_instance);
> > +       hfi_enable();
> > +}
> > +
> > +static void hfi_do_disable(void *ptr)
> 
> And I'd call this hfi_disable_instance().

Ok.

> > +static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
> > +                             void *_notify)
> > +{
> > +       struct thermal_genl_notify *notify = _notify;
> > +       struct hfi_instance *hfi_instance;
> > +       smp_call_func_t func;
> > +       unsigned int cpu;
> > +       int i;
> > +
> > +       if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
> > +               return NOTIFY_DONE;
> > +
> > +       if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
> > +               return NOTIFY_DONE;
> > +
> > +       mutex_lock(&hfi_instance_lock);
> > +
> > +       switch (state) {
> > +       case THERMAL_NOTIFY_BIND:
> > +               hfi_thermal_clients_num++;
> > +               break;
> > +
> > +       case THERMAL_NOTIFY_UNBIND:
> > +               hfi_thermal_clients_num--;
> > +               break;
> > +       }
> > +
> > +       if (hfi_thermal_clients_num > 0)
> > +               func = hfi_do_enable;
> > +       else
> > +               func = hfi_do_disable;
> > +
> > +       for (i = 0; i < max_hfi_instances; i++) {
> > +               hfi_instance = &hfi_instances[i];
> > +               if (cpumask_empty(hfi_instance->cpus))
> > +                       continue;
> > +
> > +               cpu = cpumask_any(hfi_instance->cpus);
> > +               smp_call_function_single(cpu, func, hfi_instance, true);
> > +       }
> > +
> > +       mutex_unlock(&hfi_instance_lock);
> 
> So AFAICS, one instance can be enabled multiple times because of this.
>   I guess that's OK?  In any case, it would be kind of nice to leave a
> note regarding it somewhere here.

It's write the same values to the same registers. So I think this 
should be fine. However after your comment I start to think there
perhaps could be some side-effect of writing the registers.
I'll double check (previously I verified that double enable works,
but only on MTL) or eventually rearrange code to do not enable already
enabled interface.

> > +
> > +       return NOTIFY_OK;
> > +}
> > +
> > +static struct notifier_block hfi_thermal_nb = {
> > +       .notifier_call = hfi_thermal_notify,
> >  };
> >
> >  void __init intel_hfi_init(void)
> > @@ -628,10 +697,16 @@ void __init intel_hfi_init(void)
> >         if (!hfi_updates_wq)
> >                 goto err_nomem;
> >
> > +       if (thermal_genl_register_notifier(&hfi_thermal_nb))
> > +               goto err_nl_notif;
> 
> Is it possible for any clients to be there before the notifier is
> registered?  If not, it would be good to add a comment about it.

HFI is build-in so it's started before any user space. I added note about that
in the cover letter but indeed it should be comment in the code. Will fix.  

Regards
Stanislaw
diff mbox series

Patch

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 3b04c6ec4fca..5e1e2b5269b7 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -159,6 +159,7 @@  struct hfi_cpu_info {
 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
 
 static int max_hfi_instances;
+static int hfi_thermal_clients_num;
 static struct hfi_instance *hfi_instances;
 
 static struct hfi_features hfi_features;
@@ -477,8 +478,11 @@  void intel_hfi_online(unsigned int cpu)
 enable:
 	cpumask_set_cpu(cpu, hfi_instance->cpus);
 
-	/* Enable this HFI instance if this is its first online CPU. */
-	if (cpumask_weight(hfi_instance->cpus) == 1) {
+	/*
+	 * Enable this HFI instance if this is its first online CPU and
+	 * there are user-space clients of thermal events.
+	 */
+	if (cpumask_weight(hfi_instance->cpus) == 1 && hfi_thermal_clients_num > 0) {
 		hfi_set_hw_table(hfi_instance);
 		hfi_enable();
 	}
@@ -573,28 +577,93 @@  static __init int hfi_parse_features(void)
 	return 0;
 }
 
-static void hfi_do_enable(void)
+/*
+ * HFI enable/disable run in non-concurrent manner on boot CPU in syscore
+ * callbacks or under protection of hfi_instance_lock.
+ */
+static void hfi_do_enable(void *ptr)
+{
+	struct hfi_instance *hfi_instance = ptr;
+
+	hfi_set_hw_table(hfi_instance);
+	hfi_enable();
+}
+
+static void hfi_do_disable(void *ptr)
+{
+	hfi_disable();
+}
+
+static void hfi_syscore_resume(void)
 {
 	/* This code runs only on the boot CPU. */
 	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
 	struct hfi_instance *hfi_instance = info->hfi_instance;
 
-	/* No locking needed. There is no concurrency with CPU online. */
-	hfi_set_hw_table(hfi_instance);
-	hfi_enable();
+	if (hfi_thermal_clients_num > 0)
+		hfi_do_enable(hfi_instance);
 }
 
-static int hfi_do_disable(void)
+static int hfi_syscore_suspend(void)
 {
-	/* No locking needed. There is no concurrency with CPU offline. */
 	hfi_disable();
 
 	return 0;
 }
 
 static struct syscore_ops hfi_pm_ops = {
-	.resume = hfi_do_enable,
-	.suspend = hfi_do_disable,
+	.resume = hfi_syscore_resume,
+	.suspend = hfi_syscore_suspend,
+};
+
+static int hfi_thermal_notify(struct notifier_block *nb, unsigned long state,
+			      void *_notify)
+{
+	struct thermal_genl_notify *notify = _notify;
+	struct hfi_instance *hfi_instance;
+	smp_call_func_t func;
+	unsigned int cpu;
+	int i;
+
+	if (notify->mcgrp != THERMAL_GENL_EVENT_GROUP)
+		return NOTIFY_DONE;
+
+	if (state != THERMAL_NOTIFY_BIND && state != THERMAL_NOTIFY_UNBIND)
+		return NOTIFY_DONE;
+
+	mutex_lock(&hfi_instance_lock);
+
+	switch (state) {
+	case THERMAL_NOTIFY_BIND:
+		hfi_thermal_clients_num++;
+		break;
+
+	case THERMAL_NOTIFY_UNBIND:
+		hfi_thermal_clients_num--;
+		break;
+	}
+
+	if (hfi_thermal_clients_num > 0)
+		func = hfi_do_enable;
+	else
+		func = hfi_do_disable;
+
+	for (i = 0; i < max_hfi_instances; i++) {
+		hfi_instance = &hfi_instances[i];
+		if (cpumask_empty(hfi_instance->cpus))
+			continue;
+
+		cpu = cpumask_any(hfi_instance->cpus);
+		smp_call_function_single(cpu, func, hfi_instance, true);
+	}
+
+	mutex_unlock(&hfi_instance_lock);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hfi_thermal_nb = {
+	.notifier_call = hfi_thermal_notify,
 };
 
 void __init intel_hfi_init(void)
@@ -628,10 +697,16 @@  void __init intel_hfi_init(void)
 	if (!hfi_updates_wq)
 		goto err_nomem;
 
+	if (thermal_genl_register_notifier(&hfi_thermal_nb))
+		goto err_nl_notif;
+
 	register_syscore_ops(&hfi_pm_ops);
 
 	return;
 
+err_nl_notif:
+	destroy_workqueue(hfi_updates_wq);
+
 err_nomem:
 	for (j = 0; j < i; ++j) {
 		hfi_instance = &hfi_instances[j];