diff mbox series

[8/9] arm64: support cpuidle-haltpoll

Message ID 20240430183730.561960-9-ankur.a.arora@oracle.com (mailing list archive)
State New, archived
Headers show
Series Enable haltpoll for arm64 | expand

Commit Message

Ankur Arora April 30, 2024, 6:37 p.m. UTC
Add architectural support for the cpuidle-haltpoll driver by defining
arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
an optimized polling mechanism via smp_cond_load*().

Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
cpuidle-haltpoll to be selected.

Note that we limit cpuidle-haltpoll support to when the event-stream is
available. This is necessary because polling via smp_cond_load_relaxed()
uses WFE to wait for a store which might not happen for an prolonged
period of time. So, ensure the event-stream is around to provide a
terminating condition.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/Kconfig                        | 10 ++++++++++
 arch/arm64/include/asm/cpuidle_haltpoll.h | 21 +++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h

Comments

Okanovic, Haris May 30, 2024, 11:07 p.m. UTC | #1
On Tue, 2024-04-30 at 11:37 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> 
> 
> 
> Add architectural support for the cpuidle-haltpoll driver by defining
> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
> an optimized polling mechanism via smp_cond_load*().
> 
> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
> cpuidle-haltpoll to be selected.
> 
> Note that we limit cpuidle-haltpoll support to when the event-stream is
> available. This is necessary because polling via smp_cond_load_relaxed()
> uses WFE to wait for a store which might not happen for an prolonged
> period of time. So, ensure the event-stream is around to provide a
> terminating condition.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/arm64/Kconfig                        | 10 ++++++++++
>  arch/arm64/include/asm/cpuidle_haltpoll.h | 21 +++++++++++++++++++++
>  2 files changed, 31 insertions(+)
>  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 7b11c98b3e84..6f2df162b10e 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -34,6 +34,7 @@ config ARM64
>         select ARCH_HAS_MEMBARRIER_SYNC_CORE
>         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> +       select ARCH_HAS_OPTIMIZED_POLL
>         select ARCH_HAS_PTE_DEVMAP
>         select ARCH_HAS_PTE_SPECIAL
>         select ARCH_HAS_HW_PTE_YOUNG
> @@ -2331,6 +2332,15 @@ config ARCH_HIBERNATION_HEADER
>  config ARCH_SUSPEND_POSSIBLE
>         def_bool y
> 
> +config ARCH_CPUIDLE_HALTPOLL
> +       bool "Enable selection of the cpuidle-haltpoll driver"
> +       default n
> +       help
> +         cpuidle-haltpoll allows for adaptive polling based on
> +         current load before entering the idle state.
> +
> +         Some virtualized workloads benefit from using it.
> +
>  endmenu # "Power management options"
> 
>  menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..a79bdec7f516
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_HALTPOLL_H
> +#define _ASM_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu)
> +{
> +}
> +
> +static inline void arch_haltpoll_disable(unsigned int cpu)
> +{
> +}
> +
> +static inline bool arch_haltpoll_supported(void)
> +{
> +       /*
> +        * Ensure the event stream is available to provide a terminating
> +        * condition to the WFE in the poll loop.
> +        */
> +       return arch_timer_evtstrm_available();

Note this fails build when CONFIG_HALTPOLL_CPUIDLE=m (module):

ERROR: modpost: "arch_cpu_idle" [drivers/cpuidle/cpuidle-haltpoll.ko]
undefined!                                                         
ERROR: modpost: "arch_timer_evtstrm_available"
[drivers/cpuidle/cpuidle-haltpoll.ko] undefined!                      
make[2]: *** [scripts/Makefile.modpost:145: Module.symvers] Error 1
make[1]: *** [/home/ubuntu/linux/Makefile:1886: modpost] Error 2
make: *** [Makefile:240: __sub-make] Error 2

You could add EXPORT_SYMBOL_*()'s on the above helpers or restrict
HALTPOLL_CPUIDLE module to built-in (remove "tristate" Kconfig).

Otherwise, everything worked for me when built-in (=y) atop 6.10.0
(4a4be1a). I see similar performance gains in `perf bench` on AWS
Graviton3 c7g.16xlarge.

Regards,
Haris Okanovic

> +}
> +#endif
> --
> 2.39.3
>
Ankur Arora June 4, 2024, 11:09 p.m. UTC | #2
Okanovic, Haris <harisokn@amazon.com> writes:

> On Tue, 2024-04-30 at 11:37 -0700, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Add architectural support for the cpuidle-haltpoll driver by defining
>> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
>> an optimized polling mechanism via smp_cond_load*().
>>
>> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
>> cpuidle-haltpoll to be selected.
>>
>> Note that we limit cpuidle-haltpoll support to when the event-stream is
>> available. This is necessary because polling via smp_cond_load_relaxed()
>> uses WFE to wait for a store which might not happen for an prolonged
>> period of time. So, ensure the event-stream is around to provide a
>> terminating condition.
>>
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>  arch/arm64/Kconfig                        | 10 ++++++++++
>>  arch/arm64/include/asm/cpuidle_haltpoll.h | 21 +++++++++++++++++++++
>>  2 files changed, 31 insertions(+)
>>  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 7b11c98b3e84..6f2df162b10e 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -34,6 +34,7 @@ config ARM64
>>         select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>>         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> +       select ARCH_HAS_OPTIMIZED_POLL
>>         select ARCH_HAS_PTE_DEVMAP
>>         select ARCH_HAS_PTE_SPECIAL
>>         select ARCH_HAS_HW_PTE_YOUNG
>> @@ -2331,6 +2332,15 @@ config ARCH_HIBERNATION_HEADER
>>  config ARCH_SUSPEND_POSSIBLE
>>         def_bool y
>>
>> +config ARCH_CPUIDLE_HALTPOLL
>> +       bool "Enable selection of the cpuidle-haltpoll driver"
>> +       default n
>> +       help
>> +         cpuidle-haltpoll allows for adaptive polling based on
>> +         current load before entering the idle state.
>> +
>> +         Some virtualized workloads benefit from using it.
>> +
>>  endmenu # "Power management options"
>>
>>  menu "CPU Power Management"
>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> new file mode 100644
>> index 000000000000..a79bdec7f516
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> @@ -0,0 +1,21 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ASM_HALTPOLL_H
>> +#define _ASM_HALTPOLL_H
>> +
>> +static inline void arch_haltpoll_enable(unsigned int cpu)
>> +{
>> +}
>> +
>> +static inline void arch_haltpoll_disable(unsigned int cpu)
>> +{
>> +}
>> +
>> +static inline bool arch_haltpoll_supported(void)
>> +{
>> +       /*
>> +        * Ensure the event stream is available to provide a terminating
>> +        * condition to the WFE in the poll loop.
>> +        */
>> +       return arch_timer_evtstrm_available();
>
> Note this fails build when CONFIG_HALTPOLL_CPUIDLE=m (module):
>
> ERROR: modpost: "arch_cpu_idle" [drivers/cpuidle/cpuidle-haltpoll.ko]
> undefined!
> ERROR: modpost: "arch_timer_evtstrm_available"
> [drivers/cpuidle/cpuidle-haltpoll.ko] undefined!
> make[2]: *** [scripts/Makefile.modpost:145: Module.symvers] Error 1
> make[1]: *** [/home/ubuntu/linux/Makefile:1886: modpost] Error 2
> make: *** [Makefile:240: __sub-make] Error 2

Thanks for trying it out. Missed that.

> You could add EXPORT_SYMBOL_*()'s on the above helpers or restrict
> HALTPOLL_CPUIDLE module to built-in (remove "tristate" Kconfig).

Yeah AFAICT this is the only cpuidle driver providing the module
option. Unfortunately can't remove the tristate thing. People might
already be using it as a module on x86.

I think the arch_cpu_idle() makes sense to export. For
arch_timer_evtstrm_available(), eventually the arch_haltpoll_*()
in any case need to move out of a header file. I'll just do that
now.

> Otherwise, everything worked for me when built-in (=y) atop 6.10.0
> (4a4be1a). I see similar performance gains in `perf bench` on AWS
> Graviton3 c7g.16xlarge.

Excellent. Thanks for checking.

--
ankur
Sudeep Holla June 19, 2024, 12:17 p.m. UTC | #3
On Tue, Apr 30, 2024 at 11:37:29AM -0700, Ankur Arora wrote:
> Add architectural support for the cpuidle-haltpoll driver by defining
> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
> an optimized polling mechanism via smp_cond_load*().
> 
> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
> cpuidle-haltpoll to be selected.
> 
> Note that we limit cpuidle-haltpoll support to when the event-stream is
> available. This is necessary because polling via smp_cond_load_relaxed()
> uses WFE to wait for a store which might not happen for an prolonged
> period of time. So, ensure the event-stream is around to provide a
> terminating condition.
>

Currently the event stream is configured 10kHz(1 signal per 100uS IIRC).
But the information in the cpuidle states for exit latency and residency
is set to 0(as per drivers/cpuidle/poll_state.c). Will this not cause any
performance issues ?
Ankur Arora June 21, 2024, 11:59 p.m. UTC | #4
Sudeep Holla <sudeep.holla@arm.com> writes:

> On Tue, Apr 30, 2024 at 11:37:29AM -0700, Ankur Arora wrote:
>> Add architectural support for the cpuidle-haltpoll driver by defining
>> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
>> an optimized polling mechanism via smp_cond_load*().
>>
>> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
>> cpuidle-haltpoll to be selected.
>>
>> Note that we limit cpuidle-haltpoll support to when the event-stream is
>> available. This is necessary because polling via smp_cond_load_relaxed()
>> uses WFE to wait for a store which might not happen for an prolonged
>> period of time. So, ensure the event-stream is around to provide a
>> terminating condition.
>>
>
> Currently the event stream is configured 10kHz(1 signal per 100uS IIRC).
> But the information in the cpuidle states for exit latency and residency
> is set to 0(as per drivers/cpuidle/poll_state.c). Will this not cause any
> performance issues ?

No I don't think there's any performance issue.

When the core is waiting in WFE for &thread_info->flags to
change, and set_nr_if_polling() happens, the CPU will come out
of the wait quickly.
So, the exit latency, residency can be reasonably set to 0.

If, however, there is no store to &thread_info->flags, then the event
stream is what would cause us to come out of the WFE and check if
the poll timeout has been exceeded.
In that case, there was no work to be done, so there was nothing
to wake up from.

So, in either circumstance there's no performance loss.

However, when we are polling under the haltpoll governor, this might
mean that we spend more time polling than determined based on the
guest_halt_poll_ns. But, that would only happen in the last polling
iteration.

So, I'd say, at worst no performance loss. But, we would sometimes
poll for longer than necessary before exiting to the host.

--
ankur
Sudeep Holla June 24, 2024, 10:54 a.m. UTC | #5
On Fri, Jun 21, 2024 at 04:59:22PM -0700, Ankur Arora wrote:
> 
> Sudeep Holla <sudeep.holla@arm.com> writes:
> 
> > On Tue, Apr 30, 2024 at 11:37:29AM -0700, Ankur Arora wrote:
> >> Add architectural support for the cpuidle-haltpoll driver by defining
> >> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
> >> an optimized polling mechanism via smp_cond_load*().
> >>
> >> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
> >> cpuidle-haltpoll to be selected.
> >>
> >> Note that we limit cpuidle-haltpoll support to when the event-stream is
> >> available. This is necessary because polling via smp_cond_load_relaxed()
> >> uses WFE to wait for a store which might not happen for an prolonged
> >> period of time. So, ensure the event-stream is around to provide a
> >> terminating condition.
> >>
> >
> > Currently the event stream is configured 10kHz(1 signal per 100uS IIRC).
> > But the information in the cpuidle states for exit latency and residency
> > is set to 0(as per drivers/cpuidle/poll_state.c). Will this not cause any
> > performance issues ?
> 
> No I don't think there's any performance issue.
>

Thanks for the confirmation, that was my assumption as well.

> When the core is waiting in WFE for &thread_info->flags to
> change, and set_nr_if_polling() happens, the CPU will come out
> of the wait quickly.
> So, the exit latency, residency can be reasonably set to 0.
>

Sure

> If, however, there is no store to &thread_info->flags, then the event
> stream is what would cause us to come out of the WFE and check if
> the poll timeout has been exceeded.
> In that case, there was no work to be done, so there was nothing
> to wake up from.
>

This is exactly what I was referring when I asked about performance, but
it looks like it is not a concern for the reason specified about.

> So, in either circumstance there's no performance loss.
>
> However, when we are polling under the haltpoll governor, this might
> mean that we spend more time polling than determined based on the
> guest_halt_poll_ns. But, that would only happen in the last polling
> iteration.
>
> So, I'd say, at worst no performance loss. But, we would sometimes
> poll for longer than necessary before exiting to the host.
>

Does it make sense to add some comment that implies briefly what we
have discussed here ? Mainly why 0 exit and target residency values
are fine and how worst case WFE wakeup doesn't impact the performance.

--
Regards,
Sudeep
Ankur Arora June 25, 2024, 1:17 a.m. UTC | #6
Sudeep Holla <sudeep.holla@arm.com> writes:

> On Fri, Jun 21, 2024 at 04:59:22PM -0700, Ankur Arora wrote:
>>
>> Sudeep Holla <sudeep.holla@arm.com> writes:
>>
>> > On Tue, Apr 30, 2024 at 11:37:29AM -0700, Ankur Arora wrote:
>> >> Add architectural support for the cpuidle-haltpoll driver by defining
>> >> arch_haltpoll_*(). Also select ARCH_HAS_OPTIMIZED_POLL since we have
>> >> an optimized polling mechanism via smp_cond_load*().
>> >>
>> >> Add the configuration option, ARCH_CPUIDLE_HALTPOLL to allow
>> >> cpuidle-haltpoll to be selected.
>> >>
>> >> Note that we limit cpuidle-haltpoll support to when the event-stream is
>> >> available. This is necessary because polling via smp_cond_load_relaxed()
>> >> uses WFE to wait for a store which might not happen for an prolonged
>> >> period of time. So, ensure the event-stream is around to provide a
>> >> terminating condition.
>> >>
>> >
>> > Currently the event stream is configured 10kHz(1 signal per 100uS IIRC).
>> > But the information in the cpuidle states for exit latency and residency
>> > is set to 0(as per drivers/cpuidle/poll_state.c). Will this not cause any
>> > performance issues ?
>>
>> No I don't think there's any performance issue.
>>
>
> Thanks for the confirmation, that was my assumption as well.
>
>> When the core is waiting in WFE for &thread_info->flags to
>> change, and set_nr_if_polling() happens, the CPU will come out
>> of the wait quickly.
>> So, the exit latency, residency can be reasonably set to 0.
>>
>
> Sure
>
>> If, however, there is no store to &thread_info->flags, then the event
>> stream is what would cause us to come out of the WFE and check if
>> the poll timeout has been exceeded.
>> In that case, there was no work to be done, so there was nothing
>> to wake up from.
>>
>
> This is exactly what I was referring when I asked about performance, but
> it looks like it is not a concern for the reason specified about.
>
>> So, in either circumstance there's no performance loss.
>>
>> However, when we are polling under the haltpoll governor, this might
>> mean that we spend more time polling than determined based on the
>> guest_halt_poll_ns. But, that would only happen in the last polling
>> iteration.
>>
>> So, I'd say, at worst no performance loss. But, we would sometimes
>> poll for longer than necessary before exiting to the host.
>>
>
> Does it make sense to add some comment that implies briefly what we
> have discussed here ? Mainly why 0 exit and target residency values
> are fine and how worst case WFE wakeup doesn't impact the performance.

Yeah let me thresh out the commit message for this patch a bit more.

Thanks for the review!

--
ankur
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b11c98b3e84..6f2df162b10e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -34,6 +34,7 @@  config ARM64
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	select ARCH_HAS_OPTIMIZED_POLL
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_HW_PTE_YOUNG
@@ -2331,6 +2332,15 @@  config ARCH_HIBERNATION_HEADER
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_CPUIDLE_HALTPOLL
+	bool "Enable selection of the cpuidle-haltpoll driver"
+	default n
+	help
+	  cpuidle-haltpoll allows for adaptive polling based on
+	  current load before entering the idle state.
+
+	  Some virtualized workloads benefit from using it.
+
 endmenu # "Power management options"
 
 menu "CPU Power Management"
diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..a79bdec7f516
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_HALTPOLL_H
+#define _ASM_HALTPOLL_H
+
+static inline void arch_haltpoll_enable(unsigned int cpu)
+{
+}
+
+static inline void arch_haltpoll_disable(unsigned int cpu)
+{
+}
+
+static inline bool arch_haltpoll_supported(void)
+{
+	/*
+	 * Ensure the event stream is available to provide a terminating
+	 * condition to the WFE in the poll loop.
+	 */
+	return arch_timer_evtstrm_available();
+}
+#endif