diff mbox series

[v6,09/10] arm64: support cpuidle-haltpoll

Message ID 20240726202134.627514-7-ankur.a.arora@oracle.com (mailing list archive)
State Superseded, archived
Headers show
Series Enable haltpoll on arm64 | expand

Commit Message

Ankur Arora July 26, 2024, 8:21 p.m. UTC
Add architectural support for cpuidle-haltpoll driver by defining
arch_haltpoll_*().

Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
selected, and given that we have an optimized polling mechanism
in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.

smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
a memory region in exclusive state and the WFE waiting for any
stores to it.

In the edge case -- no CPU stores to the waited region and there's no
interrupt -- the event-stream will provide the terminating condition
ensuring we don't wait forever, but because the event-stream runs at
a fixed frequency (configured at 10kHz) we might spend more time in
the polling stage than specified by cpuidle_poll_time().

This would only happen in the last iteration, since overshooting the
poll_limit means the governor moves out of the polling stage.

Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/Kconfig                        | 10 ++++++++++
 arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
 arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h

Comments

Okanovic, Haris July 29, 2024, 5:20 p.m. UTC | #1
On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> 
> 
> 
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
> 
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> 
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
> 
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
> 
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/arm64/Kconfig                        | 10 ++++++++++
>  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>  3 files changed, 42 insertions(+)
>  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
>         select ARCH_HAS_MEMBARRIER_SYNC_CORE
>         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> +       select ARCH_HAS_OPTIMIZED_POLL
>         select ARCH_HAS_PTE_DEVMAP
>         select ARCH_HAS_PTE_SPECIAL
>         select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>  config ARCH_SUSPEND_POSSIBLE
>         def_bool y
> 
> +config ARCH_CPUIDLE_HALTPOLL
> +       bool "Enable selection of the cpuidle-haltpoll driver"
> +       default n
> +       help
> +         cpuidle-haltpoll allows for adaptive polling based on
> +         current load before entering the idle state.
> +
> +         Some virtualized workloads benefit from using it.
> +
>  endmenu # "Power management options"
> 
>  menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>                                              lpi->index, state);
>  }
>  #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> +       /*
> +        * Enabling haltpoll requires two things:
> +        *
> +        * - Event stream support to provide a terminating condition to the
> +        *   WFE in the poll loop.
> +        *
> +        * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().

typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"

> +        *
> +        * Given that the second is missing, allow haltpoll to only be force
> +        * loaded.
> +        */
> +       return (arch_timer_evtstrm_available() && false) || force;

This should always evaluate false without force. Perhaps you meant
something like this?

```
-       return (arch_timer_evtstrm_available() && false) || force;
+       return arch_timer_evtstrm_available() || force;
```

Regards,
Haris Okanovic

> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
> --
> 2.43.5
>
Ankur Arora July 29, 2024, 6:02 p.m. UTC | #2
Okanovic, Haris <harisokn@amazon.com> writes:

> On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Add architectural support for cpuidle-haltpoll driver by defining
>> arch_haltpoll_*().
>>
>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> selected, and given that we have an optimized polling mechanism
>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>>
>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> a memory region in exclusive state and the WFE waiting for any
>> stores to it.
>>
>> In the edge case -- no CPU stores to the waited region and there's no
>> interrupt -- the event-stream will provide the terminating condition
>> ensuring we don't wait forever, but because the event-stream runs at
>> a fixed frequency (configured at 10kHz) we might spend more time in
>> the polling stage than specified by cpuidle_poll_time().
>>
>> This would only happen in the last iteration, since overshooting the
>> poll_limit means the governor moves out of the polling stage.
>>
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>  arch/arm64/Kconfig                        | 10 ++++++++++
>>  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>>  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>>  3 files changed, 42 insertions(+)
>>  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 5d91259ee7b5..cf1c6681eb0a 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -35,6 +35,7 @@ config ARM64
>>         select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>>         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> +       select ARCH_HAS_OPTIMIZED_POLL
>>         select ARCH_HAS_PTE_DEVMAP
>>         select ARCH_HAS_PTE_SPECIAL
>>         select ARCH_HAS_HW_PTE_YOUNG
>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>>  config ARCH_SUSPEND_POSSIBLE
>>         def_bool y
>>
>> +config ARCH_CPUIDLE_HALTPOLL
>> +       bool "Enable selection of the cpuidle-haltpoll driver"
>> +       default n
>> +       help
>> +         cpuidle-haltpoll allows for adaptive polling based on
>> +         current load before entering the idle state.
>> +
>> +         Some virtualized workloads benefit from using it.
>> +
>>  endmenu # "Power management options"
>>
>>  menu "CPU Power Management"
>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> new file mode 100644
>> index 000000000000..65f289407a6c
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> @@ -0,0 +1,9 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ARCH_HALTPOLL_H
>> +#define _ARCH_HALTPOLL_H
>> +
>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> +
>> +bool arch_haltpoll_want(bool force);
>> +#endif
>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> index f372295207fb..334df82a0eac 100644
>> --- a/arch/arm64/kernel/cpuidle.c
>> +++ b/arch/arm64/kernel/cpuidle.c
>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>>                                              lpi->index, state);
>>  }
>>  #endif
>> +
>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> +
>> +#include <asm/cpuidle_haltpoll.h>
>> +
>> +bool arch_haltpoll_want(bool force)
>> +{
>> +       /*
>> +        * Enabling haltpoll requires two things:
>> +        *
>> +        * - Event stream support to provide a terminating condition to the
>> +        *   WFE in the poll loop.
>> +        *
>> +        * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>
> typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
>
>> +        *
>> +        * Given that the second is missing, allow haltpoll to only be force
>> +        * loaded.
>> +        */
>> +       return (arch_timer_evtstrm_available() && false) || force;
>
> This should always evaluate false without force. Perhaps you meant
> something like this?
>
> ```
> -       return (arch_timer_evtstrm_available() && false) || force;
> +       return arch_timer_evtstrm_available() || force;
> ```

No. This was intentional. As I meniton in the comment above, right now
the KVM support is missing. Which means that the guest has no way to
tell the host to not poll as part of host haltpoll.

Until that is available, only allow force loading.

--
ankur
bibo mao Aug. 6, 2024, 1:37 a.m. UTC | #3
On 2024/7/27 上午4:21, Ankur Arora wrote:
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
> 
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> 
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
> 
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
> 
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>   arch/arm64/Kconfig                        | 10 ++++++++++
>   arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>   arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>   3 files changed, 42 insertions(+)
>   create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
>   	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>   	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>   	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> +	select ARCH_HAS_OPTIMIZED_POLL
>   	select ARCH_HAS_PTE_DEVMAP
>   	select ARCH_HAS_PTE_SPECIAL
>   	select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>   config ARCH_SUSPEND_POSSIBLE
>   	def_bool y
>   
> +config ARCH_CPUIDLE_HALTPOLL
> +	bool "Enable selection of the cpuidle-haltpoll driver"
> +	default n
> +	help
> +	  cpuidle-haltpoll allows for adaptive polling based on
> +	  current load before entering the idle state.
> +
> +	  Some virtualized workloads benefit from using it.
> +
>   endmenu # "Power management options"
>   
>   menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
It is better that guest supports halt poll on more architectures, 
LoongArch wants this if result is good.

Do we need disable halt polling on host hypervisor if guest also uses 
halt polling idle method?

Regards
Bibo Mao

> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>   					     lpi->index, state);
>   }
>   #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> +	/*
> +	 * Enabling haltpoll requires two things:
> +	 *
> +	 * - Event stream support to provide a terminating condition to the
> +	 *   WFE in the poll loop.
> +	 *
> +	 * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> +	 *
> +	 * Given that the second is missing, allow haltpoll to only be force
> +	 * loaded.
> +	 */
> +	return (arch_timer_evtstrm_available() && false) || force;
> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
>
Tomohiro Misono Aug. 9, 2024, 6:08 a.m. UTC | #4
> Subject: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
> 
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
> 
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> 
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
> 
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
> 
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
> 
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/arm64/Kconfig                        | 10 ++++++++++
>  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++

FYI, arch/arm64/kernel/cpuidle.c is move to drivers/acpi/arm64/ in 6.11
and therefore I couldn't apply the series to 6.11.
https://github.com/torvalds/linux/commit/99e7a8adc0ca906151f5d70ff68b8a81f53fd106

Regards,
Tomohiro Misono

>  3 files changed, 42 insertions(+)
>  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
>  	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>  	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>  	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> +	select ARCH_HAS_OPTIMIZED_POLL
>  	select ARCH_HAS_PTE_DEVMAP
>  	select ARCH_HAS_PTE_SPECIAL
>  	select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>  config ARCH_SUSPEND_POSSIBLE
>  	def_bool y
> 
> +config ARCH_CPUIDLE_HALTPOLL
> +	bool "Enable selection of the cpuidle-haltpoll driver"
> +	default n
> +	help
> +	  cpuidle-haltpoll allows for adaptive polling based on
> +	  current load before entering the idle state.
> +
> +	  Some virtualized workloads benefit from using it.
> +
>  endmenu # "Power management options"
> 
>  menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>  					     lpi->index, state);
>  }
>  #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> +	/*
> +	 * Enabling haltpoll requires two things:
> +	 *
> +	 * - Event stream support to provide a terminating condition to the
> +	 *   WFE in the poll loop.
> +	 *
> +	 * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> +	 *
> +	 * Given that the second is missing, allow haltpoll to only be force
> +	 * loaded.
> +	 */
> +	return (arch_timer_evtstrm_available() && false) || force;
> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
> --
> 2.43.5
Ankur Arora Aug. 12, 2024, 10:48 p.m. UTC | #5
maobibo <maobibo@loongson.cn> writes:

> On 2024/7/27 上午4:21, Ankur Arora wrote:
>> Add architectural support for cpuidle-haltpoll driver by defining
>> arch_haltpoll_*().
>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> selected, and given that we have an optimized polling mechanism
>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> a memory region in exclusive state and the WFE waiting for any
>> stores to it.
>> In the edge case -- no CPU stores to the waited region and there's no
>> interrupt -- the event-stream will provide the terminating condition
>> ensuring we don't wait forever, but because the event-stream runs at
>> a fixed frequency (configured at 10kHz) we might spend more time in
>> the polling stage than specified by cpuidle_poll_time().
>> This would only happen in the last iteration, since overshooting the
>> poll_limit means the governor moves out of the polling stage.
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>   arch/arm64/Kconfig                        | 10 ++++++++++
>>   arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>>   arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>>   3 files changed, 42 insertions(+)
>>   create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 5d91259ee7b5..cf1c6681eb0a 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -35,6 +35,7 @@ config ARM64
>>   	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>   	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>>   	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> +	select ARCH_HAS_OPTIMIZED_POLL
>>   	select ARCH_HAS_PTE_DEVMAP
>>   	select ARCH_HAS_PTE_SPECIAL
>>   	select ARCH_HAS_HW_PTE_YOUNG
>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>>   config ARCH_SUSPEND_POSSIBLE
>>   	def_bool y
>>   +config ARCH_CPUIDLE_HALTPOLL
>> +	bool "Enable selection of the cpuidle-haltpoll driver"
>> +	default n
>> +	help
>> +	  cpuidle-haltpoll allows for adaptive polling based on
>> +	  current load before entering the idle state.
>> +
>> +	  Some virtualized workloads benefit from using it.
>> +
>>   endmenu # "Power management options"
>>     menu "CPU Power Management"
>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> new file mode 100644
>> index 000000000000..65f289407a6c
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> @@ -0,0 +1,9 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ARCH_HALTPOLL_H
>> +#define _ARCH_HALTPOLL_H
>> +
>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> It is better that guest supports halt poll on more architectures, LoongArch
> wants this if result is good.
>
> Do we need disable halt polling on host hypervisor if guest also uses halt
> polling idle method?

Yes. The intent is to work on that separately from this series. As the comment
below states, until that is available we only allow force loading.

>> +
>> +bool arch_haltpoll_want(bool force);
>> +#endif
>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> index f372295207fb..334df82a0eac 100644
>> --- a/arch/arm64/kernel/cpuidle.c
>> +++ b/arch/arm64/kernel/cpuidle.c
>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>>   					     lpi->index, state);
>>   }
>>   #endif
>> +
>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> +
>> +#include <asm/cpuidle_haltpoll.h>
>> +
>> +bool arch_haltpoll_want(bool force)
>> +{
>> +	/*
>> +	 * Enabling haltpoll requires two things:
>> +	 *
>> +	 * - Event stream support to provide a terminating condition to the
>> +	 *   WFE in the poll loop.
>> +	 *
>> +	 * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>> +	 *
>> +	 * Given that the second is missing, allow haltpoll to only be force
>> +	 * loaded.
>> +	 */
>> +	return (arch_timer_evtstrm_available() && false) || force;
>> +}
>> +
>> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
>> +#endif
>>


--
ankur
bibo mao Aug. 13, 2024, 12:54 a.m. UTC | #6
On 2024/8/13 上午6:48, Ankur Arora wrote:
> 
> maobibo <maobibo@loongson.cn> writes:
> 
>> On 2024/7/27 上午4:21, Ankur Arora wrote:
>>> Add architectural support for cpuidle-haltpoll driver by defining
>>> arch_haltpoll_*().
>>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>>> selected, and given that we have an optimized polling mechanism
>>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>>> a memory region in exclusive state and the WFE waiting for any
>>> stores to it.
>>> In the edge case -- no CPU stores to the waited region and there's no
>>> interrupt -- the event-stream will provide the terminating condition
>>> ensuring we don't wait forever, but because the event-stream runs at
>>> a fixed frequency (configured at 10kHz) we might spend more time in
>>> the polling stage than specified by cpuidle_poll_time().
>>> This would only happen in the last iteration, since overshooting the
>>> poll_limit means the governor moves out of the polling stage.
>>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>>> ---
>>>    arch/arm64/Kconfig                        | 10 ++++++++++
>>>    arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>>>    arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>>>    3 files changed, 42 insertions(+)
>>>    create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>> index 5d91259ee7b5..cf1c6681eb0a 100644
>>> --- a/arch/arm64/Kconfig
>>> +++ b/arch/arm64/Kconfig
>>> @@ -35,6 +35,7 @@ config ARM64
>>>    	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>>    	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>>>    	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>>> +	select ARCH_HAS_OPTIMIZED_POLL
>>>    	select ARCH_HAS_PTE_DEVMAP
>>>    	select ARCH_HAS_PTE_SPECIAL
>>>    	select ARCH_HAS_HW_PTE_YOUNG
>>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>>>    config ARCH_SUSPEND_POSSIBLE
>>>    	def_bool y
>>>    +config ARCH_CPUIDLE_HALTPOLL
>>> +	bool "Enable selection of the cpuidle-haltpoll driver"
>>> +	default n
>>> +	help
>>> +	  cpuidle-haltpoll allows for adaptive polling based on
>>> +	  current load before entering the idle state.
>>> +
>>> +	  Some virtualized workloads benefit from using it.
>>> +
>>>    endmenu # "Power management options"
>>>      menu "CPU Power Management"
>>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>>> new file mode 100644
>>> index 000000000000..65f289407a6c
>>> --- /dev/null
>>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>>> @@ -0,0 +1,9 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#ifndef _ARCH_HALTPOLL_H
>>> +#define _ARCH_HALTPOLL_H
>>> +
>>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> It is better that guest supports halt poll on more architectures, LoongArch
>> wants this if result is good.
>>
>> Do we need disable halt polling on host hypervisor if guest also uses halt
>> polling idle method?
> 
> Yes. The intent is to work on that separately from this series. As the comment
> below states, until that is available we only allow force loading.
Thanks for your explanation. By internal test, it is useful for 
LoongArch virtmachine on some scenarios. And in late we want to add 
haltpoll support on LoongArch VM based your series.

Regards
Bibo Mao
> 
>>> +
>>> +bool arch_haltpoll_want(bool force);
>>> +#endif
>>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>>> index f372295207fb..334df82a0eac 100644
>>> --- a/arch/arm64/kernel/cpuidle.c
>>> +++ b/arch/arm64/kernel/cpuidle.c
>>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>>>    					     lpi->index, state);
>>>    }
>>>    #endif
>>> +
>>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>>> +
>>> +#include <asm/cpuidle_haltpoll.h>
>>> +
>>> +bool arch_haltpoll_want(bool force)
>>> +{
>>> +	/*
>>> +	 * Enabling haltpoll requires two things:
>>> +	 *
>>> +	 * - Event stream support to provide a terminating condition to the
>>> +	 *   WFE in the poll loop.
>>> +	 *
>>> +	 * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>>> +	 *
>>> +	 * Given that the second is missing, allow haltpoll to only be force
>>> +	 * loaded.
>>> +	 */
>>> +	return (arch_timer_evtstrm_available() && false) || force;
>>> +}
>>> +
>>> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
>>> +#endif
>>>
> 
> 
> --
> ankur
>
Okanovic, Haris Aug. 13, 2024, 3:26 p.m. UTC | #7
On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> 
> 
> 
> Okanovic, Haris <harisokn@amazon.com> writes:
> 
> > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > > 
> > > 
> > > 
> > > Add architectural support for cpuidle-haltpoll driver by defining
> > > arch_haltpoll_*().
> > > 
> > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> > > selected, and given that we have an optimized polling mechanism
> > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> > > 
> > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> > > a memory region in exclusive state and the WFE waiting for any
> > > stores to it.
> > > 
> > > In the edge case -- no CPU stores to the waited region and there's no
> > > interrupt -- the event-stream will provide the terminating condition
> > > ensuring we don't wait forever, but because the event-stream runs at
> > > a fixed frequency (configured at 10kHz) we might spend more time in
> > > the polling stage than specified by cpuidle_poll_time().
> > > 
> > > This would only happen in the last iteration, since overshooting the
> > > poll_limit means the governor moves out of the polling stage.
> > > 
> > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> > > ---
> > >  arch/arm64/Kconfig                        | 10 ++++++++++
> > >  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
> > >  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
> > >  3 files changed, 42 insertions(+)
> > >  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> > > 
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index 5d91259ee7b5..cf1c6681eb0a 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -35,6 +35,7 @@ config ARM64
> > >         select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > >         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> > >         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > +       select ARCH_HAS_OPTIMIZED_POLL
> > >         select ARCH_HAS_PTE_DEVMAP
> > >         select ARCH_HAS_PTE_SPECIAL
> > >         select ARCH_HAS_HW_PTE_YOUNG
> > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> > >  config ARCH_SUSPEND_POSSIBLE
> > >         def_bool y
> > > 
> > > +config ARCH_CPUIDLE_HALTPOLL
> > > +       bool "Enable selection of the cpuidle-haltpoll driver"
> > > +       default n
> > > +       help
> > > +         cpuidle-haltpoll allows for adaptive polling based on
> > > +         current load before entering the idle state.
> > > +
> > > +         Some virtualized workloads benefit from using it.
> > > +
> > >  endmenu # "Power management options"
> > > 
> > >  menu "CPU Power Management"
> > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > new file mode 100644
> > > index 000000000000..65f289407a6c
> > > --- /dev/null
> > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > @@ -0,0 +1,9 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef _ARCH_HALTPOLL_H
> > > +#define _ARCH_HALTPOLL_H
> > > +
> > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> > > +
> > > +bool arch_haltpoll_want(bool force);
> > > +#endif
> > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> > > index f372295207fb..334df82a0eac 100644
> > > --- a/arch/arm64/kernel/cpuidle.c
> > > +++ b/arch/arm64/kernel/cpuidle.c
> > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> > >                                              lpi->index, state);
> > >  }
> > >  #endif
> > > +
> > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> > > +
> > > +#include <asm/cpuidle_haltpoll.h>
> > > +
> > > +bool arch_haltpoll_want(bool force)
> > > +{
> > > +       /*
> > > +        * Enabling haltpoll requires two things:
> > > +        *
> > > +        * - Event stream support to provide a terminating condition to the
> > > +        *   WFE in the poll loop.
> > > +        *
> > > +        * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> > 
> > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
> > 
> > > +        *
> > > +        * Given that the second is missing, allow haltpoll to only be force
> > > +        * loaded.
> > > +        */
> > > +       return (arch_timer_evtstrm_available() && false) || force;
> > 
> > This should always evaluate false without force. Perhaps you meant
> > something like this?
> > 
> > ```
> > -       return (arch_timer_evtstrm_available() && false) || force;
> > +       return arch_timer_evtstrm_available() || force;
> > ```
> 
> No. This was intentional. As I meniton in the comment above, right now
> the KVM support is missing. Which means that the guest has no way to
> tell the host to not poll as part of host haltpoll.
> 
> Until that is available, only allow force loading.

I see, arm64's kvm is missing the poll control mechanism.

I'll follow-up your changes with a patch for AWS Graviton; still seeing
the same performance gains.

Tested-by: Haris Okanovic <harisokn@amazon.com>

> 
> --
> ankur
Ankur Arora Aug. 13, 2024, 6:56 p.m. UTC | #8
Okanovic, Haris <harisokn@amazon.com> writes:

> On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Okanovic, Haris <harisokn@amazon.com> writes:
>>
>> > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
>> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>> > >
>> > >
>> > >
>> > > Add architectural support for cpuidle-haltpoll driver by defining
>> > > arch_haltpoll_*().
>> > >
>> > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> > > selected, and given that we have an optimized polling mechanism
>> > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>> > >
>> > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> > > a memory region in exclusive state and the WFE waiting for any
>> > > stores to it.
>> > >
>> > > In the edge case -- no CPU stores to the waited region and there's no
>> > > interrupt -- the event-stream will provide the terminating condition
>> > > ensuring we don't wait forever, but because the event-stream runs at
>> > > a fixed frequency (configured at 10kHz) we might spend more time in
>> > > the polling stage than specified by cpuidle_poll_time().
>> > >
>> > > This would only happen in the last iteration, since overshooting the
>> > > poll_limit means the governor moves out of the polling stage.
>> > >
>> > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> > > ---
>> > >  arch/arm64/Kconfig                        | 10 ++++++++++
>> > >  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
>> > >  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
>> > >  3 files changed, 42 insertions(+)
>> > >  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>> > >
>> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> > > index 5d91259ee7b5..cf1c6681eb0a 100644
>> > > --- a/arch/arm64/Kconfig
>> > > +++ b/arch/arm64/Kconfig
>> > > @@ -35,6 +35,7 @@ config ARM64
>> > >         select ARCH_HAS_MEMBARRIER_SYNC_CORE
>> > >         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>> > >         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> > > +       select ARCH_HAS_OPTIMIZED_POLL
>> > >         select ARCH_HAS_PTE_DEVMAP
>> > >         select ARCH_HAS_PTE_SPECIAL
>> > >         select ARCH_HAS_HW_PTE_YOUNG
>> > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>> > >  config ARCH_SUSPEND_POSSIBLE
>> > >         def_bool y
>> > >
>> > > +config ARCH_CPUIDLE_HALTPOLL
>> > > +       bool "Enable selection of the cpuidle-haltpoll driver"
>> > > +       default n
>> > > +       help
>> > > +         cpuidle-haltpoll allows for adaptive polling based on
>> > > +         current load before entering the idle state.
>> > > +
>> > > +         Some virtualized workloads benefit from using it.
>> > > +
>> > >  endmenu # "Power management options"
>> > >
>> > >  menu "CPU Power Management"
>> > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> > > new file mode 100644
>> > > index 000000000000..65f289407a6c
>> > > --- /dev/null
>> > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> > > @@ -0,0 +1,9 @@
>> > > +/* SPDX-License-Identifier: GPL-2.0 */
>> > > +#ifndef _ARCH_HALTPOLL_H
>> > > +#define _ARCH_HALTPOLL_H
>> > > +
>> > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> > > +
>> > > +bool arch_haltpoll_want(bool force);
>> > > +#endif
>> > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> > > index f372295207fb..334df82a0eac 100644
>> > > --- a/arch/arm64/kernel/cpuidle.c
>> > > +++ b/arch/arm64/kernel/cpuidle.c
>> > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>> > >                                              lpi->index, state);
>> > >  }
>> > >  #endif
>> > > +
>> > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> > > +
>> > > +#include <asm/cpuidle_haltpoll.h>
>> > > +
>> > > +bool arch_haltpoll_want(bool force)
>> > > +{
>> > > +       /*
>> > > +        * Enabling haltpoll requires two things:
>> > > +        *
>> > > +        * - Event stream support to provide a terminating condition to the
>> > > +        *   WFE in the poll loop.
>> > > +        *
>> > > +        * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>> >
>> > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
>> >
>> > > +        *
>> > > +        * Given that the second is missing, allow haltpoll to only be force
>> > > +        * loaded.
>> > > +        */
>> > > +       return (arch_timer_evtstrm_available() && false) || force;
>> >
>> > This should always evaluate false without force. Perhaps you meant
>> > something like this?
>> >
>> > ```
>> > -       return (arch_timer_evtstrm_available() && false) || force;
>> > +       return arch_timer_evtstrm_available() || force;
>> > ```
>>
>> No. This was intentional. As I meniton in the comment above, right now
>> the KVM support is missing. Which means that the guest has no way to
>> tell the host to not poll as part of host haltpoll.
>>
>> Until that is available, only allow force loading.
>
> I see, arm64's kvm is missing the poll control mechanism.
>
> I'll follow-up your changes with a patch for AWS Graviton; still seeing
> the same performance gains.

Excellent. Could you Cc me when you send out your changes?

> Tested-by: Haris Okanovic <harisokn@amazon.com>

Thanks!

--
ankur
Okanovic, Haris Aug. 13, 2024, 9:14 p.m. UTC | #9
On Tue, 2024-08-13 at 11:56 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> 
> 
> 
> Okanovic, Haris <harisokn@amazon.com> writes:
> 
> > On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > > 
> > > 
> > > 
> > > Okanovic, Haris <harisokn@amazon.com> writes:
> > > 
> > > > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > > > > 
> > > > > 
> > > > > 
> > > > > Add architectural support for cpuidle-haltpoll driver by defining
> > > > > arch_haltpoll_*().
> > > > > 
> > > > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> > > > > selected, and given that we have an optimized polling mechanism
> > > > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> > > > > 
> > > > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> > > > > a memory region in exclusive state and the WFE waiting for any
> > > > > stores to it.
> > > > > 
> > > > > In the edge case -- no CPU stores to the waited region and there's no
> > > > > interrupt -- the event-stream will provide the terminating condition
> > > > > ensuring we don't wait forever, but because the event-stream runs at
> > > > > a fixed frequency (configured at 10kHz) we might spend more time in
> > > > > the polling stage than specified by cpuidle_poll_time().
> > > > > 
> > > > > This would only happen in the last iteration, since overshooting the
> > > > > poll_limit means the governor moves out of the polling stage.
> > > > > 
> > > > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> > > > > ---
> > > > >  arch/arm64/Kconfig                        | 10 ++++++++++
> > > > >  arch/arm64/include/asm/cpuidle_haltpoll.h |  9 +++++++++
> > > > >  arch/arm64/kernel/cpuidle.c               | 23 +++++++++++++++++++++++
> > > > >  3 files changed, 42 insertions(+)
> > > > >  create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > > 
> > > > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > > > index 5d91259ee7b5..cf1c6681eb0a 100644
> > > > > --- a/arch/arm64/Kconfig
> > > > > +++ b/arch/arm64/Kconfig
> > > > > @@ -35,6 +35,7 @@ config ARM64
> > > > >         select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > > >         select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> > > > >         select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > > > +       select ARCH_HAS_OPTIMIZED_POLL
> > > > >         select ARCH_HAS_PTE_DEVMAP
> > > > >         select ARCH_HAS_PTE_SPECIAL
> > > > >         select ARCH_HAS_HW_PTE_YOUNG
> > > > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> > > > >  config ARCH_SUSPEND_POSSIBLE
> > > > >         def_bool y
> > > > > 
> > > > > +config ARCH_CPUIDLE_HALTPOLL
> > > > > +       bool "Enable selection of the cpuidle-haltpoll driver"
> > > > > +       default n
> > > > > +       help
> > > > > +         cpuidle-haltpoll allows for adaptive polling based on
> > > > > +         current load before entering the idle state.
> > > > > +
> > > > > +         Some virtualized workloads benefit from using it.
> > > > > +
> > > > >  endmenu # "Power management options"
> > > > > 
> > > > >  menu "CPU Power Management"
> > > > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > > new file mode 100644
> > > > > index 000000000000..65f289407a6c
> > > > > --- /dev/null
> > > > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > > @@ -0,0 +1,9 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +#ifndef _ARCH_HALTPOLL_H
> > > > > +#define _ARCH_HALTPOLL_H
> > > > > +
> > > > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> > > > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> > > > > +
> > > > > +bool arch_haltpoll_want(bool force);
> > > > > +#endif
> > > > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> > > > > index f372295207fb..334df82a0eac 100644
> > > > > --- a/arch/arm64/kernel/cpuidle.c
> > > > > +++ b/arch/arm64/kernel/cpuidle.c
> > > > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> > > > >                                              lpi->index, state);
> > > > >  }
> > > > >  #endif
> > > > > +
> > > > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> > > > > +
> > > > > +#include <asm/cpuidle_haltpoll.h>
> > > > > +
> > > > > +bool arch_haltpoll_want(bool force)
> > > > > +{
> > > > > +       /*
> > > > > +        * Enabling haltpoll requires two things:
> > > > > +        *
> > > > > +        * - Event stream support to provide a terminating condition to the
> > > > > +        *   WFE in the poll loop.
> > > > > +        *
> > > > > +        * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> > > > 
> > > > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
> > > > 
> > > > > +        *
> > > > > +        * Given that the second is missing, allow haltpoll to only be force
> > > > > +        * loaded.
> > > > > +        */
> > > > > +       return (arch_timer_evtstrm_available() && false) || force;
> > > > 
> > > > This should always evaluate false without force. Perhaps you meant
> > > > something like this?
> > > > 
> > > > ```
> > > > -       return (arch_timer_evtstrm_available() && false) || force;
> > > > +       return arch_timer_evtstrm_available() || force;
> > > > ```
> > > 
> > > No. This was intentional. As I meniton in the comment above, right now
> > > the KVM support is missing. Which means that the guest has no way to
> > > tell the host to not poll as part of host haltpoll.
> > > 
> > > Until that is available, only allow force loading.
> > 
> > I see, arm64's kvm is missing the poll control mechanism.
> > 
> > I'll follow-up your changes with a patch for AWS Graviton; still seeing
> > the same performance gains.
> 
> Excellent. Could you Cc me when you send out your changes?

Will do

-- Haris Okanovic

> 
> > Tested-by: Haris Okanovic <harisokn@amazon.com>
> 
> Thanks!
> 
> --
> ankur
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..cf1c6681eb0a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -35,6 +35,7 @@  config ARM64
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+	select ARCH_HAS_OPTIMIZED_POLL
 	select ARCH_HAS_PTE_DEVMAP
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_HW_PTE_YOUNG
@@ -2376,6 +2377,15 @@  config ARCH_HIBERNATION_HEADER
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_CPUIDLE_HALTPOLL
+	bool "Enable selection of the cpuidle-haltpoll driver"
+	default n
+	help
+	  cpuidle-haltpoll allows for adaptive polling based on
+	  current load before entering the idle state.
+
+	  Some virtualized workloads benefit from using it.
+
 endmenu # "Power management options"
 
 menu "CPU Power Management"
diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..65f289407a6c
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,9 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+static inline void arch_haltpoll_enable(unsigned int cpu) { }
+static inline void arch_haltpoll_disable(unsigned int cpu) { }
+
+bool arch_haltpoll_want(bool force);
+#endif
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index f372295207fb..334df82a0eac 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -72,3 +72,26 @@  __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
 					     lpi->index, state);
 }
 #endif
+
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
+
+#include <asm/cpuidle_haltpoll.h>
+
+bool arch_haltpoll_want(bool force)
+{
+	/*
+	 * Enabling haltpoll requires two things:
+	 *
+	 * - Event stream support to provide a terminating condition to the
+	 *   WFE in the poll loop.
+	 *
+	 * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
+	 *
+	 * Given that the second is missing, allow haltpoll to only be force
+	 * loaded.
+	 */
+	return (arch_timer_evtstrm_available() && false) || force;
+}
+
+EXPORT_SYMBOL_GPL(arch_haltpoll_want);
+#endif