diff mbox series

[RFC,1/2] riscv: process: Introduce idle thread using Zawrs extension

Message ID 20240418114942.52770-2-luxu.kernel@bytedance.com (mailing list archive)
State Superseded
Headers show
Series riscv: Idle thread using Zawrs extension | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR fail PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Xu Lu April 18, 2024, 11:49 a.m. UTC
The Zawrs extension introduces a new instruction, WRS.NTO, which will
register a reservation set and cause the hart to temporarily stall
execution in a low-power state until a store occurs to the reservation
set or an interrupt is observed.

This commit implements a new version of the idle thread for RISC-V via
the Zawrs extension.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
---
 arch/riscv/Kconfig                 | 24 +++++++++++++++++
 arch/riscv/include/asm/cpuidle.h   | 11 +-------
 arch/riscv/include/asm/hwcap.h     |  1 +
 arch/riscv/include/asm/processor.h | 17 +++++++++++++
 arch/riscv/kernel/cpu.c            |  5 ++++
 arch/riscv/kernel/cpufeature.c     |  1 +
 arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
 7 files changed, 89 insertions(+), 11 deletions(-)

Comments

Conor Dooley April 18, 2024, 3:05 p.m. UTC | #1
+ Drew,

On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:
> The Zawrs extension introduces a new instruction WRS.NTO, which will
> register a reservation set and causes the hart to temporarily stall
> execution in a low-power state until a store occurs to the reservation
> set or an interrupt is observed.
> 
> This commit implements new version of idle thread for RISC-V via Zawrs
> extension.
> 
> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
> Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
> Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
> ---
>  arch/riscv/Kconfig                 | 24 +++++++++++++++++
>  arch/riscv/include/asm/cpuidle.h   | 11 +-------
>  arch/riscv/include/asm/hwcap.h     |  1 +
>  arch/riscv/include/asm/processor.h | 17 +++++++++++++
>  arch/riscv/kernel/cpu.c            |  5 ++++
>  arch/riscv/kernel/cpufeature.c     |  1 +
>  arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
>  7 files changed, 89 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index be09c8836d56..a0d344e9803f 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -19,6 +19,7 @@ config RISCV
>  	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
>  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
>  	select ARCH_HAS_BINFMT_FLAT
> +	select ARCH_HAS_CPU_FINALIZE_INIT
>  	select ARCH_HAS_CURRENT_STACK_POINTER
>  	select ARCH_HAS_DEBUG_VIRTUAL if MMU
>  	select ARCH_HAS_DEBUG_VM_PGTABLE
> @@ -525,6 +526,20 @@ config RISCV_ISA_SVPBMT
>  
>  	   If you don't know what to do here, say Y.
>  
> +config RISCV_ISA_ZAWRS
> +	bool "Zawrs extension support for wait-on-reservation-set instructions"
> +	depends on RISCV_ALTERNATIVE
> +	default y
> +	help
> +	   Adds support to dynamically detect the presence of the Zawrs
> +	   extension and enable its usage.

Drew, could you, in your update, use the wording:
	   Add support for enabling optimisations in the kernel when the
	   Zawrs extension is detected at boot.

There was some confusion recently about what these options were actually
for, because this option doesn't control "dynamic detection" as the
ACPI or DT detection is compiled at all times. I had written a patch for
this wording in other options at the time but had forgotten to properly
send it:
https://lore.kernel.org/linux-riscv/20240418-stable-railway-7cce07e1e440@spud/T/#u

> +
> +	   The Zawrs extension defines a pair of instructions to be used
> +	   in polling loops that allows a core to enter a low-power state
> +	   and wait on a store to a memory location.
> +
> +	   If you don't know what to do here, say Y.
> +
>  config TOOLCHAIN_HAS_V
>  	bool
>  	default y
> @@ -1075,6 +1090,15 @@ endmenu # "Power management options"
>  
>  menu "CPU Power Management"
>  
> +config RISCV_ZAWRS_IDLE
> +	bool "Idle thread using ZAWRS extensions"
> +	depends on RISCV_ISA_ZAWRS
> +	default y
> +	help
> +		Adds support to implement idle thread using ZAWRS extension.
> +
> +		If you don't know what to do here, say Y.

I don't think this second option is needed, why would we not always want
to use the Zawrs version of this when it is available? Can we do it
unconditionally when RISCV_ISA_ZAWRS is set and the extension is
detected at runtime?

Cheers,
Conor.
Xu Lu April 18, 2024, 4:14 p.m. UTC | #2
On Thu, Apr 18, 2024 at 11:06 PM Conor Dooley <conor@kernel.org> wrote:
>
> + Drew,
>
> On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:
> > The Zawrs extension introduces a new instruction WRS.NTO, which will
> > register a reservation set and causes the hart to temporarily stall
> > execution in a low-power state until a store occurs to the reservation
> > set or an interrupt is observed.
> >
> > This commit implements new version of idle thread for RISC-V via Zawrs
> > extension.
> >
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
> > Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
> > Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
> > ---
> >  arch/riscv/Kconfig                 | 24 +++++++++++++++++
> >  arch/riscv/include/asm/cpuidle.h   | 11 +-------
> >  arch/riscv/include/asm/hwcap.h     |  1 +
> >  arch/riscv/include/asm/processor.h | 17 +++++++++++++
> >  arch/riscv/kernel/cpu.c            |  5 ++++
> >  arch/riscv/kernel/cpufeature.c     |  1 +
> >  arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
> >  7 files changed, 89 insertions(+), 11 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index be09c8836d56..a0d344e9803f 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -19,6 +19,7 @@ config RISCV
> >       select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
> >       select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
> >       select ARCH_HAS_BINFMT_FLAT
> > +     select ARCH_HAS_CPU_FINALIZE_INIT
> >       select ARCH_HAS_CURRENT_STACK_POINTER
> >       select ARCH_HAS_DEBUG_VIRTUAL if MMU
> >       select ARCH_HAS_DEBUG_VM_PGTABLE
> > @@ -525,6 +526,20 @@ config RISCV_ISA_SVPBMT
> >
> >          If you don't know what to do here, say Y.
> >
> > +config RISCV_ISA_ZAWRS
> > +     bool "Zawrs extension support for wait-on-reservation-set instructions"
> > +     depends on RISCV_ALTERNATIVE
> > +     default y
> > +     help
> > +        Adds support to dynamically detect the presence of the Zawrs
> > +        extension and enable its usage.
>
> Drew, could you, in your update, use the wording:
>            Add support for enabling optimisations in the kernel when the
>            Zawrs extension is detected at boot.
>
> There was some confusion recently about what these options were actually
> for, because this option doesn't control "dynamic detection" as the
> ACPI or DT detection is compiled at all times. I had written a patch for
> this wording in other options at the time but had forgotten to properly
> send it:
> https://lore.kernel.org/linux-riscv/20240418-stable-railway-7cce07e1e440@spud/T/#u
>
> > +
> > +        The Zawrs extension defines a pair of instructions to be used
> > +        in polling loops that allows a core to enter a low-power state
> > +        and wait on a store to a memory location.
> > +
> > +        If you don't know what to do here, say Y.
> > +
> >  config TOOLCHAIN_HAS_V
> >       bool
> >       default y
> > @@ -1075,6 +1090,15 @@ endmenu # "Power management options"
> >
> >  menu "CPU Power Management"
> >
> > +config RISCV_ZAWRS_IDLE
> > +     bool "Idle thread using ZAWRS extensions"
> > +     depends on RISCV_ISA_ZAWRS
> > +     default y
> > +     help
> > +             Adds support to implement idle thread using ZAWRS extension.
> > +
> > +             If you don't know what to do here, say Y.
>
> I don't think this second option is needed, why would we not always want
> to use the Zawrs version of this when it is available? Can we do it
> unconditionally when RISCV_ISA_ZAWRS is set and the extension is
> detected at runtime?
>
> Cheers,
> Conor.

Indeed, we can always choose WRS.NTO when entering idle.

This config is introduced for the second commit in this patch series.
In the second commit, we detect whether the target cpu is idle when
sending an IPI and write the IPI info to the reservation set of the idle
cpu so as to avoid sending a physical IPI. Besides, the target idle cpu
need not go through the traditional interrupt handling routine. However,
if all cpus are busy and rarely enter idle, this commit may introduce
the performance overhead of extra instructions when sending an IPI. Thus
we introduce this config just in case.

Regards,
Xu Lu

>
>
Andrew Jones April 18, 2024, 7:10 p.m. UTC | #3
On Thu, Apr 18, 2024 at 04:05:55PM +0100, Conor Dooley wrote:
> + Drew,
> 
> On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:
> > The Zawrs extension introduces a new instruction WRS.NTO, which will
> > register a reservation set and causes the hart to temporarily stall
> > execution in a low-power state until a store occurs to the reservation
> > set or an interrupt is observed.
> > 
> > This commit implements new version of idle thread for RISC-V via Zawrs
> > extension.
> > 
> > Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> > Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
> > Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
> > Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
> > ---
> >  arch/riscv/Kconfig                 | 24 +++++++++++++++++
> >  arch/riscv/include/asm/cpuidle.h   | 11 +-------
> >  arch/riscv/include/asm/hwcap.h     |  1 +
> >  arch/riscv/include/asm/processor.h | 17 +++++++++++++
> >  arch/riscv/kernel/cpu.c            |  5 ++++
> >  arch/riscv/kernel/cpufeature.c     |  1 +
> >  arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
> >  7 files changed, 89 insertions(+), 11 deletions(-)
> > 
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index be09c8836d56..a0d344e9803f 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -19,6 +19,7 @@ config RISCV
> >  	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
> >  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
> >  	select ARCH_HAS_BINFMT_FLAT
> > +	select ARCH_HAS_CPU_FINALIZE_INIT
> >  	select ARCH_HAS_CURRENT_STACK_POINTER
> >  	select ARCH_HAS_DEBUG_VIRTUAL if MMU
> >  	select ARCH_HAS_DEBUG_VM_PGTABLE
> > @@ -525,6 +526,20 @@ config RISCV_ISA_SVPBMT
> >  
> >  	   If you don't know what to do here, say Y.
> >  
> > +config RISCV_ISA_ZAWRS
> > +	bool "Zawrs extension support for wait-on-reservation-set instructions"
> > +	depends on RISCV_ALTERNATIVE
> > +	default y
> > +	help
> > +	   Adds support to dynamically detect the presence of the Zawrs
> > +	   extension and enable its usage.
> 
> Drew, could you, in your update, use the wording:
> 	   Add support for enabling optimisations in the kernel when the
> 	   Zawrs extension is detected at boot.

How about

  The Zawrs extension defines a pair of instructions to be used in
  polling loops which allow a hart to enter a low-power state or to
  trap to the hypervisor while waiting on a store to a memory location.
  Enable the use of these instructions when the Zawrs extension is
  detected at boot.

Thanks,
drew
Samuel Holland April 18, 2024, 10 p.m. UTC | #4
Hi Drew,

On 2024-04-18 2:10 PM, Andrew Jones wrote:
> On Thu, Apr 18, 2024 at 04:05:55PM +0100, Conor Dooley wrote:
>> + Drew,
>>
>> On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:
>>> The Zawrs extension introduces a new instruction WRS.NTO, which will
>>> register a reservation set and causes the hart to temporarily stall
>>> execution in a low-power state until a store occurs to the reservation
>>> set or an interrupt is observed.
>>>
>>> This commit implements new version of idle thread for RISC-V via Zawrs
>>> extension.
>>>
>>> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
>>> Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
>>> Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
>>> Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
>>> ---
>>>  arch/riscv/Kconfig                 | 24 +++++++++++++++++
>>>  arch/riscv/include/asm/cpuidle.h   | 11 +-------
>>>  arch/riscv/include/asm/hwcap.h     |  1 +
>>>  arch/riscv/include/asm/processor.h | 17 +++++++++++++
>>>  arch/riscv/kernel/cpu.c            |  5 ++++
>>>  arch/riscv/kernel/cpufeature.c     |  1 +
>>>  arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
>>>  7 files changed, 89 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index be09c8836d56..a0d344e9803f 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -19,6 +19,7 @@ config RISCV
>>>  	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
>>>  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
>>>  	select ARCH_HAS_BINFMT_FLAT
>>> +	select ARCH_HAS_CPU_FINALIZE_INIT
>>>  	select ARCH_HAS_CURRENT_STACK_POINTER
>>>  	select ARCH_HAS_DEBUG_VIRTUAL if MMU
>>>  	select ARCH_HAS_DEBUG_VM_PGTABLE
>>> @@ -525,6 +526,20 @@ config RISCV_ISA_SVPBMT
>>>  
>>>  	   If you don't know what to do here, say Y.
>>>  
>>> +config RISCV_ISA_ZAWRS
>>> +	bool "Zawrs extension support for wait-on-reservation-set instructions"
>>> +	depends on RISCV_ALTERNATIVE
>>> +	default y
>>> +	help
>>> +	   Adds support to dynamically detect the presence of the Zawrs
>>> +	   extension and enable its usage.
>>
>> Drew, could you, in your update, use the wording:
>> 	   Add support for enabling optimisations in the kernel when the
>> 	   Zawrs extension is detected at boot.
> 
> How about
> 
>   The Zawrs extension defines a pair of instructions to be used in
>   polling loops which allow a hart to enter a low-power state or to
>   trap to the hypervisor while waiting on a store to a memory location.
>   Enable the use of these instructions when the Zawrs extension is

                                        ^ in the kernel

I believe "in the kernel" was an important part of the clarification that these
Kconfig options do not affect whether userspace can use these instructions.

Regards,
Samuel

>   detected at boot.
> 
> Thanks,
> drew
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Conor Dooley April 18, 2024, 10:09 p.m. UTC | #5
On Thu, Apr 18, 2024 at 05:00:42PM -0500, Samuel Holland wrote:
> Hi Drew,
> 
> On 2024-04-18 2:10 PM, Andrew Jones wrote:
> > On Thu, Apr 18, 2024 at 04:05:55PM +0100, Conor Dooley wrote:
> >> + Drew,
> >>
> >> On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:
> >>> The Zawrs extension introduces a new instruction WRS.NTO, which will
> >>> register a reservation set and causes the hart to temporarily stall
> >>> execution in a low-power state until a store occurs to the reservation
> >>> set or an interrupt is observed.
> >>>
> >>> This commit implements new version of idle thread for RISC-V via Zawrs
> >>> extension.
> >>>
> >>> Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
> >>> Reviewed-by: Hangjing Li <lihangjing@bytedance.com>
> >>> Reviewed-by: Liang Deng <dengliang.1214@bytedance.com>
> >>> Reviewed-by: Wen Chai <chaiwen.cc@bytedance.com>
> >>> ---
> >>>  arch/riscv/Kconfig                 | 24 +++++++++++++++++
> >>>  arch/riscv/include/asm/cpuidle.h   | 11 +-------
> >>>  arch/riscv/include/asm/hwcap.h     |  1 +
> >>>  arch/riscv/include/asm/processor.h | 17 +++++++++++++
> >>>  arch/riscv/kernel/cpu.c            |  5 ++++
> >>>  arch/riscv/kernel/cpufeature.c     |  1 +
> >>>  arch/riscv/kernel/process.c        | 41 +++++++++++++++++++++++++++++-
> >>>  7 files changed, 89 insertions(+), 11 deletions(-)
> >>>
> >>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >>> index be09c8836d56..a0d344e9803f 100644
> >>> --- a/arch/riscv/Kconfig
> >>> +++ b/arch/riscv/Kconfig
> >>> @@ -19,6 +19,7 @@ config RISCV
> >>>  	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
> >>>  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
> >>>  	select ARCH_HAS_BINFMT_FLAT
> >>> +	select ARCH_HAS_CPU_FINALIZE_INIT
> >>>  	select ARCH_HAS_CURRENT_STACK_POINTER
> >>>  	select ARCH_HAS_DEBUG_VIRTUAL if MMU
> >>>  	select ARCH_HAS_DEBUG_VM_PGTABLE
> >>> @@ -525,6 +526,20 @@ config RISCV_ISA_SVPBMT
> >>>  
> >>>  	   If you don't know what to do here, say Y.
> >>>  
> >>> +config RISCV_ISA_ZAWRS
> >>> +	bool "Zawrs extension support for wait-on-reservation-set instructions"
> >>> +	depends on RISCV_ALTERNATIVE
> >>> +	default y
> >>> +	help
> >>> +	   Adds support to dynamically detect the presence of the Zawrs
> >>> +	   extension and enable its usage.
> >>
> >> Drew, could you, in your update, use the wording:
> >> 	   Add support for enabling optimisations in the kernel when the
> >> 	   Zawrs extension is detected at boot.
> > 
> > How about

Probably should have said, this was just a replacement for the first
paragraph, not the entire text.

> > 
> >   The Zawrs extension defines a pair of instructions to be used in
> >   polling loops which allow a hart to enter a low-power state or to
> >   trap to the hypervisor while waiting on a store to a memory location.
> >   Enable the use of these instructions when the Zawrs extension is
> 
>                                         ^ in the kernel
> 
> I believe "in the kernel" was an important part of the clarification that these
> Kconfig options do not affect whether userspace can use these instructions.

Meant to reply earlier but forgot. Samuel's correct, it is indeed the
key bit I wanted, I just suggested what's above to match what was in the
patch I had sent earlier today. Don't really care all that much if it
is a match or not, but I do care about the help text actually
describing /who/ gets to use the extension when the option is enabled.

Thanks,
Conor.
Conor Dooley April 22, 2024, 8:21 a.m. UTC | #6
On Fri, Apr 19, 2024 at 12:14:47AM +0800, Xu Lu wrote:
> On Thu, Apr 18, 2024 at 11:06 PM Conor Dooley <conor@kernel.org> wrote:
> > On Thu, Apr 18, 2024 at 07:49:41PM +0800, Xu Lu wrote:

> > > +        The Zawrs extension defines a pair of instructions to be used
> > > +        in polling loops that allows a core to enter a low-power state
> > > +        and wait on a store to a memory location.
> > > +
> > > +        If you don't know what to do here, say Y.
> > > +
> > >  config TOOLCHAIN_HAS_V
> > >       bool
> > >       default y
> > > @@ -1075,6 +1090,15 @@ endmenu # "Power management options"
> > >
> > >  menu "CPU Power Management"
> > >
> > > +config RISCV_ZAWRS_IDLE
> > > +     bool "Idle thread using ZAWRS extensions"
> > > +     depends on RISCV_ISA_ZAWRS
> > > +     default y
> > > +     help
> > > +             Adds support to implement idle thread using ZAWRS extension.
> > > +
> > > +             If you don't know what to do here, say Y.
> >
> > I don't think this second option is needed, why would we not always want
> > to use the Zawrs version of this when it is available? Can we do it
> > unconditionally when RISCV_ISA_ZAWRS is set and the extension is
> > detected at runtime?
> >
> > Cheers,
> > Conor.
> 
> Indeed, we can always choose WRS.NTO when entering idle.
> 
> This config is introduced for the second commit in this patch series.
> In the second commit, we detect whether the target cpu is idle when
> sending IPI and write IPI info to the reserve set of idle cpu so as to
> avoid sending a physical IPI. Besides, the target idle cpu need not to
> go through traditional interrupt handling routine. However, if all
> cpus are busy and hardly enter idle, this commit may introduce
> performance overhead of extra instructions when sending IPI. Thus we
> introduce this config just in case.

Could you add the downsides into the help text of the config option so
that people can understand why to enable/disable the option?

Thanks,
Conor.
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index be09c8836d56..a0d344e9803f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -19,6 +19,7 @@  config RISCV
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_BINFMT_FLAT
+	select ARCH_HAS_CPU_FINALIZE_INIT
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -525,6 +526,20 @@  config RISCV_ISA_SVPBMT
 
 	   If you don't know what to do here, say Y.
 
+config RISCV_ISA_ZAWRS
+	bool "Zawrs extension support for wait-on-reservation-set instructions"
+	depends on RISCV_ALTERNATIVE
+	default y
+	help
+	   Add support for enabling optimisations in the kernel when the
+	   Zawrs extension is detected at boot.
+
+	   The Zawrs extension defines a pair of instructions to be used
+	   in polling loops that allows a core to enter a low-power state
+	   and wait on a store to a memory location.
+
+	   If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_V
 	bool
 	default y
@@ -1075,6 +1090,15 @@  endmenu # "Power management options"
 
 menu "CPU Power Management"
 
+config RISCV_ZAWRS_IDLE
+	bool "Idle thread using ZAWRS extensions"
+	depends on RISCV_ISA_ZAWRS
+	default y
+	help
+	  Use WRS.NTO instead of WFI in the idle thread when Zawrs is detected.
+
+	  If you don't know what to do here, say Y.
+
 source "drivers/cpuidle/Kconfig"
 
 source "drivers/cpufreq/Kconfig"
diff --git a/arch/riscv/include/asm/cpuidle.h b/arch/riscv/include/asm/cpuidle.h
index 71fdc607d4bc..94c9ecb46571 100644
--- a/arch/riscv/include/asm/cpuidle.h
+++ b/arch/riscv/include/asm/cpuidle.h
@@ -10,15 +10,6 @@ 
 #include <asm/barrier.h>
 #include <asm/processor.h>
 
-static inline void cpu_do_idle(void)
-{
-	/*
-	 * Add mb() here to ensure that all
-	 * IO/MEM accesses are completed prior
-	 * to entering WFI.
-	 */
-	mb();
-	wait_for_interrupt();
-}
+void cpu_do_idle(void);
 
 #endif
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index e17d0078a651..5b358c3cf212 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -81,6 +81,7 @@ 
 #define RISCV_ISA_EXT_ZTSO		72
 #define RISCV_ISA_EXT_ZACAS		73
 #define RISCV_ISA_EXT_XANDESPMU		74
+#define RISCV_ISA_EXT_ZAWRS		75
 
 #define RISCV_ISA_EXT_XLINUXENVCFG	127
 
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 0faf5f161f1e..1143367de8c6 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -157,6 +157,21 @@  static inline void wait_for_interrupt(void)
 	__asm__ __volatile__ ("wfi");
 }
 
+static inline void wrs_nto(unsigned long *addr)
+{
+	unsigned long val;	/* discarded LR result; XLEN-wide to match lr.d */
+
+	__asm__ __volatile__(
+#ifdef CONFIG_64BIT
+			"lr.d %[p], %[v] \n\t"	/* load-reserved on *addr */
+#else
+			"lr.w %[p], %[v] \n\t"	/* load-reserved on *addr */
+#endif
+			".long 0x00d00073 \n\t"	/* WRS.NTO, emitted as a raw word */
+			: [p] "=&r" (val), [v] "+A" (*addr)
+			: : "memory");
+}
+
 extern phys_addr_t dma32_phys_limit;
 
 struct device_node;
@@ -183,6 +198,8 @@  extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
 #define GET_UNALIGN_CTL(tsk, addr)	get_unalign_ctl((tsk), (addr))
 #define SET_UNALIGN_CTL(tsk, val)	set_unalign_ctl((tsk), (val))
 
+extern void select_idle_routine(void);
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PROCESSOR_H */
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index d11d6320fb0d..69cebd41f5f3 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -22,6 +22,11 @@  bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 	return phys_id == cpuid_to_hartid_map(cpu);
 }
 
+void __init arch_cpu_finalize_init(void)
+{
+	select_idle_routine();	/* install the WFI or WRS.NTO idle routine */
+}
+
 /*
  * Returns the hart ID of the given device tree node, or -ENODEV if the node
  * isn't an enabled and valid RISC-V hart node.
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3ed2359eae35..c080e6ca54ba 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -305,6 +305,7 @@  const struct riscv_isa_ext_data riscv_isa_ext[] = {
 	__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
 	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
 	__RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
+	__RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
 };
 
 const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 92922dbd5b5c..9f0f7b888bc1 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -15,6 +15,7 @@ 
 #include <linux/tick.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/static_call.h>
 
 #include <asm/unistd.h>
 #include <asm/processor.h>
@@ -37,11 +38,49 @@  EXPORT_SYMBOL(__stack_chk_guard);
 
 extern asmlinkage void ret_from_fork(void);
 
-void arch_cpu_idle(void)
+static __cpuidle void default_idle(void)
+{
+	/*
+	 * Baseline idle routine: issue a full barrier so that all IO/MEM
+	 * accesses are completed, then stall the hart in WFI (see
+	 * wait_for_interrupt()).
+	 */
+	mb();
+	wait_for_interrupt();
+}
+
+static __cpuidle void wrs_idle(void)
+{
+	/*
+	 * Zawrs idle routine: issue a full barrier so that all IO/MEM
+	 * accesses are completed, then stall in WRS.NTO with the
+	 * reservation set registered on this thread's thread_info flags.
+	 */
+	mb();
+	wrs_nto(&current_thread_info()->flags);
+}
+
+DEFINE_STATIC_CALL(riscv_idle, default_idle);	/* WFI until select_idle_routine() runs */
+
+void __cpuidle cpu_do_idle(void)
+{
+	static_call(riscv_idle)();	/* routine installed by select_idle_routine() */
+}
+
+void __cpuidle arch_cpu_idle(void)
 {
 	cpu_do_idle();
 }
 
+void __init select_idle_routine(void)
+{
+	bool use_wrs = IS_ENABLED(CONFIG_RISCV_ZAWRS_IDLE) &&
+		       riscv_has_extension_likely(RISCV_ISA_EXT_ZAWRS);
+
+	/* Prefer WRS.NTO when built in and detected; otherwise plain WFI. */
+	static_call_update(riscv_idle, use_wrs ? wrs_idle : default_idle);
+}
+
 int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
 {
 	if (!unaligned_ctl_available())