diff mbox series

[v2,01/10] riscv: Implement cmpxchg32/64() using Zacas

Message ID 20240626130347.520750-2-alexghiti@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series Zacas/Zabha support and qspinlocks | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Alexandre Ghiti June 26, 2024, 1:03 p.m. UTC
This adds runtime support for Zacas in cmpxchg operations.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/Kconfig               | 17 +++++++++++++++++
 arch/riscv/Makefile              |  3 +++
 arch/riscv/include/asm/cmpxchg.h | 27 ++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 3 deletions(-)

Comments

Andrea Parri June 27, 2024, 11:06 a.m. UTC | #1
> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n)	\
> +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n)	\
>  ({									\
> +	__label__ zacas, end;						\
>  	register unsigned int __rc;					\
>  									\
> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
> +		asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,		\
> +				     RISCV_ISA_EXT_ZACAS, 1)		\
> +			 : : : : zacas);				\
> +	}								\
> +									\
>  	__asm__ __volatile__ (						\
>  		prepend							\
>  		"0:	lr" lr_sfx " %0, %2\n"				\
>  		"	bne  %0, %z3, 1f\n"				\
> -		"	sc" sc_sfx " %1, %z4, %2\n"			\
> +		"	sc" sc_cas_sfx " %1, %z4, %2\n"			\
>  		"	bnez %1, 0b\n"					\
>  		append							\
>  		"1:\n"							\
>  		: "=&r" (r), "=&r" (__rc), "+A" (*(p))			\
>  		: "rJ" (co o), "rJ" (n)					\
>  		: "memory");						\
> +	goto end;							\
> +									\
> +zacas:									\
> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
> +		__asm__ __volatile__ (					\
> +			prepend						\
> +			"	amocas" sc_cas_sfx " %0, %z2, %1\n"	\
> +			append						\
> +			: "+&r" (r), "+A" (*(p))			\
> +			: "rJ" (n)					\
> +			: "memory");					\
> +	}								\

Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed?
(just wondering - no real objection)


> +end:;									\

Why the semicolon?


>  })
>  
>  #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append)		\
> @@ -156,7 +177,7 @@
>  	__typeof__(ptr) __ptr = (ptr);					\
>  	__typeof__(*(__ptr)) __old = (old);				\
>  	__typeof__(*(__ptr)) __new = (new);				\
> -	__typeof__(*(__ptr)) __ret;					\
> +	__typeof__(*(__ptr)) __ret = (old);				\

This is because the compiler doesn't realize __ret is actually
initialized, right?  IAC, seems a bit unexpected to initialize
with (old) (which indicates SUCCESS of the CMPXCHG operation);
how about using (new) for the initialization of __ret instead?
would (new) still work for you?

  Andrea
kernel test robot July 4, 2024, 3:38 a.m. UTC | #2
Hi Alexandre,

kernel test robot noticed the following build errors:

[auto build test ERROR on soc/for-next]
[also build test ERROR on linus/master v6.10-rc6 next-20240703]
[cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946
base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link:    https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com
patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas
config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config)
compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets
                   if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
                       ^
   include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
           raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
           ^
   include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
           ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
                  ^
   include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
   #define raw_cmpxchg arch_cmpxchg
                       ^
   arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
           _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
           ^
   arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
                   __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
                   ^
   arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
                   asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
                   ^
   kernel/sched/core.c:11840:7: note: possible target of asm goto statement
           if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
                ^
   include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
           raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
           ^
   include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
           ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
                  ^
   include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
   #define raw_cmpxchg arch_cmpxchg
                       ^
   arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
           _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
           ^
   arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
                   __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
                   ^
   arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
                                                                           \
                                                                           ^
   kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup))
           scoped_guard (irqsave) {
           ^
   include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
           for (CLASS(_name, scope)(args),                                 \
                             ^
   kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets
           if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
                ^
   include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
           raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
           ^
   include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
           ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
                  ^
   include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
   #define raw_cmpxchg arch_cmpxchg
                       ^
   arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
           _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
           ^
   arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
                   __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
                   ^
   arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
                   asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
                   ^
   kernel/sched/core.c:11873:7: note: possible target of asm goto statement
                   if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
                       ^
   include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
           raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
           ^
   include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
           ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
                  ^
   include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
   #define raw_cmpxchg arch_cmpxchg
                       ^
   arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
           _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
           ^
   arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
                   __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
                   ^
   arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
                                                                           \
                                                                           ^
   kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
           scoped_guard (irqsave) {
           ^
   include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
           for (CLASS(_name, scope)(args),                                 \
                             ^
   2 errors generated.


vim +11873 kernel/sched/core.c

223baf9d17f25e Mathieu Desnoyers 2023-04-20  11821  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11822  static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11823  				      int cpu)
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11824  {
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11825  	struct rq *rq = cpu_rq(cpu);
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11826  	struct task_struct *t;
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11827  	int cid, lazy_cid;
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11828  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11829  	cid = READ_ONCE(pcpu_cid->cid);
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11830  	if (!mm_cid_is_valid(cid))
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11831  		return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11832  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11833  	/*
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11834  	 * Clear the cpu cid if it is set to keep cid allocation compact.  If
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11835  	 * there happens to be other tasks left on the source cpu using this
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11836  	 * mm, the next task using this mm will reallocate its cid on context
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11837  	 * switch.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11838  	 */
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11839  	lazy_cid = mm_cid_set_lazy_put(cid);
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11840  	if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11841  		return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11842  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11843  	/*
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11844  	 * The implicit barrier after cmpxchg per-mm/cpu cid before loading
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11845  	 * rq->curr->mm matches the scheduler barrier in context_switch()
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11846  	 * between store to rq->curr and load of prev and next task's
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11847  	 * per-mm/cpu cid.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11848  	 *
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11849  	 * The implicit barrier after cmpxchg per-mm/cpu cid before loading
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11850  	 * rq->curr->mm_cid_active matches the barrier in
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11851  	 * sched_mm_cid_exit_signals(), sched_mm_cid_before_execve(), and
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11852  	 * sched_mm_cid_after_execve() between store to t->mm_cid_active and
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11853  	 * load of per-mm/cpu cid.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11854  	 */
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11855  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11856  	/*
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11857  	 * If we observe an active task using the mm on this rq after setting
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11858  	 * the lazy-put flag, that task will be responsible for transitioning
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11859  	 * from lazy-put flag set to MM_CID_UNSET.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11860  	 */
0e34600ac9317d Peter Zijlstra    2023-06-09  11861  	scoped_guard (rcu) {
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11862  		t = rcu_dereference(rq->curr);
0e34600ac9317d Peter Zijlstra    2023-06-09  11863  		if (READ_ONCE(t->mm_cid_active) && t->mm == mm)
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11864  			return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11865  	}
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11866  
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11867  	/*
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11868  	 * The cid is unused, so it can be unset.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11869  	 * Disable interrupts to keep the window of cid ownership without rq
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11870  	 * lock small.
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11871  	 */
0e34600ac9317d Peter Zijlstra    2023-06-09  11872  	scoped_guard (irqsave) {
223baf9d17f25e Mathieu Desnoyers 2023-04-20 @11873  		if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
223baf9d17f25e Mathieu Desnoyers 2023-04-20  11874  			__mm_cid_put(mm, cid);
0e34600ac9317d Peter Zijlstra    2023-06-09  11875  	}
af7f588d8f7355 Mathieu Desnoyers 2022-11-22  11876  }
af7f588d8f7355 Mathieu Desnoyers 2022-11-22  11877
Alexandre Ghiti July 4, 2024, 4:25 p.m. UTC | #3
Hi Andrea,

On 27/06/2024 13:06, Andrea Parri wrote:
>> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n)	\
>> +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n)	\
>>   ({									\
>> +	__label__ zacas, end;						\
>>   	register unsigned int __rc;					\
>>   									\
>> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
>> +		asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,		\
>> +				     RISCV_ISA_EXT_ZACAS, 1)		\
>> +			 : : : : zacas);				\
>> +	}								\
>> +									\
>>   	__asm__ __volatile__ (						\
>>   		prepend							\
>>   		"0:	lr" lr_sfx " %0, %2\n"				\
>>   		"	bne  %0, %z3, 1f\n"				\
>> -		"	sc" sc_sfx " %1, %z4, %2\n"			\
>> +		"	sc" sc_cas_sfx " %1, %z4, %2\n"			\
>>   		"	bnez %1, 0b\n"					\
>>   		append							\
>>   		"1:\n"							\
>>   		: "=&r" (r), "=&r" (__rc), "+A" (*(p))			\
>>   		: "rJ" (co o), "rJ" (n)					\
>>   		: "memory");						\
>> +	goto end;							\
>> +									\
>> +zacas:									\
>> +	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
>> +		__asm__ __volatile__ (					\
>> +			prepend						\
>> +			"	amocas" sc_cas_sfx " %0, %z2, %1\n"	\
>> +			append						\
>> +			: "+&r" (r), "+A" (*(p))			\
>> +			: "rJ" (n)					\
>> +			: "memory");					\
>> +	}								\
> Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed?
> (just wondering - no real objection)


To me yes, otherwise a toolchain without zacas support would fail to 
assemble the amocas instruction.


>
>
>> +end:;									\
> Why the semicolon?


That fixes a clang warning reported by Nathan here: 
https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/


>
>
>>   })
>>   
>>   #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append)		\
>> @@ -156,7 +177,7 @@
>>   	__typeof__(ptr) __ptr = (ptr);					\
>>   	__typeof__(*(__ptr)) __old = (old);				\
>>   	__typeof__(*(__ptr)) __new = (new);				\
>> -	__typeof__(*(__ptr)) __ret;					\
>> +	__typeof__(*(__ptr)) __ret = (old);				\
> This is because the compiler doesn't realize __ret is actually
> initialized, right?  IAC, seems a bit unexpected to initialize
> with (old) (which indicates SUCCESS of the CMPXCHG operation);
> how about using (new) for the initialization of __ret instead?
> would (new) still work for you?


But amocas rd register must contain the expected old value in order to 
actually work right?


>
>    Andrea
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Nathan Chancellor July 5, 2024, 5:27 p.m. UTC | #4
On Thu, Jul 04, 2024 at 11:38:46AM +0800, kernel test robot wrote:
> Hi Alexandre,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on soc/for-next]
> [also build test ERROR on linus/master v6.10-rc6 next-20240703]
> [cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
> patch link:    https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com
> patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas
> config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config)
> compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
> >> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets
>                    if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
>                        ^
>    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
>            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
>            ^
>    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
>            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
>                   ^
>    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
>    #define raw_cmpxchg arch_cmpxchg
>                        ^
>    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
>            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
>            ^
>    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
>                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
>                    ^
>    arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
>                    asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
>                    ^
>    kernel/sched/core.c:11840:7: note: possible target of asm goto statement
>            if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
>                 ^
>    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
>            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
>            ^
>    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
>            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
>                   ^
>    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
>    #define raw_cmpxchg arch_cmpxchg
>                        ^
>    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
>            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
>            ^
>    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
>                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
>                    ^
>    arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
>                                                                            \
>                                                                            ^
>    kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup))
>            scoped_guard (irqsave) {
>            ^
>    include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
>            for (CLASS(_name, scope)(args),                                 \
>                              ^
>    kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets
>            if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
>                 ^
>    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
>            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
>            ^
>    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
>            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
>                   ^
>    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
>    #define raw_cmpxchg arch_cmpxchg
>                        ^
>    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
>            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
>            ^
>    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
>                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
>                    ^
>    arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
>                    asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
>                    ^
>    kernel/sched/core.c:11873:7: note: possible target of asm goto statement
>                    if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
>                        ^
>    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
>            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
>            ^
>    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
>            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
>                   ^
>    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
>    #define raw_cmpxchg arch_cmpxchg
>                        ^
>    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
>            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
>            ^
>    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
>                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
>                    ^
>    arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
>                                                                            \
>                                                                            ^
>    kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
>            scoped_guard (irqsave) {
>            ^
>    include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
>            for (CLASS(_name, scope)(args),                                 \
>                              ^
>    2 errors generated.

Ugh, this is an unfortunate interaction with clang's jump scope analysis
and asm goto in LLVM releases prior to 17 :/

https://github.com/ClangBuiltLinux/linux/issues/1886#issuecomment-1645979992

Unfortunately, 'if (0)' does not prevent this (the analysis runs early
in the front end as far as I understand it), we would need to workaround
this with full preprocessor guards...

Another alternative would be to require LLVM 17+ for RISC-V, which may
not be the worst alternative, since I think most people doing serious
work with clang will probably be living close to tip of tree anyways
because of all the extension work that goes on upstream.

I am open to other thoughts though.

Cheers,
Nathan
Andrea Parri July 9, 2024, 11:47 p.m. UTC | #5
> > Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed?
> > (just wondering - no real objection)
> 
> To me yes, otherwise a toolchain without zacas support would fail to
> assemble the amocas instruction.

To elaborate on my question:  Such a toolchain may be able to recognize
that the block of code following the zacas: label (and comprising the
amocas instruction) can't be reached/executed if the first IS_ENABLED()
evaluates to false (due to the goto end; statement), and consequently it
may compile out the entire block/instruction no matter the presence or
not of the second IS_ENABLE() check.  IOW, such a toolchain/compiler may
not actually have to assemble the amocas instruction under such config.
In fact, this is how the current gcc trunk (which doesn't support zacas)
seems to behave.  And this very same optimization/code removal seems to
be performed by clang when CONFIG_RISCV_ISA_ZACAS=n.  IAC, I'd agree it
is good to be explicit in the sources and keep both of these checks.


> > Why the semicolon?
> 
> That fixes a clang warning reported by Nathan here:
> https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/

I see.  Thanks for the pointer.


> > This is because the compiler doesn't realize __ret is actually
> > initialized, right?  IAC, seems a bit unexpected to initialize
> > with (old) (which indicates SUCCESS of the CMPXCHG operation);
> > how about using (new) for the initialization of __ret instead?
> > would (new) still work for you?
> 
> But amocas rd register must contain the expected old value in order to
> actually work right?

Agreed.  Thanks for the clarification.

  Andrea
Alexandre Ghiti July 15, 2024, 11:48 a.m. UTC | #6
Hi Andrea,

On Wed, Jul 10, 2024 at 1:47 AM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> > > Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed?
> > > (just wondering - no real objection)
> >
> > To me yes, otherwise a toolchain without zacas support would fail to
> > assemble the amocas instruction.
>
> To elaborate on my question:  Such a toolchain may be able to recognize
> that the block of code following the zacas: label (and comprising the
> amocas instruction) can't be reached/executed if the first IS_ENABLED()
> evaluates to false (due to the goto end; statement), and consequently it
> may compile out the entire block/instruction no matter the presence or
> not of the second IS_ENABLE() check.  IOW, such a toolchain/compiler may
> not actually have to assemble the amocas instruction under such config.
> In fact, this is how the current gcc trunk (which doesn't support zacas)
> seems to behave.  And this very same optimization/code removal seems to
> be performed by clang when CONFIG_RISCV_ISA_ZACAS=n.  IAC, I'd agree it
> is good to be explicit in the sources and keep both of these checks.

Indeed, clang works fine without the second IS_ENABLED(). I'll remove
it then as the code is complex enough.

Thanks,

Alex

>
>
> > > Why the semicolon?
> >
> > That fixes a clang warning reported by Nathan here:
> > https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/
>
> I see.  Thanks for the pointer.
>
>
> > > This is because the compiler doesn't realize __ret is actually
> > > initialized, right?  IAC, seems a bit unexpected to initialize
> > > with (old) (which indicates SUCCESS of the CMPXCHG operation);
> > > how about using (new) for the initialization of __ret instead?
> > > would (new) still work for you?
> >
> > But amocas rd register must contain the expected old value in order to
> > actually work right?
>
> Agreed.  Thanks for the clarification.
>
>   Andrea
Alexandre Ghiti July 16, 2024, 12:19 p.m. UTC | #7
Hi Nathan,

On Fri, Jul 5, 2024 at 7:27 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> On Thu, Jul 04, 2024 at 11:38:46AM +0800, kernel test robot wrote:
> > Hi Alexandre,
> >
> > kernel test robot noticed the following build errors:
> >
> > [auto build test ERROR on soc/for-next]
> > [also build test ERROR on linus/master v6.10-rc6 next-20240703]
> > [cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >
> > url:    https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
> > patch link:    https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com
> > patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas
> > config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config)
> > compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6)
> > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce)
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <lkp@intel.com>
> > | Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/
> >
> > All errors (new ones prefixed by >>):
> >
> > >> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets
> >                    if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
> >                        ^
> >    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
> >            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
> >            ^
> >    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
> >            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
> >                   ^
> >    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
> >    #define raw_cmpxchg arch_cmpxchg
> >                        ^
> >    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
> >            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
> >            ^
> >    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
> >                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
> >                    ^
> >    arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
> >                    asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
> >                    ^
> >    kernel/sched/core.c:11840:7: note: possible target of asm goto statement
> >            if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
> >                 ^
> >    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
> >            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
> >            ^
> >    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
> >            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
> >                   ^
> >    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
> >    #define raw_cmpxchg arch_cmpxchg
> >                        ^
> >    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
> >            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
> >            ^
> >    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
> >                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
> >                    ^
> >    arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
> >                                                                            \
> >                                                                            ^
> >    kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup))
> >            scoped_guard (irqsave) {
> >            ^
> >    include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
> >            for (CLASS(_name, scope)(args),                                 \
> >                              ^
> >    kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets
> >            if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
> >                 ^
> >    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
> >            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
> >            ^
> >    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
> >            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
> >                   ^
> >    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
> >    #define raw_cmpxchg arch_cmpxchg
> >                        ^
> >    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
> >            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
> >            ^
> >    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
> >                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
> >                    ^
> >    arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
> >                    asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,            \
> >                    ^
> >    kernel/sched/core.c:11873:7: note: possible target of asm goto statement
> >                    if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
> >                        ^
> >    include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
> >            raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
> >            ^
> >    include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
> >            ___r = raw_cmpxchg((_ptr), ___o, (_new)); \
> >                   ^
> >    include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
> >    #define raw_cmpxchg arch_cmpxchg
> >                        ^
> >    arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
> >            _arch_cmpxchg((ptr), (o), (n), ".rl", "", "     fence rw, rw\n")
> >            ^
> >    arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
> >                    __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append,      \
> >                    ^
> >    arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
> >                                                                            \
> >                                                                            ^
> >    kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
> >            scoped_guard (irqsave) {
> >            ^
> >    include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
> >            for (CLASS(_name, scope)(args),                                 \
> >                              ^
> >    2 errors generated.
>
> Ugh, this is an unfortunate interaction with clang's jump scope analysis
> and asm goto in LLVM releases prior to 17 :/
>
> https://github.com/ClangBuiltLinux/linux/issues/1886#issuecomment-1645979992
>
> Unfortunately, 'if (0)' does not prevent this (the analysis runs early
> in the front end as far as I understand it), we would need to workaround
> this with full preprocessor guards...

Thanks for jumping in on this one, I was quite puzzled :)

>
> Another alternative would be to require LLVM 17+ for RISC-V, which may
> not be the worst alternative, since I think most people doing serious
> work with clang will probably be living close to tip of tree anyways
> because of all the extension work that goes on upstream.

Stupid question but why the fix in llvm 17 was not backported to
previous versions?

Anyway, I'd rather require llvm 17+ than add a bunch of preprocessor
guards in this file (IIUC what you said above) as it is complex
enough.

@Conor Dooley @Palmer Dabbelt WDYT? Is there any interest in
supporting llvm < 17? We may encounter this bug again in the future so
I'd be in favor of moving to llvm 17+.

Thanks again Nathan,

Alex

>
> I am open to other thoughts though.
>
> Cheers,
> Nathan
Nathan Chancellor July 16, 2024, 2 p.m. UTC | #8
On Tue, Jul 16, 2024 at 02:19:57PM +0200, Alexandre Ghiti wrote:
> On Fri, Jul 5, 2024 at 7:27 PM Nathan Chancellor <nathan@kernel.org> wrote:
> > Another alternative would be to require LLVM 17+ for RISC-V, which may
> > not be the worst alternative, since I think most people doing serious
> > work with clang will probably be living close to tip of tree anyways
> > because of all the extension work that goes on upstream.
> 
> Stupid question but why the fix in llvm 17 was not backported to
> previous versions?

Unfortunately, LLVM releases are only supported for a few months with
fixes, unlike GCC that supported their releases for a few years. By the
time this issue was uncovered and resolved in LLVM main (17 at the
time), LLVM 16 was no longer supported.

I could potentially patch the kernel.org toolchains but that doesn't fix
the issue for other versions of clang out there.

> Anyway, I'd rather require llvm 17+ than add a bunch of preprocessor
> guards in this file (IIUC what you said above) as it is complex
> enough.

Sure, this is not a super unreasonable issue to bump the minimum
supported version for RISC-V over in my opinion, so no real objections
from me.

> @Conor Dooley @Palmer Dabbelt WDYT? Is there any interest in
> supporting llvm < 17? We may encounter this bug again in the future so
> I'd be in favor of moving to llvm 17+.

FWIW, I would envision a diff like this (assuming it actually works to
resolve this issue, I didn't actually test it):

diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 91c91201212c..e81eb7ed257d 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -28,6 +28,8 @@ llvm)
 		echo 15.0.0
 	elif [ "$SRCARCH" = loongarch ]; then
 		echo 18.0.0
+	elif [ "$SRCARCH" = riscv ]; then
+		echo 17.0.0
 	else
 		echo 13.0.1
 	fi

Cheers,
Nathan
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 05ccba8ca33a..1caaedec88c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -596,6 +596,23 @@  config RISCV_ISA_V_PREEMPTIVE
 	  preemption. Enabling this config will result in higher memory
 	  consumption due to the allocation of per-task's kernel Vector context.
 
+config TOOLCHAIN_HAS_ZACAS
+	bool
+	default y
+	depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
+	depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
+	depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZACAS
+	bool "Zacas extension support for atomic CAS"
+	depends on TOOLCHAIN_HAS_ZACAS
+	default y
+	help
+	  Enable the use of the Zacas ISA-extension to implement kernel atomic
+	  cmpxchg operations when it is detected at boot.
+
+	  If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 06de9d365088..9fd13d7a9cc6 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -85,6 +85,9 @@  endif
 # Check if the toolchain supports Zihintpause extension
 riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
 
+# Check if the toolchain supports Zacas
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+
 # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
 # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
 KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 808b4c78462e..a58a2141c6d3 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -9,6 +9,7 @@ 
 #include <linux/bug.h>
 
 #include <asm/fence.h>
+#include <asm/alternative.h>
 
 #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)		\
 ({									\
@@ -134,21 +135,41 @@ 
 	r = (__typeof__(*(p)))((__retx & __mask) >> __s);		\
 })
 
-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n)	\
+#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n)	\
 ({									\
+	__label__ zacas, end;						\
 	register unsigned int __rc;					\
 									\
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
+		asm goto(ALTERNATIVE("nop", "j %[zacas]", 0,		\
+				     RISCV_ISA_EXT_ZACAS, 1)		\
+			 : : : : zacas);				\
+	}								\
+									\
 	__asm__ __volatile__ (						\
 		prepend							\
 		"0:	lr" lr_sfx " %0, %2\n"				\
 		"	bne  %0, %z3, 1f\n"				\
-		"	sc" sc_sfx " %1, %z4, %2\n"			\
+		"	sc" sc_cas_sfx " %1, %z4, %2\n"			\
 		"	bnez %1, 0b\n"					\
 		append							\
 		"1:\n"							\
 		: "=&r" (r), "=&r" (__rc), "+A" (*(p))			\
 		: "rJ" (co o), "rJ" (n)					\
 		: "memory");						\
+	goto end;							\
+									\
+zacas:									\
+	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) {			\
+		__asm__ __volatile__ (					\
+			prepend						\
+			"	amocas" sc_cas_sfx " %0, %z2, %1\n"	\
+			append						\
+			: "+&r" (r), "+A" (*(p))			\
+			: "rJ" (n)					\
+			: "memory");					\
+	}								\
+end:;									\
 })
 
 #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append)		\
@@ -156,7 +177,7 @@ 
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __old = (old);				\
 	__typeof__(*(__ptr)) __new = (new);				\
-	__typeof__(*(__ptr)) __ret;					\
+	__typeof__(*(__ptr)) __ret = (old);				\
 									\
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\