Message ID | 20240626130347.520750-2-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Zacas/Zabha support and qspinlocks | expand |
> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ > ({ \ > + __label__ zacas, end; \ > register unsigned int __rc; \ > \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ > + RISCV_ISA_EXT_ZACAS, 1) \ > + : : : : zacas); \ > + } \ > + \ > __asm__ __volatile__ ( \ > prepend \ > "0: lr" lr_sfx " %0, %2\n" \ > " bne %0, %z3, 1f\n" \ > - " sc" sc_sfx " %1, %z4, %2\n" \ > + " sc" sc_cas_sfx " %1, %z4, %2\n" \ > " bnez %1, 0b\n" \ > append \ > "1:\n" \ > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ > : "rJ" (co o), "rJ" (n) \ > : "memory"); \ > + goto end; \ > + \ > +zacas: \ > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ > + __asm__ __volatile__ ( \ > + prepend \ > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ > + append \ > + : "+&r" (r), "+A" (*(p)) \ > + : "rJ" (n) \ > + : "memory"); \ > + } \ Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed? (just wondering - no real objection) > +end:; \ Why the semicolon? > }) > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ > @@ -156,7 +177,7 @@ > __typeof__(ptr) __ptr = (ptr); \ > __typeof__(*(__ptr)) __old = (old); \ > __typeof__(*(__ptr)) __new = (new); \ > - __typeof__(*(__ptr)) __ret; \ > + __typeof__(*(__ptr)) __ret = (old); \ This is because the compiler doesn't realize __ret is actually initialized, right? IAC, seems a bit unexpected to initialize with (old) (which indicates SUCCESS of the CMPXCHG operation); how about using (new) for the initialization of __ret instead? would (new) still work for you? Andrea
Hi Alexandre,
kernel test robot noticed the following build errors:
[auto build test ERROR on soc/for-next]
[also build test ERROR on linus/master v6.10-rc6 next-20240703]
[cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946
base: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
patch link: https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com
patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas
config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config)
compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/
All errors (new ones prefixed by >>):
>> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets
if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
^
include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
^
include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
___r = raw_cmpxchg((_ptr), ___o, (_new)); \
^
include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
#define raw_cmpxchg arch_cmpxchg
^
arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
_arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
^
arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
^
arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \
^
kernel/sched/core.c:11840:7: note: possible target of asm goto statement
if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
^
include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
^
include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
___r = raw_cmpxchg((_ptr), ___o, (_new)); \
^
include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
#define raw_cmpxchg arch_cmpxchg
^
arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
_arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
^
arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
^
arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
\
^
kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup))
scoped_guard (irqsave) {
^
include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
for (CLASS(_name, scope)(args), \
^
kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets
if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
^
include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
^
include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
___r = raw_cmpxchg((_ptr), ___o, (_new)); \
^
include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
#define raw_cmpxchg arch_cmpxchg
^
arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
_arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
^
arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
^
arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg'
asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \
^
kernel/sched/core.c:11873:7: note: possible target of asm goto statement
if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
^
include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg'
raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
^
include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg'
___r = raw_cmpxchg((_ptr), ___o, (_new)); \
^
include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg'
#define raw_cmpxchg arch_cmpxchg
^
arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg'
_arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n")
^
arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg'
__arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \
^
arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg'
\
^
kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
scoped_guard (irqsave) {
^
include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard'
for (CLASS(_name, scope)(args), \
^
2 errors generated.
vim +11873 kernel/sched/core.c
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11821
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11822 static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11823 int cpu)
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11824 {
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11825 struct rq *rq = cpu_rq(cpu);
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11826 struct task_struct *t;
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11827 int cid, lazy_cid;
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11828
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11829 cid = READ_ONCE(pcpu_cid->cid);
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11830 if (!mm_cid_is_valid(cid))
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11831 return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11832
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11833 /*
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11834 * Clear the cpu cid if it is set to keep cid allocation compact. If
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11835 * there happens to be other tasks left on the source cpu using this
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11836 * mm, the next task using this mm will reallocate its cid on context
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11837 * switch.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11838 */
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11839 lazy_cid = mm_cid_set_lazy_put(cid);
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11840 if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid))
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11841 return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11842
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11843 /*
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11844 * The implicit barrier after cmpxchg per-mm/cpu cid before loading
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11845 * rq->curr->mm matches the scheduler barrier in context_switch()
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11846 * between store to rq->curr and load of prev and next task's
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11847 * per-mm/cpu cid.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11848 *
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11849 * The implicit barrier after cmpxchg per-mm/cpu cid before loading
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11850 * rq->curr->mm_cid_active matches the barrier in
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11851 * sched_mm_cid_exit_signals(), sched_mm_cid_before_execve(), and
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11852 * sched_mm_cid_after_execve() between store to t->mm_cid_active and
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11853 * load of per-mm/cpu cid.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11854 */
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11855
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11856 /*
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11857 * If we observe an active task using the mm on this rq after setting
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11858 * the lazy-put flag, that task will be responsible for transitioning
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11859 * from lazy-put flag set to MM_CID_UNSET.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11860 */
0e34600ac9317d Peter Zijlstra 2023-06-09 11861 scoped_guard (rcu) {
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11862 t = rcu_dereference(rq->curr);
0e34600ac9317d Peter Zijlstra 2023-06-09 11863 if (READ_ONCE(t->mm_cid_active) && t->mm == mm)
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11864 return;
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11865 }
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11866
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11867 /*
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11868 * The cid is unused, so it can be unset.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11869 * Disable interrupts to keep the window of cid ownership without rq
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11870 * lock small.
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11871 */
0e34600ac9317d Peter Zijlstra 2023-06-09 11872 scoped_guard (irqsave) {
223baf9d17f25e Mathieu Desnoyers 2023-04-20 @11873 if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
223baf9d17f25e Mathieu Desnoyers 2023-04-20 11874 __mm_cid_put(mm, cid);
0e34600ac9317d Peter Zijlstra 2023-06-09 11875 }
af7f588d8f7355 Mathieu Desnoyers 2022-11-22 11876 }
af7f588d8f7355 Mathieu Desnoyers 2022-11-22 11877
Hi Andrea, On 27/06/2024 13:06, Andrea Parri wrote: >> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ >> +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ >> ({ \ >> + __label__ zacas, end; \ >> register unsigned int __rc; \ >> \ >> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ >> + asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ >> + RISCV_ISA_EXT_ZACAS, 1) \ >> + : : : : zacas); \ >> + } \ >> + \ >> __asm__ __volatile__ ( \ >> prepend \ >> "0: lr" lr_sfx " %0, %2\n" \ >> " bne %0, %z3, 1f\n" \ >> - " sc" sc_sfx " %1, %z4, %2\n" \ >> + " sc" sc_cas_sfx " %1, %z4, %2\n" \ >> " bnez %1, 0b\n" \ >> append \ >> "1:\n" \ >> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ >> : "rJ" (co o), "rJ" (n) \ >> : "memory"); \ >> + goto end; \ >> + \ >> +zacas: \ >> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ >> + __asm__ __volatile__ ( \ >> + prepend \ >> + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ >> + append \ >> + : "+&r" (r), "+A" (*(p)) \ >> + : "rJ" (n) \ >> + : "memory"); \ >> + } \ > Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed? > (just wondering - no real objection) To me yes, otherwise a toolchain without zacas support would fail to assemble the amocas instruction. > > >> +end:; \ > Why the semicolon? That fixes a clang warning reported by Nathan here: https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/ > > >> }) >> >> #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ >> @@ -156,7 +177,7 @@ >> __typeof__(ptr) __ptr = (ptr); \ >> __typeof__(*(__ptr)) __old = (old); \ >> __typeof__(*(__ptr)) __new = (new); \ >> - __typeof__(*(__ptr)) __ret; \ >> + __typeof__(*(__ptr)) __ret = (old); \ > This is because the compiler doesn't realize __ret is actually > initialized, right? IAC, seems a bit unexpected to initialize > with (old) (which indicates SUCCESS of the CMPXCHG operation); > how about using (new) for the initialization of __ret instead? > would (new) still work for you? But amocas rd register must contain the expected old value in order to actually work right? > > Andrea > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Thu, Jul 04, 2024 at 11:38:46AM +0800, kernel test robot wrote: > Hi Alexandre, > > kernel test robot noticed the following build errors: > > [auto build test ERROR on soc/for-next] > [also build test ERROR on linus/master v6.10-rc6 next-20240703] > [cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946 > base: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next > patch link: https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com > patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas > config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config) > compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6) > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/ > > All errors (new ones prefixed by >>): > > >> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets > if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET)) > ^ > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > #define raw_cmpxchg arch_cmpxchg > ^ > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > ^ > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > ^ > arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg' > asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ > ^ > kernel/sched/core.c:11840:7: note: possible target of asm goto statement > if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid)) > ^ > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > #define raw_cmpxchg arch_cmpxchg > ^ > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > ^ > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > ^ > arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg' > \ > ^ > kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup)) > scoped_guard (irqsave) { > ^ > include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard' > for (CLASS(_name, scope)(args), \ > ^ > kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets > if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid)) > ^ > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > #define raw_cmpxchg arch_cmpxchg > ^ > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > ^ > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > ^ > arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg' > asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ > ^ > kernel/sched/core.c:11873:7: note: possible target of asm goto statement > if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET)) > ^ > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > ^ > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > #define raw_cmpxchg arch_cmpxchg > ^ > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > ^ > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > ^ > arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg' > \ > ^ > kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup)) > scoped_guard (irqsave) { > ^ > include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard' > for (CLASS(_name, scope)(args), \ > ^ > 2 errors generated. Ugh, this is an unfortunate interaction with clang's jump scope analysis and asm goto in LLVM releases prior to 17 :/ https://github.com/ClangBuiltLinux/linux/issues/1886#issuecomment-1645979992 Unfortunately, 'if (0)' does not prevent this (the analysis runs early in the front end as far as I understand it), we would need to workaround this with full preprocessor guards... Another alternative would be to require LLVM 17+ for RISC-V, which may not be the worst alternative, since I think most people doing serious work with clang will probably be living close to tip of tree anyways because of all the extension work that goes on upstream. I am open to other thoughts though. Cheers, Nathan
> > Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed? > > (just wondering - no real objection) > > To me yes, otherwise a toolchain without zacas support would fail to > assemble the amocas instruction. To elaborate on my question: Such a toolchain may be able to recognize that the block of code following the zacas: label (and comprising the amocas instruction) can't be reached/executed if the first IS_ENABLED() evaluates to false (due to the goto end; statement), and consequently it may compile out the entire block/instruction no matter the presence or not of the second IS_ENABLE() check. IOW, such a toolchain/compiler may not actually have to assemble the amocas instruction under such config. In fact, this is how the current gcc trunk (which doesn't support zacas) seems to behave. And this very same optimization/code removal seems to be performed by clang when CONFIG_RISCV_ISA_ZACAS=n. IAC, I'd agree it is good to be explicit in the sources and keep both of these checks. > > Why the semicolon? > > That fixes a clang warning reported by Nathan here: > https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/ I see. Thanks for the pointer. > > This is because the compiler doesn't realize __ret is actually > > initialized, right? IAC, seems a bit unexpected to initialize > > with (old) (which indicates SUCCESS of the CMPXCHG operation); > > how about using (new) for the initialization of __ret instead? > > would (new) still work for you? > > But amocas rd register must contain the expected old value in order to > actually work right? Agreed. Thanks for the clarification. Andrea
Hi Andrea, On Wed, Jul 10, 2024 at 1:47 AM Andrea Parri <parri.andrea@gmail.com> wrote: > > > > Is this second IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check actually needed? > > > (just wondering - no real objection) > > > > To me yes, otherwise a toolchain without zacas support would fail to > > assemble the amocas instruction. > > To elaborate on my question: Such a toolchain may be able to recognize > that the block of code following the zacas: label (and comprising the > amocas instruction) can't be reached/executed if the first IS_ENABLED() > evaluates to false (due to the goto end; statement), and consequently it > may compile out the entire block/instruction no matter the presence or > not of the second IS_ENABLE() check. IOW, such a toolchain/compiler may > not actually have to assemble the amocas instruction under such config. > In fact, this is how the current gcc trunk (which doesn't support zacas) > seems to behave. And this very same optimization/code removal seems to > be performed by clang when CONFIG_RISCV_ISA_ZACAS=n. IAC, I'd agree it > is good to be explicit in the sources and keep both of these checks. Indeed, clang works fine without the second IS_ENABLED(). I'll remove it then as the code is complex enough. Thanks, Alex > > > > > Why the semicolon? > > > > That fixes a clang warning reported by Nathan here: > > https://lore.kernel.org/linux-riscv/20240528193110.GA2196855@thelio-3990X/ > > I see. Thanks for the pointer. > > > > > This is because the compiler doesn't realize __ret is actually > > > initialized, right? IAC, seems a bit unexpected to initialize > > > with (old) (which indicates SUCCESS of the CMPXCHG operation); > > > how about using (new) for the initialization of __ret instead? > > > would (new) still work for you? > > > > But amocas rd register must contain the expected old value in order to > > actually work right? > > Agreed. Thanks for the clarification. > > Andrea
Hi Nathan, On Fri, Jul 5, 2024 at 7:27 PM Nathan Chancellor <nathan@kernel.org> wrote: > > On Thu, Jul 04, 2024 at 11:38:46AM +0800, kernel test robot wrote: > > Hi Alexandre, > > > > kernel test robot noticed the following build errors: > > > > [auto build test ERROR on soc/for-next] > > [also build test ERROR on linus/master v6.10-rc6 next-20240703] > > [cannot apply to arnd-asm-generic/master robh/for-next tip/locking/core] > > [If your patch is applied to the wrong git tree, kindly drop us a note. > > And when submitting patch, we suggest to use '--base' as documented in > > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > > > url: https://github.com/intel-lab-lkp/linux/commits/Alexandre-Ghiti/riscv-Implement-cmpxchg32-64-using-Zacas/20240627-034946 > > base: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next > > patch link: https://lore.kernel.org/r/20240626130347.520750-2-alexghiti%40rivosinc.com > > patch subject: [PATCH v2 01/10] riscv: Implement cmpxchg32/64() using Zacas > > config: riscv-randconfig-002-20240704 (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/config) > > compiler: clang version 16.0.6 (https://github.com/llvm/llvm-project 7cbf1a2591520c2491aa35339f227775f4d3adf6) > > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240704/202407041157.odTZAYZ6-lkp@intel.com/reproduce) > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > > the same patch/commit), kindly add following tags > > | Reported-by: kernel test robot <lkp@intel.com> > > | Closes: https://lore.kernel.org/oe-kbuild-all/202407041157.odTZAYZ6-lkp@intel.com/ > > > > All errors (new ones prefixed by >>): > > > > >> kernel/sched/core.c:11873:7: error: cannot jump from this asm goto statement to one of its possible targets > > if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET)) > > ^ > > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > > #define raw_cmpxchg arch_cmpxchg > > ^ > > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > > ^ > > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > > ^ > > arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg' > > asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ > > ^ > > kernel/sched/core.c:11840:7: note: possible target of asm goto statement > > if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid)) > > ^ > > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > > #define raw_cmpxchg arch_cmpxchg > > ^ > > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > > ^ > > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > > ^ > > arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg' > > \ > > ^ > > kernel/sched/core.c:11872:2: note: jump exits scope of variable with __attribute__((cleanup)) > > scoped_guard (irqsave) { > > ^ > > include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard' > > for (CLASS(_name, scope)(args), \ > > ^ > > kernel/sched/core.c:11840:7: error: cannot jump from this asm goto statement to one of its possible targets > > if (!try_cmpxchg(&pcpu_cid->cid, &cid, lazy_cid)) > > ^ > > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > > #define raw_cmpxchg arch_cmpxchg > > ^ > > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > > ^ > > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > > ^ > > arch/riscv/include/asm/cmpxchg.h:144:3: note: expanded from macro '__arch_cmpxchg' > > asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ > > ^ > > kernel/sched/core.c:11873:7: note: possible target of asm goto statement > > if (try_cmpxchg(&pcpu_cid->cid, &lazy_cid, MM_CID_UNSET)) > > ^ > > include/linux/atomic/atomic-instrumented.h:4880:2: note: expanded from macro 'try_cmpxchg' > > raw_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:192:9: note: expanded from macro 'raw_try_cmpxchg' > > ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ > > ^ > > include/linux/atomic/atomic-arch-fallback.h:55:21: note: expanded from macro 'raw_cmpxchg' > > #define raw_cmpxchg arch_cmpxchg > > ^ > > arch/riscv/include/asm/cmpxchg.h:212:2: note: expanded from macro 'arch_cmpxchg' > > _arch_cmpxchg((ptr), (o), (n), ".rl", "", " fence rw, rw\n") > > ^ > > arch/riscv/include/asm/cmpxchg.h:189:3: note: expanded from macro '_arch_cmpxchg' > > __arch_cmpxchg(".w", ".w" sc_sfx, prepend, append, \ > > ^ > > arch/riscv/include/asm/cmpxchg.h:161:10: note: expanded from macro '__arch_cmpxchg' > > \ > > ^ > > kernel/sched/core.c:11872:2: note: jump bypasses initialization of variable with __attribute__((cleanup)) > > scoped_guard (irqsave) { > > ^ > > include/linux/cleanup.h:169:20: note: expanded from macro 'scoped_guard' > > for (CLASS(_name, scope)(args), \ > > ^ > > 2 errors generated. > > Ugh, this is an unfortunate interaction with clang's jump scope analysis > and asm goto in LLVM releases prior to 17 :/ > > https://github.com/ClangBuiltLinux/linux/issues/1886#issuecomment-1645979992 > > Unfortunately, 'if (0)' does not prevent this (the analysis runs early > in the front end as far as I understand it), we would need to workaround > this with full preprocessor guards... Thanks for jumping in on this one, I was quite puzzled :) > > Another alternative would be to require LLVM 17+ for RISC-V, which may > not be the worst alternative, since I think most people doing serious > work with clang will probably be living close to tip of tree anyways > because of all the extension work that goes on upstream. Stupid question but why the fix in llvm 17 was not backported to previous versions? Anyway, I'd rather require llvm 17+ than add a bunch of preprocessor guards in this file (IIUC what you said above) as it is complex enough. @Conor Dooley @Palmer Dabbelt WDYT? Is there any interest in supporting llvm < 17? We may encounter this bug again in the future so I'd be in favor of moving to llvm 17+. Thanks again Nathan, Alex > > I am open to other thoughts though. > > Cheers, > Nathan
On Tue, Jul 16, 2024 at 02:19:57PM +0200, Alexandre Ghiti wrote: > On Fri, Jul 5, 2024 at 7:27 PM Nathan Chancellor <nathan@kernel.org> wrote: > > Another alternative would be to require LLVM 17+ for RISC-V, which may > > not be the worst alternative, since I think most people doing serious > > work with clang will probably be living close to tip of tree anyways > > because of all the extension work that goes on upstream. > > Stupid question but why the fix in llvm 17 was not backported to > previous versions? Unfortunately, LLVM releases are only supported for a few months with fixes, unlike GCC that supported their releases for a few years. By the time this issue was uncovered and resolved in LLVM main (17 at the time), LLVM 16 was no longer supported. I could potentially patch the kernel.org toolchains but that doesn't fix the issue for other versions of clang out there. > Anyway, I'd rather require llvm 17+ than add a bunch of preprocessor > guards in this file (IIUC what you said above) as it is complex > enough. Sure, this is not a super unreasonable issue to bump the minimum supported version for RISC-V over in my opinion, so no real objections from me. > @Conor Dooley @Palmer Dabbelt WDYT? Is there any interest in > supporting llvm < 17? We may encounter this bug again in the future so > I'd be in favor of moving to llvm 17+. FWIW, I would envision a diff like this (assuming it actually works to resolve this issue, I didn't actually test it): diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index 91c91201212c..e81eb7ed257d 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -28,6 +28,8 @@ llvm) echo 15.0.0 elif [ "$SRCARCH" = loongarch ]; then echo 18.0.0 + elif [ "$SRCARCH" = riscv ]; then + echo 17.0.0 else echo 13.0.1 fi Cheers, Nathan
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 05ccba8ca33a..1caaedec88c7 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE preemption. Enabling this config will result in higher memory consumption due to the allocation of per-task's kernel Vector context. +config TOOLCHAIN_HAS_ZACAS + bool + default y + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas) + depends on AS_HAS_OPTION_ARCH + +config RISCV_ISA_ZACAS + bool "Zacas extension support for atomic CAS" + depends on TOOLCHAIN_HAS_ZACAS + default y + help + Enable the use of the Zacas ISA-extension to implement kernel atomic + cmpxchg operations when it is detected at boot. + + If you don't know what to do here, say Y. + config TOOLCHAIN_HAS_ZBB bool default y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 06de9d365088..9fd13d7a9cc6 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -85,6 +85,9 @@ endif # Check if the toolchain supports Zihintpause extension riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause +# Check if the toolchain supports Zacas +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas + # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 808b4c78462e..a58a2141c6d3 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -9,6 +9,7 @@ #include <linux/bug.h> #include <asm/fence.h> +#include <asm/alternative.h> #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \ ({ \ @@ -134,21 +135,41 @@ r = (__typeof__(*(p)))((__retx & __mask) >> __s); \ }) -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \ +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \ ({ \ + __label__ zacas, end; \ register unsigned int __rc; \ \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ + asm goto(ALTERNATIVE("nop", "j %[zacas]", 0, \ + RISCV_ISA_EXT_ZACAS, 1) \ + : : : : zacas); \ + } \ + \ __asm__ __volatile__ ( \ prepend \ "0: lr" lr_sfx " %0, %2\n" \ " bne %0, %z3, 1f\n" \ - " sc" sc_sfx " %1, %z4, %2\n" \ + " sc" sc_cas_sfx " %1, %z4, %2\n" \ " bnez %1, 0b\n" \ append \ "1:\n" \ : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \ : "rJ" (co o), "rJ" (n) \ : "memory"); \ + goto end; \ + \ +zacas: \ + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \ + __asm__ __volatile__ ( \ + prepend \ + " amocas" sc_cas_sfx " %0, %z2, %1\n" \ + append \ + : "+&r" (r), "+A" (*(p)) \ + : "rJ" (n) \ + : "memory"); \ + } \ +end:; \ }) #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \ @@ -156,7 +177,7 @@ __typeof__(ptr) __ptr = (ptr); \ __typeof__(*(__ptr)) __old = (old); \ __typeof__(*(__ptr)) __new = (new); \ - __typeof__(*(__ptr)) __ret; \ + __typeof__(*(__ptr)) __ret = (old); \ \ switch (sizeof(*__ptr)) { \ case 1: \
This adds runtime support for Zacas in cmpxchg operations. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> --- arch/riscv/Kconfig | 17 +++++++++++++++++ arch/riscv/Makefile | 3 +++ arch/riscv/include/asm/cmpxchg.h | 27 ++++++++++++++++++++++++--- 3 files changed, 44 insertions(+), 3 deletions(-)