diff mbox series

[05/12] percpu: Add {raw,this}_cpu_try_cmpxchg()

Message ID 20230531132323.587480729@infradead.org (mailing list archive)
State Awaiting Upstream
Headers show
Series Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() | expand

Commit Message

Peter Zijlstra May 31, 2023, 1:08 p.m. UTC
Add the try_cmpxchg() form to the per-cpu ops.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/asm-generic/percpu.h |  113 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/percpu-defs.h  |   19 +++++++
 2 files changed, 128 insertions(+), 4 deletions(-)

Comments

Konrad Dybcio June 9, 2023, 4:10 p.m. UTC | #1
On 31.05.2023 15:08, Peter Zijlstra wrote:
> Add the try_cmpxchg() form to the per-cpu ops.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
+CC Nathan, llvm list

Hi all, this patch seems to break booting on Qualcomm ARM64 platforms
when compiled with clang (GCC works fine) for some reason..:

next-20230605 - works
next-20230606 - doesn't

grev -m 1 dc4e51fd9846 on next-20230606 - works again
b4 shazam <this_msgid> -P 1-4 - still works
b4 shazam <this_msgid> -P 5 - breaks

Confirmed on at least Qualcomm QCM2290, SM8250.

Checking the serial console, it hits a BUG_ON:

[    0.000000] ------------[ cut here ]------------
[    0.000000] kernel BUG at mm/vmalloc.c:1638!
[    0.000000] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [snip]
[    0.000000] Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT)
[    0.000000] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[    0.000000] pc : alloc_vmap_area+0xafc/0xb08
[    0.000000] lr : alloc_vmap_area+0x9e4/0xb08
[    0.000000] sp : ffffa50137f53c20
[    0.000000] x29: ffffa50137f53c60 x28: ffffa50137f30c18 x27: 0000000000000000
[    0.000000] x26: 0000000000007fff x25: ffff800080000000 x24: 000000000000cfff
[    0.000000] x23: ffffffffffff8000 x22: ffffa50137fef970 x21: fffffbfff0000000
[    0.000000] x20: ffff022982003208 x19: ffff0229820031f8 x18: ffffa50137f64f70
[    0.000000] x17: ffffa50137fef980 x16: ffffa501375e6d08 x15: 0000000000000001
[    0.000000] x14: ffffa5013831e1a0 x13: ffffa50137f30c18 x12: 0000000000402dc2
[    0.000000] x11: 0000000000000000 x10: ffff022982003018 x9 : ffffa5013831e188
[    0.000000] x8 : ffffcb55ff003228 x7 : 0000000000000000 x6 : 0000000000000048
[    0.000000] x5 : 0000000000000000 x4 : ffffa50137f53bd0 x3 : ffffa50136490000
[    0.000000] x2 : 0000000000000001 x1 : ffffa5013831e190 x0 : ffff022982003208
[    0.000000] Call trace:
[    0.000000]  alloc_vmap_area+0xafc/0xb08
[    0.000000]  __get_vm_area_node+0x108/0x1e8
[    0.000000]  __vmalloc_node_range+0x1fc/0x728
[    0.000000]  __vmalloc_node+0x5c/0x70
[    0.000000]  init_IRQ+0x90/0x11c
[    0.000000]  start_kernel+0x1ac/0x3bc
[    0.000000]  __primary_switched+0xc4/0xcc
[    0.000000] Code: f000e300 91062000 943bd9ba 17ffff8f (d4210000)
[    0.000000] ---[ end trace 0000000000000000 ]---
[    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!

Compiled with clang 15.0.7 from Arch repos, with
make ARCH=arm64 LLVM=1

Konrad
>  include/asm-generic/percpu.h |  113 +++++++++++++++++++++++++++++++++++++++++--
>  include/linux/percpu-defs.h  |   19 +++++++
>  2 files changed, 128 insertions(+), 4 deletions(-)
> 
> --- a/include/asm-generic/percpu.h
> +++ b/include/asm-generic/percpu.h
> @@ -89,16 +89,37 @@ do {									\
>  	__ret;								\
>  })
>  
> -#define raw_cpu_generic_cmpxchg(pcp, oval, nval)			\
> +#define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg)		\
> +({									\
> +	typeof(pcp) __val, __old = *(ovalp);				\
> + 	__val = _cmpxchg(pcp, __old, nval);				\
> +	if (__val != __old)						\
> +		*(ovalp) = __val;					\
> +	__val == __old;							\
> +})
> +
> +#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
>  ({									\
>  	typeof(pcp) *__p = raw_cpu_ptr(&(pcp));				\
> -	typeof(pcp) __ret;						\
> -	__ret = *__p;							\
> -	if (__ret == (oval))						\
> +	typeof(pcp) __val = *__p, __old = *(ovalp);			\
> +	bool __ret;							\
> +	if (__val == __old) {						\
>  		*__p = nval;						\
> +		__ret = true;						\
> +	} else {							\
> +		*(ovalp) = __val;					\
> +		__ret = false;						\
> +	}								\
>  	__ret;								\
>  })
>  
> +#define raw_cpu_generic_cmpxchg(pcp, oval, nval)			\
> +({									\
> +	typeof(pcp) __old = (oval);					\
> +	raw_cpu_generic_try_cmpxchg(pcp, &__old, nval);			\
> +	__old;								\
> +})
> +
>  #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
>  ({									\
>  	typeof(pcp1) *__p1 = raw_cpu_ptr(&(pcp1));			\
> @@ -170,6 +191,16 @@ do {									\
>  	__ret;								\
>  })
>  
> +#define this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
> +({									\
> +	bool __ret;							\
> +	unsigned long __flags;						\
> +	raw_local_irq_save(__flags);					\
> +	__ret = raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval);		\
> +	raw_local_irq_restore(__flags);					\
> +	__ret;								\
> +})
> +
>  #define this_cpu_generic_cmpxchg(pcp, oval, nval)			\
>  ({									\
>  	typeof(pcp) __ret;						\
> @@ -282,6 +313,43 @@ do {									\
>  #define raw_cpu_xchg_8(pcp, nval)	raw_cpu_generic_xchg(pcp, nval)
>  #endif
>  
> +#ifndef raw_cpu_try_cmpxchg_1
> +#ifdef raw_cpu_cmpxchg_1
> +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_1)
> +#else
> +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
> +	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef raw_cpu_try_cmpxchg_2
> +#ifdef raw_cpu_cmpxchg_2
> +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_2)
> +#else
> +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
> +	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef raw_cpu_try_cmpxchg_4
> +#ifdef raw_cpu_cmpxchg_4
> +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_4)
> +#else
> +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
> +	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef raw_cpu_try_cmpxchg_8
> +#ifdef raw_cpu_cmpxchg_8
> +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_8)
> +#else
> +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
> +	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +
>  #ifndef raw_cpu_cmpxchg_1
>  #define raw_cpu_cmpxchg_1(pcp, oval, nval) \
>  	raw_cpu_generic_cmpxchg(pcp, oval, nval)
> @@ -407,6 +475,43 @@ do {									\
>  #define this_cpu_xchg_8(pcp, nval)	this_cpu_generic_xchg(pcp, nval)
>  #endif
>  
> +#ifndef this_cpu_try_cmpxchg_1
> +#ifdef this_cpu_cmpxchg_1
> +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_1)
> +#else
> +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
> +	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef this_cpu_try_cmpxchg_2
> +#ifdef this_cpu_cmpxchg_2
> +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_2)
> +#else
> +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
> +	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef this_cpu_try_cmpxchg_4
> +#ifdef this_cpu_cmpxchg_4
> +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_4)
> +#else
> +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
> +	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +#ifndef this_cpu_try_cmpxchg_8
> +#ifdef this_cpu_cmpxchg_8
> +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
> +	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_8)
> +#else
> +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
> +	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
> +#endif
> +#endif
> +
>  #ifndef this_cpu_cmpxchg_1
>  #define this_cpu_cmpxchg_1(pcp, oval, nval) \
>  	this_cpu_generic_cmpxchg(pcp, oval, nval)
> --- a/include/linux/percpu-defs.h
> +++ b/include/linux/percpu-defs.h
> @@ -343,6 +343,21 @@ static __always_inline void __this_cpu_p
>  	pscr2_ret__;							\
>  })
>  
> +#define __pcpu_size_call_return2bool(stem, variable, ...)		\
> +({									\
> +	bool pscr2_ret__;						\
> +	__verify_pcpu_ptr(&(variable));					\
> +	switch(sizeof(variable)) {					\
> +	case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break;	\
> +	case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break;	\
> +	case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break;	\
> +	case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break;	\
> +	default:							\
> +		__bad_size_call_parameter(); break;			\
> +	}								\
> +	pscr2_ret__;							\
> +})
> +
>  /*
>   * Special handling for cmpxchg_double.  cmpxchg_double is passed two
>   * percpu variables.  The first has to be aligned to a double word
> @@ -426,6 +441,8 @@ do {									\
>  #define raw_cpu_xchg(pcp, nval)		__pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
>  #define raw_cpu_cmpxchg(pcp, oval, nval) \
>  	__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
> +#define raw_cpu_try_cmpxchg(pcp, ovalp, nval) \
> +	__pcpu_size_call_return2bool(raw_cpu_try_cmpxchg_, pcp, ovalp, nval)
>  #define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
>  	__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
>  
> @@ -513,6 +530,8 @@ do {									\
>  #define this_cpu_xchg(pcp, nval)	__pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
>  #define this_cpu_cmpxchg(pcp, oval, nval) \
>  	__pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
> +#define this_cpu_try_cmpxchg(pcp, ovalp, nval) \
> +	__pcpu_size_call_return2bool(this_cpu_try_cmpxchg_, pcp, ovalp, nval)
>  #define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
>  	__pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
>
Nathan Chancellor June 9, 2023, 4:13 p.m. UTC | #2
Hi Konrad,

On Fri, Jun 09, 2023 at 06:10:38PM +0200, Konrad Dybcio wrote:
> 
> 
> On 31.05.2023 15:08, Peter Zijlstra wrote:
> > Add the try_cmpxchg() form to the per-cpu ops.
> > 
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> +CC Nathan, llvm list
> 
> Hi all, this patch seems to break booting on Qualcomm ARM64 platforms
> when compiled with clang (GCC works fine) for some reason..:
> 
> next-20230605 - works
> next-20230606 - doesn't
> 
> grev -m 1 dc4e51fd9846 on next-20230606 - works again
> b4 shazam <this_msgid> -P 1-4 - still works
> b4 shazam <this_msgid> -P 5 - breaks
> 
> Confirmed on at least Qualcomm QCM2290, SM8250.
> 
> Checking the serial console, it hits a BUG_ON:
> 
> [    0.000000] ------------[ cut here ]------------
> [    0.000000] kernel BUG at mm/vmalloc.c:1638!
> [    0.000000] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
> [    0.000000] Modules linked in:
> [    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [snip]
> [    0.000000] Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT)
> [    0.000000] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [    0.000000] pc : alloc_vmap_area+0xafc/0xb08
> [    0.000000] lr : alloc_vmap_area+0x9e4/0xb08
> [    0.000000] sp : ffffa50137f53c20
> [    0.000000] x29: ffffa50137f53c60 x28: ffffa50137f30c18 x27: 0000000000000000
> [    0.000000] x26: 0000000000007fff x25: ffff800080000000 x24: 000000000000cfff
> [    0.000000] x23: ffffffffffff8000 x22: ffffa50137fef970 x21: fffffbfff0000000
> [    0.000000] x20: ffff022982003208 x19: ffff0229820031f8 x18: ffffa50137f64f70
> [    0.000000] x17: ffffa50137fef980 x16: ffffa501375e6d08 x15: 0000000000000001
> [    0.000000] x14: ffffa5013831e1a0 x13: ffffa50137f30c18 x12: 0000000000402dc2
> [    0.000000] x11: 0000000000000000 x10: ffff022982003018 x9 : ffffa5013831e188
> [    0.000000] x8 : ffffcb55ff003228 x7 : 0000000000000000 x6 : 0000000000000048
> [    0.000000] x5 : 0000000000000000 x4 : ffffa50137f53bd0 x3 : ffffa50136490000
> [    0.000000] x2 : 0000000000000001 x1 : ffffa5013831e190 x0 : ffff022982003208
> [    0.000000] Call trace:
> [    0.000000]  alloc_vmap_area+0xafc/0xb08
> [    0.000000]  __get_vm_area_node+0x108/0x1e8
> [    0.000000]  __vmalloc_node_range+0x1fc/0x728
> [    0.000000]  __vmalloc_node+0x5c/0x70
> [    0.000000]  init_IRQ+0x90/0x11c
> [    0.000000]  start_kernel+0x1ac/0x3bc
> [    0.000000]  __primary_switched+0xc4/0xcc
> [    0.000000] Code: f000e300 91062000 943bd9ba 17ffff8f (d4210000)
> [    0.000000] ---[ end trace 0000000000000000 ]---
> [    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
> 
> Compiled with clang 15.0.7 from Arch repos, with
> make ARCH=arm64 LLVM=1

Thanks a lot for testing with LLVM, submitting this report, and doing a
bisect. I sent a patch to fix this a couple of days ago and Peter pushed
it to -tip today, so it should be in the next -next release:

https://git.kernel.org/tip/093d9b240a1fa261ff8aeb7c7cc484dedacfda53

Cheers,
Nathan
Konrad Dybcio June 9, 2023, 4:20 p.m. UTC | #3
On 9.06.2023 18:13, Nathan Chancellor wrote:
> Hi Konrad,
> 
> On Fri, Jun 09, 2023 at 06:10:38PM +0200, Konrad Dybcio wrote:
>>
>>
>> On 31.05.2023 15:08, Peter Zijlstra wrote:
>>> Add the try_cmpxchg() form to the per-cpu ops.
>>>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>> ---
>> +CC Nathan, llvm list
>>
>> Hi all, this patch seems to break booting on Qualcomm ARM64 platforms
>> when compiled with clang (GCC works fine) for some reason..:
>>
>> next-20230605 - works
>> next-20230606 - doesn't
>>
>> grev -m 1 dc4e51fd9846 on next-20230606 - works again
>> b4 shazam <this_msgid> -P 1-4 - still works
>> b4 shazam <this_msgid> -P 5 - breaks
>>
>> Confirmed on at least Qualcomm QCM2290, SM8250.
>>
>> Checking the serial console, it hits a BUG_ON:
>>
>> [    0.000000] ------------[ cut here ]------------
>> [    0.000000] kernel BUG at mm/vmalloc.c:1638!
>> [    0.000000] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
>> [    0.000000] Modules linked in:
>> [    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted [snip]
>> [    0.000000] Hardware name: Qualcomm Technologies, Inc. Robotics RB1 (DT)
>> [    0.000000] pstate: 000000c5 (nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>> [    0.000000] pc : alloc_vmap_area+0xafc/0xb08
>> [    0.000000] lr : alloc_vmap_area+0x9e4/0xb08
>> [    0.000000] sp : ffffa50137f53c20
>> [    0.000000] x29: ffffa50137f53c60 x28: ffffa50137f30c18 x27: 0000000000000000
>> [    0.000000] x26: 0000000000007fff x25: ffff800080000000 x24: 000000000000cfff
>> [    0.000000] x23: ffffffffffff8000 x22: ffffa50137fef970 x21: fffffbfff0000000
>> [    0.000000] x20: ffff022982003208 x19: ffff0229820031f8 x18: ffffa50137f64f70
>> [    0.000000] x17: ffffa50137fef980 x16: ffffa501375e6d08 x15: 0000000000000001
>> [    0.000000] x14: ffffa5013831e1a0 x13: ffffa50137f30c18 x12: 0000000000402dc2
>> [    0.000000] x11: 0000000000000000 x10: ffff022982003018 x9 : ffffa5013831e188
>> [    0.000000] x8 : ffffcb55ff003228 x7 : 0000000000000000 x6 : 0000000000000048
>> [    0.000000] x5 : 0000000000000000 x4 : ffffa50137f53bd0 x3 : ffffa50136490000
>> [    0.000000] x2 : 0000000000000001 x1 : ffffa5013831e190 x0 : ffff022982003208
>> [    0.000000] Call trace:
>> [    0.000000]  alloc_vmap_area+0xafc/0xb08
>> [    0.000000]  __get_vm_area_node+0x108/0x1e8
>> [    0.000000]  __vmalloc_node_range+0x1fc/0x728
>> [    0.000000]  __vmalloc_node+0x5c/0x70
>> [    0.000000]  init_IRQ+0x90/0x11c
>> [    0.000000]  start_kernel+0x1ac/0x3bc
>> [    0.000000]  __primary_switched+0xc4/0xcc
>> [    0.000000] Code: f000e300 91062000 943bd9ba 17ffff8f (d4210000)
>> [    0.000000] ---[ end trace 0000000000000000 ]---
>> [    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
>>
>> Compiled with clang 15.0.7 from Arch repos, with
>> make ARCH=arm64 LLVM=1
> 
> Thanks a lot for testing with LLVM, submitting this report, and doing a
> bisect.
No, thank *you* for making it even possible ;)

> I sent a patch to fix this a couple of days ago and Peter pushed
> it to -tip today, so it should be in the next -next release:
> 
> https://git.kernel.org/tip/093d9b240a1fa261ff8aeb7c7cc484dedacfda53
Amazing, I can boot the most recent next-20230609 with it again!

Konrad
> 
> Cheers,
> Nathan
diff mbox series

Patch

--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -89,16 +89,37 @@  do {									\
 	__ret;								\
 })
 
-#define raw_cpu_generic_cmpxchg(pcp, oval, nval)			\
+#define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg)		\
+({									\
+	typeof(pcp) ___val, ___old = *(ovalp);				\
+	___val = _cmpxchg(pcp, ___old, nval);				\
+	if (___val != ___old)						\
+		*(ovalp) = ___val;					\
+	___val == ___old;						\
+})
+
+#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
 ({									\
 	typeof(pcp) *__p = raw_cpu_ptr(&(pcp));				\
-	typeof(pcp) __ret;						\
-	__ret = *__p;							\
-	if (__ret == (oval))						\
+	typeof(pcp) __val = *__p, ___old = *(ovalp); /* ovalp may be &__old */ \
+	bool __ret;							\
+	if (__val == ___old) {						\
 		*__p = nval;						\
+		__ret = true;						\
+	} else {							\
+		*(ovalp) = __val;					\
+		__ret = false;						\
+	}								\
 	__ret;								\
 })
 
+#define raw_cpu_generic_cmpxchg(pcp, oval, nval)			\
+({									\
+	typeof(pcp) __old = (oval);					\
+	raw_cpu_generic_try_cmpxchg(pcp, &__old, nval);			\
+	__old;								\
+})
+
 #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
 ({									\
 	typeof(pcp1) *__p1 = raw_cpu_ptr(&(pcp1));			\
@@ -170,6 +191,16 @@  do {									\
 	__ret;								\
 })
 
+#define this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
+({									\
+	bool __ret;							\
+	unsigned long __flags;						\
+	raw_local_irq_save(__flags);					\
+	__ret = raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval);		\
+	raw_local_irq_restore(__flags);					\
+	__ret;								\
+})
+
 #define this_cpu_generic_cmpxchg(pcp, oval, nval)			\
 ({									\
 	typeof(pcp) __ret;						\
@@ -282,6 +313,43 @@  do {									\
 #define raw_cpu_xchg_8(pcp, nval)	raw_cpu_generic_xchg(pcp, nval)
 #endif
 
+#ifndef raw_cpu_try_cmpxchg_1
+#ifdef raw_cpu_cmpxchg_1
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_1)
+#else
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_2
+#ifdef raw_cpu_cmpxchg_2
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_2)
+#else
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_4
+#ifdef raw_cpu_cmpxchg_4
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_4)
+#else
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef raw_cpu_try_cmpxchg_8
+#ifdef raw_cpu_cmpxchg_8
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_8)
+#else
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
 #ifndef raw_cpu_cmpxchg_1
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \
 	raw_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -407,6 +475,43 @@  do {									\
 #define this_cpu_xchg_8(pcp, nval)	this_cpu_generic_xchg(pcp, nval)
 #endif
 
+#ifndef this_cpu_try_cmpxchg_1
+#ifdef this_cpu_cmpxchg_1
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_1)
+#else
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_2
+#ifdef this_cpu_cmpxchg_2
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_2)
+#else
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_4
+#ifdef this_cpu_cmpxchg_4
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_4)
+#else
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+#ifndef this_cpu_try_cmpxchg_8
+#ifdef this_cpu_cmpxchg_8
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	__cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_8)
+#else
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#endif
+
 #ifndef this_cpu_cmpxchg_1
 #define this_cpu_cmpxchg_1(pcp, oval, nval) \
 	this_cpu_generic_cmpxchg(pcp, oval, nval)
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -343,6 +343,21 @@  static __always_inline void __this_cpu_p
 	pscr2_ret__;							\
 })
 
+#define __pcpu_size_call_return2bool(stem, variable, ...)		\
+({									\
+	bool pscr2_ret__;						\
+	__verify_pcpu_ptr(&(variable));					\
+	switch(sizeof(variable)) {					\
+	case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break;	\
+	case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break;	\
+	case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break;	\
+	case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break;	\
+	default:							\
+		__bad_size_call_parameter(); break;			\
+	}								\
+	pscr2_ret__;							\
+})
+
 /*
  * Special handling for cmpxchg_double.  cmpxchg_double is passed two
  * percpu variables.  The first has to be aligned to a double word
@@ -426,6 +441,8 @@  do {									\
 #define raw_cpu_xchg(pcp, nval)		__pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
 #define raw_cpu_cmpxchg(pcp, oval, nval) \
 	__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#define raw_cpu_try_cmpxchg(pcp, ovalp, nval) \
+	__pcpu_size_call_return2bool(raw_cpu_try_cmpxchg_, pcp, ovalp, nval)
 #define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
 	__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
 
@@ -513,6 +530,8 @@  do {									\
 #define this_cpu_xchg(pcp, nval)	__pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
 #define this_cpu_cmpxchg(pcp, oval, nval) \
 	__pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval)
+#define this_cpu_try_cmpxchg(pcp, ovalp, nval) \
+	__pcpu_size_call_return2bool(this_cpu_try_cmpxchg_, pcp, ovalp, nval)
 #define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
 	__pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)