[V2,10/11] x86/rcu: Add rcu_preempt_count

Message ID 20240407090558.3395-11-jiangshanlai@gmail.com (mailing list archive)
State New
Series rcu/x86: Use per-cpu rcu preempt count

Commit Message

Lai Jiangshan April 7, 2024, 9:05 a.m. UTC
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

Implement PCPU_RCU_PREEMPT_COUNT for x86.
Mainly copied from asm/preempt.h

Make rcu_read_[un]lock() inlined for rcu-preempt.
Make rcu_read_lock() only one instruction.
Make rcu_read_unlock() only two instructions in the fast path.
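
For reference, the fast paths are intended to compile down to roughly the
following (a sketch, not verified codegen; the percpu offset and the branch
target are placeholders):

	rcu_read_lock():
		incl	%gs:pcpu_hot+<rcu_preempt_count>	# one instruction

	rcu_read_unlock(), fast path:
		decl	%gs:pcpu_hot+<rcu_preempt_count>	# decrement and test, plus
		je	<rcu_read_unlock_special slow path>	# a rarely-taken branch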

Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 arch/x86/Kconfig                   |   1 +
 arch/x86/include/asm/current.h     |   3 +
 arch/x86/include/asm/rcu_preempt.h | 107 +++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c       |   4 ++
 4 files changed, 115 insertions(+)
 create mode 100644 arch/x86/include/asm/rcu_preempt.h

Comments

Joel Fernandes April 23, 2024, 6:09 p.m. UTC | #1
On Sun, Apr 7, 2024 at 5:06 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> Implement PCPU_RCU_PREEMPT_COUNT for x86.
> Mainly copied from asm/preempt.h
>
> Make rcu_read_[un]lock() inlined for rcu-preempt.

Changelog is wrong. You inlined rcu_read_[un]lock in previous patch,
not this one?

- Joel

Lai Jiangshan April 24, 2024, 2:53 a.m. UTC | #2
On Wed, Apr 24, 2024 at 2:09 AM Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Sun, Apr 7, 2024 at 5:06 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
> >
> > From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> >
> > Implement PCPU_RCU_PREEMPT_COUNT for x86.
> > Mainly copied from asm/preempt.h
> >
> > Make rcu_read_[un]lock() inlined for rcu-preempt.
>
> Changelog is wrong. You inlined rcu_read_[un]lock in previous patch,
> not this one?

The previous patch just adds the non-arch framework code; the inlining only
happens when CONFIG_PCPU_RCU_PREEMPT_COUNT=y. This patch implements
PCPU_RCU_PREEMPT_COUNT for x86, so rcu_read_[un]lock() does not actually
become inline for rcu-preempt on x86 until this patch.
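
Concretely, a simplified sketch of how the two patches combine (the exact
#ifdef placement is illustrative, not the series' literal framework code):

	static __always_inline void rcu_read_lock(void)
	{
	#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
		/* This patch: inlines to a single percpu incl on x86. */
		pcpu_rcu_preempt_count_add(1);
		barrier();
	#else
		/* Otherwise an out-of-line call into kernel/rcu/. */
		__rcu_read_lock();
	#endif
	}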

Thanks
Lai


Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4fff6ed46e90..e805cac3763d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -256,6 +256,7 @@ config X86
 	select HAVE_OBJTOOL			if X86_64
 	select HAVE_OPTPROBES
 	select HAVE_PAGE_SIZE_4KB
+	select HAVE_PCPU_RCU_PREEMPT_COUNT
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..dcc2ef784120 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -24,6 +24,9 @@ struct pcpu_hot {
 			unsigned long		top_of_stack;
 			void			*hardirq_stack_ptr;
 			u16			softirq_pending;
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+			int			rcu_preempt_count;
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
 #ifdef CONFIG_X86_64
 			bool			hardirq_stack_inuse;
 #else
diff --git a/arch/x86/include/asm/rcu_preempt.h b/arch/x86/include/asm/rcu_preempt.h
new file mode 100644
index 000000000000..cb25ebe038a5
--- /dev/null
+++ b/arch/x86/include/asm/rcu_preempt.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_RCU_PREEMPT_H
+#define __ASM_RCU_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <asm/current.h>
+
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
+/* We use the MSB mostly because it's available */
+#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED	0x80000000
+
+/*
+ * We use the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit as an inverted
+ * current->rcu_read_unlock_special.s such that a decrement hitting 0
+ * means we can and should call rcu_read_unlock_special().
+ */
+#define RCU_PREEMPT_INIT	(0 + RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED)
+
+/*
+ * We mask the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit so as not to
+ * confuse all current users that think a non-zero value indicates we
+ * are in a critical section.
+ */
+static inline int pcpu_rcu_preempt_count(void)
+{
+	return raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED;
+}
+
+static inline void pcpu_rcu_preempt_count_set(int count)
+{
+	int old, new;
+
+	old = raw_cpu_read_4(pcpu_hot.rcu_preempt_count);
+	do {
+		new = (old & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED) |
+			(count & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+	} while (!raw_cpu_try_cmpxchg_4(pcpu_hot.rcu_preempt_count, &old, new));
+}
+
+/*
+ * We fold the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit into the RCU
+ * preempt count such that rcu_read_unlock() can decrement and test for
+ * the need of unlock-special handling with a single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know
+ * we both reach a quiescent state (no rcu preempt count) and need to
+ * handle unlock-special (the bit is cleared), normally to report the
+ * quiescent state immediately.
+ */
+
+static inline void pcpu_rcu_preempt_special_set(void)
+{
+	raw_cpu_and_4(pcpu_hot.rcu_preempt_count, ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline void pcpu_rcu_preempt_special_clear(void)
+{
+	raw_cpu_or_4(pcpu_hot.rcu_preempt_count, RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline bool pcpu_rcu_preempt_special_test(void)
+{
+	return !(raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline void pcpu_rcu_preempt_switch(int count, bool special)
+{
+	if (likely(!special))
+		raw_cpu_write(pcpu_hot.rcu_preempt_count, count | RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+	else
+		raw_cpu_write(pcpu_hot.rcu_preempt_count, count);
+}
+
+/*
+ * The various rcu_preempt_count add/sub methods
+ */
+
+static __always_inline void pcpu_rcu_preempt_count_add(int val)
+{
+	raw_cpu_add_4(pcpu_hot.rcu_preempt_count, val);
+}
+
+static __always_inline void pcpu_rcu_preempt_count_sub(int val)
+{
+	raw_cpu_add_4(pcpu_hot.rcu_preempt_count, -val);
+}
+
+/*
+ * A fast-path decrement never hits zero: RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED
+ * stays set while unlock-special handling is _not_ needed.
+ */
+static __always_inline bool pcpu_rcu_preempt_count_dec_and_test(void)
+{
+	return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.rcu_preempt_count), e,
+			       __percpu_arg([var]));
+}
+
+#define pcpu_rcu_read_unlock_special()						\
+do {										\
+	rcu_read_unlock_special();						\
+} while (0)
+
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
+#endif /* __ASM_RCU_PREEMPT_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5c1e6d6be267..918b1f5cb75d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1995,6 +1995,10 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
 	.current_task	= &init_task,
 	.preempt_count	= INIT_PREEMPT_COUNT,
 	.top_of_stack	= TOP_OF_INIT_STACK,
+
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+	.rcu_preempt_count	= RCU_PREEMPT_INIT,
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
 };
 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
 EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
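
The inverted-bit encoding above can be checked with a small standalone C
simulation (illustration only: plain C stands in for the percpu operations,
and the flow is condensed; in real use the special bit is set while the
reader is still inside the critical section):

	#include <stdbool.h>
	#include <stdio.h>

	#define SPECIAL_INVERTED	0x80000000u

	/* Stand-in for this CPU's pcpu_hot.rcu_preempt_count. */
	static unsigned int count = SPECIAL_INVERTED;	/* RCU_PREEMPT_INIT */

	/* Mirrors pcpu_rcu_preempt_count_dec_and_test(): one decl, then test ZF. */
	static bool dec_and_test(void)
	{
		return --count == 0;
	}

	int main(void)
	{
		count += 1;			/* rcu_read_lock():   0x80000001 */
		if (!dec_and_test())		/* rcu_read_unlock(): 0x80000000, not zero */
			puts("fast path: no special work");

		count &= ~SPECIAL_INVERTED;	/* pcpu_rcu_preempt_special_set() */
		count += 1;			/* rcu_read_lock():   0x00000001 */
		if (dec_and_test())		/* rcu_read_unlock(): hits zero */
			puts("slow path: rcu_read_unlock_special()");
		return 0;
	}

A decrement that reaches zero therefore signals both "read-side depth is
zero" and "special handling is pending" at once, which is exactly what the
single decl + je pair emitted via GEN_UNARY_RMWcc() tests.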