diff mbox series

[V2,09/11] rcu: Implement PCPU_RCU_PREEMPT_COUNT framework

Message ID 20240407090558.3395-10-jiangshanlai@gmail.com (mailing list archive)
State New
Headers show
Series rcu/x86: Use per-cpu rcu preempt count | expand

Commit Message

Lai Jiangshan April 7, 2024, 9:05 a.m. UTC
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
corresponding functions, the RCU core uses those functions to implement
rcu_preempt_depth(), the special bits, context switching, and so on.

Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
 include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
 kernel/rcu/Kconfig       |  8 ++++++++
 kernel/rcu/rcu.h         |  4 ++++
 kernel/rcu/tree_plugin.h |  8 ++++++++
 4 files changed, 53 insertions(+)

Comments

Joel Fernandes April 23, 2024, 6:19 p.m. UTC | #1
On Sun, Apr 7, 2024 at 5:04 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
> corresponding functions, rcu core uses the functions to implement
> rcu_preempt_depth(), special bits, switching and so on.
>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> ---
>  include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
>  kernel/rcu/Kconfig       |  8 ++++++++
>  kernel/rcu/rcu.h         |  4 ++++
>  kernel/rcu/tree_plugin.h |  8 ++++++++
>  4 files changed, 53 insertions(+)
>
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 328667ae8086..e3e5ce44c7dc 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -70,6 +70,8 @@ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned
>
>  void rcu_read_unlock_special(void);
>
> +#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
> +
>  void __rcu_read_lock(void);
>  void __rcu_read_unlock(void);
>
> @@ -81,6 +83,37 @@ void __rcu_read_unlock(void);
>   */
>  #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
>  #define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
> +#define pcpu_rcu_preempt_special_set() do { } while (0)
> +#define pcpu_rcu_preempt_special_clear() do { } while (0)
> +
> +#else /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
> +
> +#include <asm/rcu_preempt.h>
> +
> +static __always_inline void __rcu_read_lock(void)
> +{
> +       pcpu_rcu_preempt_count_add(1);
> +       barrier();
> +}
> +
> +static __always_inline void __rcu_read_unlock(void)
> +{
> +       barrier();
> +       if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))
> +               pcpu_rcu_read_unlock_special();
> +}

The previous code had comments on the barrier() calls; can you add
those comments back?

Also there was a compiler barrier in the body of the if() as well?

For reference:

void __rcu_read_unlock(void)
{
        struct task_struct *t = current;

        barrier();  // critical section before exit code.
        if (rcu_preempt_read_exit() == 0) {
                barrier();  // critical-section exit before .s check.
                if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
                        rcu_read_unlock_special(t);
        }
Lai Jiangshan April 24, 2024, 3:02 a.m. UTC | #2
On Wed, Apr 24, 2024 at 2:19 AM Joel Fernandes <joel@joelfernandes.org> wrote:
>
> On Sun, Apr 7, 2024 at 5:04 AM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
> >
> > From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> >
> > When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
> > corresponding functions, rcu core uses the functions to implement
> > rcu_preempt_depth(), special bits, switching and so on.
> >
> > Cc: "Paul E. McKenney" <paulmck@kernel.org>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: Frederic Weisbecker <frederic@kernel.org>
> > Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> > ---
> >  include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
> >  kernel/rcu/Kconfig       |  8 ++++++++
> >  kernel/rcu/rcu.h         |  4 ++++
> >  kernel/rcu/tree_plugin.h |  8 ++++++++
> >  4 files changed, 53 insertions(+)
> >
> > diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> > index 328667ae8086..e3e5ce44c7dc 100644
> > --- a/include/linux/rcupdate.h
> > +++ b/include/linux/rcupdate.h
> > @@ -70,6 +70,8 @@ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned
> >
> >  void rcu_read_unlock_special(void);
> >
> > +#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
> > +
> >  void __rcu_read_lock(void);
> >  void __rcu_read_unlock(void);
> >
> > @@ -81,6 +83,37 @@ void __rcu_read_unlock(void);
> >   */
> >  #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
> >  #define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
> > +#define pcpu_rcu_preempt_special_set() do { } while (0)
> > +#define pcpu_rcu_preempt_special_clear() do { } while (0)
> > +
> > +#else /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
> > +
> > +#include <asm/rcu_preempt.h>
> > +
> > +static __always_inline void __rcu_read_lock(void)
> > +{
> > +       pcpu_rcu_preempt_count_add(1);
> > +       barrier();
> > +}
> > +
> > +static __always_inline void __rcu_read_unlock(void)
> > +{
> > +       barrier();
> > +       if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))
> > +               pcpu_rcu_read_unlock_special();
> > +}
>
> Previous code had comments about the barrier(); , can you add back
> those comments?
>
> Also there was a compiler barrier in the body of the if() as well?
>

The two "if"s in the referenced __rcu_read_unlock() are condensed into
a single "if" ("if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))"),
so there is no extra barrier() needed in the body of the "if", which
is analogous to the body of the second "if" of the referenced
__rcu_read_unlock().

The special bit and the rcu_depth_count are condensed, so the code
mostly follows the way preempt_enable() works.

Thanks
Lai

> For reference:
>
> void __rcu_read_unlock(void)
> {
>         struct task_struct *t = current;
>
>         barrier();  // critical section before exit code.
>         if (rcu_preempt_read_exit() == 0) {
>                 barrier();  // critical-section exit before .s check.
>                 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
>                         rcu_read_unlock_special(t);
>         }
diff mbox series

Patch

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 328667ae8086..e3e5ce44c7dc 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -70,6 +70,8 @@  static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned
 
 void rcu_read_unlock_special(void);
 
+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
 void __rcu_read_lock(void);
 void __rcu_read_unlock(void);
 
@@ -81,6 +83,37 @@  void __rcu_read_unlock(void);
  */
 #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
 #define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
+#define pcpu_rcu_preempt_special_set() do { } while (0)
+#define pcpu_rcu_preempt_special_clear() do { } while (0)
+
+#else /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
+#include <asm/rcu_preempt.h>
+
+static __always_inline void __rcu_read_lock(void)
+{
+	pcpu_rcu_preempt_count_add(1);
+	barrier();
+}
+
+static __always_inline void __rcu_read_unlock(void)
+{
+	barrier();
+	if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))
+		pcpu_rcu_read_unlock_special();
+}
+
+static inline int rcu_preempt_depth(void)
+{
+	return pcpu_rcu_preempt_count();
+}
+
+static inline void rcu_preempt_depth_set(int val)
+{
+	pcpu_rcu_preempt_count_set(val);
+}
+
+#endif /* #else #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index e7d2dd267593..5d91147bc9a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -345,4 +345,12 @@  config RCU_DOUBLE_CHECK_CB_TIME
 	  Say Y here if you need tighter callback-limit enforcement.
 	  Say N here if you are unsure.
 
+config HAVE_PCPU_RCU_PREEMPT_COUNT
+	bool
+
+config PCPU_RCU_PREEMPT_COUNT
+	def_bool y
+	depends on PREEMPT_RCU && HAVE_PCPU_RCU_PREEMPT_COUNT
+	depends on !PROVE_LOCKING && !RCU_STRICT_GRACE_PERIOD
+
 endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index ea5ae957c687..2322b040c5cd 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -671,6 +671,10 @@  static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { re
 static inline void
 rcu_preempt_switch(struct task_struct *prev, struct task_struct *next)
 {
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+	prev->rcu_read_lock_nesting = rcu_preempt_depth();
+	pcpu_rcu_preempt_switch(next->rcu_read_lock_nesting, next->rcu_read_unlock_special.s);
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
 }
 
 #endif /* __KERNEL_RCU_H */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 880b3fef1158..db68d0c1c1f2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -107,10 +107,13 @@  static bool sync_rcu_exp_done(struct rcu_node *rnp);
 
 #define set_rcu_preempt_special(reason)	do {				\
 	WRITE_ONCE(current->rcu_read_unlock_special.b.reason, true);	\
+	pcpu_rcu_preempt_special_set();					\
 	} while (0)
 
 #define clear_rcu_preempt_special(reason)	do {			\
 	WRITE_ONCE(current->rcu_read_unlock_special.b.reason, false);	\
+	if (!current->rcu_read_unlock_special.s)			\
+		pcpu_rcu_preempt_special_clear();			\
 	} while (0)
 
 /*
@@ -379,6 +382,8 @@  static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 	return READ_ONCE(rnp->gp_tasks) != NULL;
 }
 
+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
 /* limit value for ->rcu_read_lock_nesting. */
 #define RCU_NEST_PMAX (INT_MAX / 2)
 
@@ -436,6 +441,8 @@  void __rcu_read_unlock(void)
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 
+#endif /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
 /*
  * Advance a ->blkd_tasks-list pointer to the next entry, instead
  * returning NULL if at the end of the list.
@@ -489,6 +496,7 @@  rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		return;
 	}
 	t->rcu_read_unlock_special.s = 0;
+	pcpu_rcu_preempt_special_clear();
 	if (special.b.need_qs) {
 		if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
 			rdp->cpu_no_qs.b.norm = false;