diff mbox series

[v2,07/10] rcu/tasks: Check RCU watching state for holdout idle injection tasks

Message ID 20241009125127.18902-8-neeraj.upadhyay@kernel.org (mailing list archive)
State New, archived
Headers show
Series Make RCU Tasks scan idle tasks | expand

Commit Message

Neeraj Upadhyay Oct. 9, 2024, 12:51 p.m. UTC
From: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>

Use the RCU watching state of a CPU to check whether an RCU-tasks GP
needs to wait for the idle injection task on that CPU. Idle injection
tasks which are in deep-idle states where RCU is not watching, or
which have transitioned to/from a deep-idle state, do not block the
RCU-tasks grace period.

Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
---
 kernel/rcu/tasks.h | 63 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 15 deletions(-)

Comments

Frederic Weisbecker Oct. 9, 2024, 2:37 p.m. UTC | #1
Le Wed, Oct 09, 2024 at 06:21:24PM +0530, neeraj.upadhyay@kernel.org a écrit :
> From: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>
> 
> Use the RCU watching state of a CPU to check whether an RCU-tasks GP
> needs to wait for the idle injection task on that CPU. Idle injection
> tasks which are in deep-idle states where RCU is not watching, or
> which have transitioned to/from a deep-idle state, do not block the
> RCU-tasks grace period.
> 
> Signed-off-by: Neeraj Upadhyay <neeraj.upadhyay@kernel.org>

For now this should work because there is a single user that is
a per-cpu kthread, therefore no RCU-watching writer can race
against another (real idle VS idle injection or idle injection VS
idle injection) without going first through a voluntary context
switch. But who knows about the future? If an idle injection kthread
is preempted by another idle injection right after clearing PF_IDLE,
there could be some spurious QS accounted for the preempted
kthread.

So perhaps we can consider idle injection as any normal task and
wait for it to voluntarily schedule?

Well I see DEFAULT_DURATION_JIFFIES = 6, which is 60 ms on HZ=100.

Yeah that's a lot...so perhaps this patch is needed after all...

> ---
>  kernel/rcu/tasks.h | 63 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index d8506d2e6f54..1947f9b6346d 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -38,6 +38,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
>   * @rtpp: Pointer to the rcu_tasks structure.
>   * @rcu_watching_snap: Per-GP RCU-watching snapshot for idle tasks.
>   * @rcu_watching_snap_rec: RCU-watching snapshot recorded for idle task.
> + * @rcu_watching_idle_inj_snap: Per-GP RCU-watching snapshot for idle inject task.
> + * @rcu_watching_idle_inj_rec: RCU-watching snapshot recorded for idle inject task.
>   */
>  struct rcu_tasks_percpu {
>  	struct rcu_segcblist cblist;
> @@ -56,6 +58,8 @@ struct rcu_tasks_percpu {
>  	struct rcu_tasks *rtpp;
>  	int rcu_watching_snap;
>  	bool rcu_watching_snap_rec;
> +	int rcu_watching_idle_inj_snap;
> +	bool rcu_watching_idle_inj_rec;

So how about:

struct rcu_watching_task {
    int snap;
    bool rec;
}
...
struct rcu_tasks_percpu {
       ...
       struct rcu_watching_task idle_task;
       struct rcu_watching_task idle_inject;
}

Thanks.
diff mbox series

Patch

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index d8506d2e6f54..1947f9b6346d 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -38,6 +38,8 @@  typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @rtpp: Pointer to the rcu_tasks structure.
  * @rcu_watching_snap: Per-GP RCU-watching snapshot for idle tasks.
  * @rcu_watching_snap_rec: RCU-watching snapshot recorded for idle task.
+ * @rcu_watching_idle_inj_snap: Per-GP RCU-watching snapshot for idle inject task.
+ * @rcu_watching_idle_inj_rec: RCU-watching snapshot recorded for idle inject task.
  */
 struct rcu_tasks_percpu {
 	struct rcu_segcblist cblist;
@@ -56,6 +58,8 @@  struct rcu_tasks_percpu {
 	struct rcu_tasks *rtpp;
 	int rcu_watching_snap;
 	bool rcu_watching_snap_rec;
+	int rcu_watching_idle_inj_snap;
+	bool rcu_watching_idle_inj_rec;
 };
 
 /**
@@ -989,10 +993,34 @@  static void rcu_tasks_pregp_step(struct list_head *hop)
 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
 
 		rtpcp->rcu_watching_snap_rec = false;
+		rtpcp->rcu_watching_idle_inj_rec = false;
 	}
 }
 
 #ifdef CONFIG_SMP
+static bool rcu_idle_check_rcu_watching(int *rcu_watching_snap, bool *rcu_watching_rec, int cpu)
+{
+	if (!*rcu_watching_rec) {
+		/*
+		 * Do plain access. Ordering between remote CPU's pre idle accesses
+		 * and post rcu-tasks grace period is provided by synchronize_rcu()
+		 * in rcu_tasks_postgp().
+		 */
+		*rcu_watching_snap = ct_rcu_watching_cpu(cpu);
+		*rcu_watching_rec = true;
+		if (rcu_watching_snap_in_eqs(*rcu_watching_snap))
+			/*
+			 * RCU-idle contexts are RCU-tasks quiescent state for idle
+			 * (and idle injection) tasks.
+			 */
+			return false;
+	} else if (rcu_watching_snap_stopped_since(cpu, *rcu_watching_snap)) {
+		return false;
+	}
+
+	return true;
+}
+
 static bool rcu_idle_task_is_holdout(struct task_struct *t, int cpu)
 {
 	struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
@@ -1010,22 +1038,16 @@  static bool rcu_idle_task_is_holdout(struct task_struct *t, int cpu)
 	if (!t->on_cpu)
 		return false;
 
-	if (!rtpcp->rcu_watching_snap_rec) {
-		/*
-		 * Do plain access. Ordering between remote CPU's pre idle accesses
-		 * and post rcu-tasks grace period is provided by synchronize_rcu()
-		 * in rcu_tasks_postgp().
-		 */
-		rtpcp->rcu_watching_snap = ct_rcu_watching_cpu(cpu);
-		rtpcp->rcu_watching_snap_rec = true;
-		/* RCU-idle contexts are RCU-tasks quiescent state for idle tasks. */
-		if (rcu_watching_snap_in_eqs(rtpcp->rcu_watching_snap))
-			return false;
-	} else if (rcu_watching_snap_stopped_since(cpu, rtpcp->rcu_watching_snap)) {
-		return false;
-	}
+	return rcu_idle_check_rcu_watching(&rtpcp->rcu_watching_snap,
+			&rtpcp->rcu_watching_snap_rec, cpu);
+}
 
-	return true;
+static bool rcu_idle_inj_is_holdout(struct task_struct *t, int cpu)
+{
+	struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu);
+
+	return rcu_idle_check_rcu_watching(&rtpcp->rcu_watching_idle_inj_snap,
+			&rtpcp->rcu_watching_idle_inj_rec, cpu);
 }
 #else /* #ifdef CONFIG_SMP */
 static inline bool rcu_idle_task_is_holdout(struct task_struct *t, int cpu)
@@ -1037,6 +1059,15 @@  static inline bool rcu_idle_task_is_holdout(struct task_struct *t, int cpu)
 	 */
 	return false;
 }
+
+static inline bool rcu_idle_inj_is_holdout(struct task_struct *t, int cpu)
+{
+	/*
+	 * Idle injection tasks are PF_IDLE within preempt disabled
+	 * region. So, we should not enter this call for !SMP.
+	 */
+	return false;
+}
 #endif
 
 /* Check for quiescent states since the pregp's synchronize_rcu() */
@@ -1060,6 +1091,8 @@  static bool rcu_tasks_is_holdout(struct task_struct *t)
 
 	if (t == idle_task(cpu))
 		return rcu_idle_task_is_holdout(t, cpu);
+	else if (is_idle_task(t))
+		return rcu_idle_inj_is_holdout(t, cpu);
 
 	return true;
 }