diff mbox series

[v3,5/8] tracing: Allow system call tracepoints to handle page faults

Message ID 20241004145818.1726671-6-mathieu.desnoyers@efficios.com (mailing list archive)
State Superseded
Headers show
Series tracing: Allow system call tracepoints to handle page faults | expand

Commit Message

Mathieu Desnoyers Oct. 4, 2024, 2:58 p.m. UTC
Use Tasks Trace RCU to protect iteration of system call enter/exit
tracepoint probes to allow those probes to handle page faults.

In preparation for this change, all tracers registering to system call
enter/exit tracepoints should expect those to be called with preemption
enabled.

This allows tracers to fault-in userspace system call arguments such as
path strings within their probe callbacks.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michael Jeanson <mjeanson@efficios.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Yonghong Song <yhs@fb.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: bpf@vger.kernel.org
Cc: Joel Fernandes <joel@joelfernandes.org>
---
 include/linux/tracepoint.h | 12 ++++++++++--
 init/Kconfig               |  1 +
 2 files changed, 11 insertions(+), 2 deletions(-)

Comments

Steven Rostedt Oct. 8, 2024, 11:23 p.m. UTC | #1
On Fri,  4 Oct 2024 10:58:15 -0400
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> Use Tasks Trace RCU to protect iteration of system call enter/exit
> tracepoint probes to allow those probes to handle page faults.
> 
> In preparation for this change, all tracers registering to system call
> enter/exit tracepoints should expect those to be called with preemption
> enabled.
> 
> This allows tracers to fault-in userspace system call arguments such as
> path strings within their probe callbacks.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: Michael Jeanson <mjeanson@efficios.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Masami Hiramatsu <mhiramat@kernel.org>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Yonghong Song <yhs@fb.com>
> Cc: Paul E. McKenney <paulmck@kernel.org>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Cc: bpf@vger.kernel.org
> Cc: Joel Fernandes <joel@joelfernandes.org>
> ---
>  include/linux/tracepoint.h | 12 ++++++++++--
>  init/Kconfig               |  1 +
>  2 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 014790495ad8..cefd44b7c91f 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -17,6 +17,7 @@
>  #include <linux/errno.h>
>  #include <linux/types.h>
>  #include <linux/rcupdate.h>
> +#include <linux/rcupdate_trace.h>
>  #include <linux/tracepoint-defs.h>
>  #include <linux/static_call.h>
>  
> @@ -107,6 +108,7 @@ void for_each_tracepoint_in_module(struct module *mod,
>  #ifdef CONFIG_TRACEPOINTS
>  static inline void tracepoint_synchronize_unregister(void)
>  {
> +	synchronize_rcu_tasks_trace();
>  	synchronize_rcu();
>  }
>  #else
> @@ -204,11 +206,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>  		if (!(cond))						\
>  			return;						\
>  									\
> -		preempt_disable_notrace();				\

Should add a comment somewhere stating that the syscall version is to allow faults.

-- Steve

> +		if (syscall)						\
> +			rcu_read_lock_trace();				\
> +		else							\
> +			preempt_disable_notrace();			\
>  									\
>  		__DO_TRACE_CALL(name, TP_ARGS(args));			\
>  									\
> -		preempt_enable_notrace();				\
> +		if (syscall)						\
> +			rcu_read_unlock_trace();			\
> +		else							\
> +			preempt_enable_notrace();			\
>  	} while (0)
>  
>  /*
> diff --git a/init/Kconfig b/init/Kconfig
> index fbd0cb06a50a..eedd0064fb36 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1984,6 +1984,7 @@ config BINDGEN_VERSION_TEXT
>  #
>  config TRACEPOINTS
>  	bool
> +	select TASKS_TRACE_RCU
>  
>  source "kernel/Kconfig.kexec"
>
Mathieu Desnoyers Oct. 9, 2024, 12:56 a.m. UTC | #2
On 2024-10-09 01:23, Steven Rostedt wrote:
> On Fri,  4 Oct 2024 10:58:15 -0400
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> 
>> Use Tasks Trace RCU to protect iteration of system call enter/exit
>> tracepoint probes to allow those probes to handle page faults.
>>
>> In preparation for this change, all tracers registering to system call
>> enter/exit tracepoints should expect those to be called with preemption
>> enabled.
>>
>> This allows tracers to fault-in userspace system call arguments such as
>> path strings within their probe callbacks.
>>
>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> Cc: Michael Jeanson <mjeanson@efficios.com>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Masami Hiramatsu <mhiramat@kernel.org>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Alexei Starovoitov <ast@kernel.org>
>> Cc: Yonghong Song <yhs@fb.com>
>> Cc: Paul E. McKenney <paulmck@kernel.org>
>> Cc: Ingo Molnar <mingo@redhat.com>
>> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
>> Cc: Mark Rutland <mark.rutland@arm.com>
>> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
>> Cc: Namhyung Kim <namhyung@kernel.org>
>> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
>> Cc: bpf@vger.kernel.org
>> Cc: Joel Fernandes <joel@joelfernandes.org>
>> ---
>>   include/linux/tracepoint.h | 12 ++++++++++--
>>   init/Kconfig               |  1 +
>>   2 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
>> index 014790495ad8..cefd44b7c91f 100644
>> --- a/include/linux/tracepoint.h
>> +++ b/include/linux/tracepoint.h
>> @@ -17,6 +17,7 @@
>>   #include <linux/errno.h>
>>   #include <linux/types.h>
>>   #include <linux/rcupdate.h>
>> +#include <linux/rcupdate_trace.h>
>>   #include <linux/tracepoint-defs.h>
>>   #include <linux/static_call.h>
>>   
>> @@ -107,6 +108,7 @@ void for_each_tracepoint_in_module(struct module *mod,
>>   #ifdef CONFIG_TRACEPOINTS
>>   static inline void tracepoint_synchronize_unregister(void)
>>   {
>> +	synchronize_rcu_tasks_trace();
>>   	synchronize_rcu();
>>   }
>>   #else
>> @@ -204,11 +206,17 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>>   		if (!(cond))						\
>>   			return;						\
>>   									\
>> -		preempt_disable_notrace();				\
> 
> Should add a comment somewhere stating that the syscall version is to allow faults.

I plan to add this comment on top of __TO_TRACE:

+ *
+ * With @syscall=0, the tracepoint callback array dereference is
+ * protected by disabling preemption.
+ * With @syscall=1, the tracepoint callback array dereference is
+ * protected by Tasks Trace RCU, which allows probes to handle page
+ * faults.

Thanks,

Mathieu


> 
> -- Steve
> 
>> +		if (syscall)						\
>> +			rcu_read_lock_trace();				\
>> +		else							\
>> +			preempt_disable_notrace();			\
>>   									\
>>   		__DO_TRACE_CALL(name, TP_ARGS(args));			\
>>   									\
>> -		preempt_enable_notrace();				\
>> +		if (syscall)						\
>> +			rcu_read_unlock_trace();			\
>> +		else							\
>> +			preempt_enable_notrace();			\
>>   	} while (0)
>>   
>>   /*
>> diff --git a/init/Kconfig b/init/Kconfig
>> index fbd0cb06a50a..eedd0064fb36 100644
>> --- a/init/Kconfig
>> +++ b/init/Kconfig
>> @@ -1984,6 +1984,7 @@ config BINDGEN_VERSION_TEXT
>>   #
>>   config TRACEPOINTS
>>   	bool
>> +	select TASKS_TRACE_RCU
>>   
>>   source "kernel/Kconfig.kexec"
>>   
>
diff mbox series

Patch

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 014790495ad8..cefd44b7c91f 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -17,6 +17,7 @@ 
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
 #include <linux/tracepoint-defs.h>
 #include <linux/static_call.h>
 
@@ -107,6 +108,7 @@  void for_each_tracepoint_in_module(struct module *mod,
 #ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
+	synchronize_rcu_tasks_trace();
 	synchronize_rcu();
 }
 #else
@@ -204,11 +206,17 @@  static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		if (!(cond))						\
 			return;						\
 									\
-		preempt_disable_notrace();				\
+		if (syscall)						\
+			rcu_read_lock_trace();				\
+		else							\
+			preempt_disable_notrace();			\
 									\
 		__DO_TRACE_CALL(name, TP_ARGS(args));			\
 									\
-		preempt_enable_notrace();				\
+		if (syscall)						\
+			rcu_read_unlock_trace();			\
+		else							\
+			preempt_enable_notrace();			\
 	} while (0)
 
 /*
diff --git a/init/Kconfig b/init/Kconfig
index fbd0cb06a50a..eedd0064fb36 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1984,6 +1984,7 @@  config BINDGEN_VERSION_TEXT
 #
 config TRACEPOINTS
 	bool
+	select TASKS_TRACE_RCU
 
 source "kernel/Kconfig.kexec"