[v2] tracing/user_events: Use non-RCU context for enabler writes

Message ID 20230517001414.307-1-beaub@linux.microsoft.com
State Superseded
Series [v2] tracing/user_events: Use non-RCU context for enabler writes

Commit Message

Beau Belgrave May 17, 2023, 12:14 a.m. UTC
Currently, when events are enabled or disabled, the top-level mm
enumeration is done in a non-RCU context; however, the enablers are
still walked within an RCU context. Each enabler is updated via
user_event_enabler_write(), which uses pin_user_pages_remote(). That
function can reschedule and must not be called from an RCU context.
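
For reference, the problematic shape looks roughly like this (a
simplified sketch of the code this patch removes; the sleepable write
happens while the RCU read lock is held):

	rcu_read_lock();

	list_for_each_entry_rcu(enabler, &mm->enablers, link) {
		if (enabler->event == user) {
			attempt = 0;
			/* May fault/reschedule via pin_user_pages_remote() */
			user_event_enabler_write(mm, enabler, true, &attempt);
		}
	}

	rcu_read_unlock();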

Use the same approach as the top-level mm enumeration: build a
temporary list of the target enablers within an RCU context, then exit
that context and call user_event_enabler_write() on each entry of the
list from a non-RCU context.
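
In simplified form, the new flow is (a sketch of the added
user_event_enabler_get_all() helper inlined into its caller; the real
hunks are in the patch below):

	struct user_event_enabler *found = NULL, *enabler;
	int attempt;

	/* Under RCU: snapshot matching enablers into a singly linked list. */
	rcu_read_lock();

	list_for_each_entry_rcu(enabler, &mm->enablers, link) {
		if (enabler->event == user) {
			enabler->next = found;
			found = enabler;
		}
	}

	rcu_read_unlock();

	/* Outside RCU: sleeping/rescheduling is now allowed. */
	mmap_read_lock(mm->mm);

	while (found) {
		attempt = 0;
		user_event_enabler_write(mm, found, true, &attempt);
		found = found->next;
	}

	mmap_read_unlock(mm->mm);

The snapshot remains valid after rcu_read_unlock() because the caller
holds event_mutex (the new helper asserts this with
lockdep_assert_held()) and each mm is pinned via user_event_mm_get_all().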

Link: https://lore.kernel.org/all/20230515165707.hv65ekwp2djkjj5i@MacBook-Pro-8.local/
Fixes: 7235759084a4 ("tracing/user_events: Use remote writes for event enablement")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
Changes in v2:
  - Remove the break statement that prevented handling more than one
    enabler in an mm for the same event (rare, but possible).

 kernel/trace/trace_events_user.c | 68 +++++++++++++++++++++++++-------
 1 file changed, 54 insertions(+), 14 deletions(-)


base-commit: 3862f86c1529fa0016de6344eb974877b4cd3838

Patch

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index b1ecd7677642..3cd8133d9e42 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -96,12 +96,15 @@  struct user_event {
  * these to track enablement sites that are tied to an event.
  */
 struct user_event_enabler {
-	struct list_head	link;
-	struct user_event	*event;
-	unsigned long		addr;
+	struct list_head		link;
+	struct user_event		*event;
+	unsigned long			addr;
+
+	/* Used to build a list within RCU context for a non-RCU context. */
+	struct user_event_enabler	*next;
 
 	/* Track enable bit, flags, etc. Aligned for bitops. */
-	unsigned int		values;
+	unsigned int			values;
 };
 
 /* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */
@@ -165,6 +168,8 @@  static int user_event_parse(struct user_event_group *group, char *name,
 			    char *args, char *flags,
 			    struct user_event **newuser);
 
+static struct user_event_enabler
+*user_event_enabler_get_all(struct user_event_mm *mm, struct user_event *user);
 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
 static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
 static void user_event_mm_put(struct user_event_mm *mm);
@@ -450,25 +455,28 @@  static bool user_event_enabler_exists(struct user_event_mm *mm,
 
 static void user_event_enabler_update(struct user_event *user)
 {
-	struct user_event_enabler *enabler;
 	struct user_event_mm *mm = user_event_mm_get_all(user);
+	struct user_event_enabler *enabler;
 	struct user_event_mm *next;
 	int attempt;
 
 	while (mm) {
 		next = mm->next;
+		enabler = user_event_enabler_get_all(mm, user);
+
+		if (unlikely(!enabler))
+			goto next_mm;
+
 		mmap_read_lock(mm->mm);
-		rcu_read_lock();
 
-		list_for_each_entry_rcu(enabler, &mm->enablers, link) {
-			if (enabler->event == user) {
-				attempt = 0;
-				user_event_enabler_write(mm, enabler, true, &attempt);
-			}
+		while (enabler) {
+			attempt = 0;
+			user_event_enabler_write(mm, enabler, true, &attempt);
+			enabler = enabler->next;
 		}
 
-		rcu_read_unlock();
 		mmap_read_unlock(mm->mm);
+next_mm:
 		user_event_mm_put(mm);
 		mm = next;
 	}
@@ -500,6 +508,34 @@  static bool user_event_enabler_dup(struct user_event_enabler *orig,
 	return true;
 }
 
+static struct user_event_enabler
+*user_event_enabler_get_all(struct user_event_mm *mm, struct user_event *user)
+{
+	struct user_event_enabler *found = NULL;
+	struct user_event_enabler *enabler;
+
+	lockdep_assert_held(&event_mutex);
+
+	/*
+	 * We need to get enablers from within a RCU context to a non-RCU
+	 * context quickly. We build a single linked list under the RCU
+	 * to enable this. Typically this is required for doing actions
+	 * on enablers that might sleep or schedule.
+	 */
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(enabler, &mm->enablers, link) {
+		if (enabler->event == user) {
+			enabler->next = found;
+			found = enabler;
+		}
+	}
+
+	rcu_read_unlock();
+
+	return found;
+}
+
 static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm)
 {
 	refcount_inc(&mm->refcnt);
@@ -513,6 +549,8 @@  static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
 	struct user_event_enabler *enabler;
 	struct user_event_mm *mm;
 
+	lockdep_assert_held(&event_mutex);
+
 	/*
 	 * We do not want to block fork/exec while enablements are being
 	 * updated, so we use RCU to walk the current tasks that have used
@@ -525,13 +563,15 @@  static struct user_event_mm *user_event_mm_get_all(struct user_event *user)
 	 */
 	rcu_read_lock();
 
-	list_for_each_entry_rcu(mm, &user_event_mms, link)
-		list_for_each_entry_rcu(enabler, &mm->enablers, link)
+	list_for_each_entry_rcu(mm, &user_event_mms, link) {
+		list_for_each_entry_rcu(enabler, &mm->enablers, link) {
 			if (enabler->event == user) {
 				mm->next = found;
 				found = user_event_mm_get(mm);
 				break;
 			}
+		}
+	}
 
 	rcu_read_unlock();