diff mbox series

[1/4] tracing/user_events: Prepare find/delete for same name events

Message ID 20240123220844.928-2-beaub@linux.microsoft.com (mailing list archive)
State Superseded
Headers show
Series tracing/user_events: Introduce multi-format events | expand

Commit Message

Beau Belgrave Jan. 23, 2024, 10:08 p.m. UTC
The current code for finding and deleting events assumes that there will
never be cases when user_events are registered with the same name, but
different formats. In the future this scenario will exist to ensure
user programs can be updated or modify their events and run different
versions of their programs side-by-side without being blocked.

This change does not yet allow for multi-format events. If user_events
are registered with the same name but different arguments the programs
see the same return values as before. This change simply makes it
possible to easily accommodate this in future changes.

Update find_user_event() to take in argument parameters and register
flags to accommodate future multi-format event scenarios. Have find
validate argument matching and return error pointers to cover address
in use cases, or allocation errors. Update callers to handle error
pointer logic.

Move delete_user_event() to use hash walking directly now that find has
changed. Delete all events found that match the register name, stop
if an error occurs and report back to the user.

Update user_fields_match() to cover list_empty() scenarios instead of
each callsite doing it now that find_user_event() uses it directly.

Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
---
 kernel/trace/trace_events_user.c | 106 +++++++++++++++++--------------
 1 file changed, 58 insertions(+), 48 deletions(-)

Comments

Masami Hiramatsu (Google) Jan. 25, 2024, 12:59 a.m. UTC | #1
On Tue, 23 Jan 2024 22:08:41 +0000
Beau Belgrave <beaub@linux.microsoft.com> wrote:

> The current code for finding and deleting events assumes that there will
> never be cases when user_events are registered with the same name, but
> different formats. In the future this scenario will exist to ensure
> user programs can be updated or modify their events and run different
> versions of their programs side-by-side without being blocked.

Ah, this is a very important point. The kernel always has only one instance,
but a user program doesn't. Thus, user programs can define the same event name.
For a similar problem, uprobe events assume that the user (here, the
admin) will define a different group name to avoid it. But for user
events, it is embedded, hmm.

> 
> This change does not yet allow for multi-format events. If user_events
> are registered with the same name but different arguments the programs
> see the same return values as before. This change simply makes it
> possible to easily accomodate for this in future changes.
> 
> Update find_user_event() to take in argument parameters and register
> flags to accomodate future multi-format event scenarios. Have find
> validate argument matching and return error pointers to cover address
> in use cases, or allocation errors. Update callers to handle error
> pointer logic.

Understood, that is similar to what probe events do.

> 
> Move delete_user_event() to use hash walking directly now that find has
> changed. Delete all events found that match the register name, stop
> if an error occurs and report back to the user.

What happens if we run 2 different versions of the application and terminate
one of them? Will the event which is used by the others be kept?

Thank you,

> 
> Update user_fields_match() to cover list_empty() scenarios instead of
> each callsite doing it now that find_user_event() uses it directly.
> 
> Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
> ---
>  kernel/trace/trace_events_user.c | 106 +++++++++++++++++--------------
>  1 file changed, 58 insertions(+), 48 deletions(-)
> 
> diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
> index 9365ce407426..0480579ba563 100644
> --- a/kernel/trace/trace_events_user.c
> +++ b/kernel/trace/trace_events_user.c
> @@ -202,6 +202,8 @@ static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
>  static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
>  static void user_event_mm_put(struct user_event_mm *mm);
>  static int destroy_user_event(struct user_event *user);
> +static bool user_fields_match(struct user_event *user, int argc,
> +			      const char **argv);
>  
>  static u32 user_event_key(char *name)
>  {
> @@ -1493,17 +1495,24 @@ static int destroy_user_event(struct user_event *user)
>  }
>  
>  static struct user_event *find_user_event(struct user_event_group *group,
> -					  char *name, u32 *outkey)
> +					  char *name, int argc, const char **argv,
> +					  u32 flags, u32 *outkey)
>  {
>  	struct user_event *user;
>  	u32 key = user_event_key(name);
>  
>  	*outkey = key;
>  
> -	hash_for_each_possible(group->register_table, user, node, key)
> -		if (!strcmp(EVENT_NAME(user), name))
> +	hash_for_each_possible(group->register_table, user, node, key) {
> +		if (strcmp(EVENT_NAME(user), name))
> +			continue;
> +
> +		if (user_fields_match(user, argc, argv))
>  			return user_event_get(user);
>  
> +		return ERR_PTR(-EADDRINUSE);
> +	}
> +
>  	return NULL;
>  }
>  
> @@ -1860,6 +1869,9 @@ static bool user_fields_match(struct user_event *user, int argc,
>  	struct list_head *head = &user->fields;
>  	int i = 0;
>  
> +	if (argc == 0)
> +		return list_empty(head);
> +
>  	list_for_each_entry_reverse(field, head, link) {
>  		if (!user_field_match(field, argc, argv, &i))
>  			return false;
> @@ -1880,10 +1892,8 @@ static bool user_event_match(const char *system, const char *event,
>  	match = strcmp(EVENT_NAME(user), event) == 0 &&
>  		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
>  
> -	if (match && argc > 0)
> +	if (match)
>  		match = user_fields_match(user, argc, argv);
> -	else if (match && argc == 0)
> -		match = list_empty(&user->fields);
>  
>  	return match;
>  }
> @@ -1922,11 +1932,11 @@ static int user_event_parse(struct user_event_group *group, char *name,
>  			    char *args, char *flags,
>  			    struct user_event **newuser, int reg_flags)
>  {
> -	int ret;
> -	u32 key;
>  	struct user_event *user;
> +	char **argv = NULL;
>  	int argc = 0;
> -	char **argv;
> +	int ret;
> +	u32 key;
>  
>  	/* Currently don't support any text based flags */
>  	if (flags != NULL)
> @@ -1935,41 +1945,34 @@ static int user_event_parse(struct user_event_group *group, char *name,
>  	if (!user_event_capable(reg_flags))
>  		return -EPERM;
>  
> +	if (args) {
> +		argv = argv_split(GFP_KERNEL, args, &argc);
> +
> +		if (!argv)
> +			return -ENOMEM;
> +	}
> +
>  	/* Prevent dyn_event from racing */
>  	mutex_lock(&event_mutex);
> -	user = find_user_event(group, name, &key);
> +	user = find_user_event(group, name, argc, (const char **)argv,
> +			       reg_flags, &key);
>  	mutex_unlock(&event_mutex);
>  
> -	if (user) {
> -		if (args) {
> -			argv = argv_split(GFP_KERNEL, args, &argc);
> -			if (!argv) {
> -				ret = -ENOMEM;
> -				goto error;
> -			}
> +	if (argv)
> +		argv_free(argv);
>  
> -			ret = user_fields_match(user, argc, (const char **)argv);
> -			argv_free(argv);
> -
> -		} else
> -			ret = list_empty(&user->fields);
> -
> -		if (ret) {
> -			*newuser = user;
> -			/*
> -			 * Name is allocated by caller, free it since it already exists.
> -			 * Caller only worries about failure cases for freeing.
> -			 */
> -			kfree(name);
> -		} else {
> -			ret = -EADDRINUSE;
> -			goto error;
> -		}
> +	if (IS_ERR(user))
> +		return PTR_ERR(user);
> +
> +	if (user) {
> +		*newuser = user;
> +		/*
> +		 * Name is allocated by caller, free it since it already exists.
> +		 * Caller only worries about failure cases for freeing.
> +		 */
> +		kfree(name);
>  
>  		return 0;
> -error:
> -		user_event_put(user, false);
> -		return ret;
>  	}
>  
>  	user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
> @@ -2052,25 +2055,32 @@ static int user_event_parse(struct user_event_group *group, char *name,
>  }
>  
>  /*
> - * Deletes a previously created event if it is no longer being used.
> + * Deletes previously created events if they are no longer being used.
>   */
>  static int delete_user_event(struct user_event_group *group, char *name)
>  {
> -	u32 key;
> -	struct user_event *user = find_user_event(group, name, &key);
> +	struct user_event *user;
> +	u32 key = user_event_key(name);
> +	int ret = -ENOENT;
>  
> -	if (!user)
> -		return -ENOENT;
> +	/* Attempt to delete all event(s) with the name passed in */
> +	hash_for_each_possible(group->register_table, user, node, key) {
> +		if (strcmp(EVENT_NAME(user), name))
> +			continue;
>  
> -	user_event_put(user, true);
> +		if (!user_event_last_ref(user))
> +			return -EBUSY;
>  
> -	if (!user_event_last_ref(user))
> -		return -EBUSY;
> +		if (!user_event_capable(user->reg_flags))
> +			return -EPERM;
>  
> -	if (!user_event_capable(user->reg_flags))
> -		return -EPERM;
> +		ret = destroy_user_event(user);
>  
> -	return destroy_user_event(user);
> +		if (ret)
> +			goto out;
> +	}
> +out:
> +	return ret;
>  }
>  
>  /*
> -- 
> 2.34.1
>
Beau Belgrave Jan. 25, 2024, 5:26 p.m. UTC | #2
On Thu, Jan 25, 2024 at 09:59:03AM +0900, Masami Hiramatsu wrote:
> On Tue, 23 Jan 2024 22:08:41 +0000
> Beau Belgrave <beaub@linux.microsoft.com> wrote:
> 
> > The current code for finding and deleting events assumes that there will
> > never be cases when user_events are registered with the same name, but
> > different formats. In the future this scenario will exist to ensure
> > user programs can be updated or modify their events and run different
> > versions of their programs side-by-side without being blocked.
> 
> Ah, this is a very important point. Kernel always has only one instance
> but user program doesn't. Thus it can define the same event name.
> For the similar problem, uprobe event assumes that the user (here
> admin) will define different group name to avoid it. But for the user
> event, it is embedded, hmm.
> 

Yes, the series will handle the case where multiple processes use the same
name: we will find a matching version of that name within the user_event
group. If there isn't one, a new one is created. Each is backed by an
independent tracepoint, which does match up with how uprobe does it. This
actually got brought up in the tracefs meetings we've had, and there seemed
to be wide agreement on how to best handle this.

> > 
> > This change does not yet allow for multi-format events. If user_events
> > are registered with the same name but different arguments the programs
> > see the same return values as before. This change simply makes it
> > possible to easily accomodate for this in future changes.
> > 
> > Update find_user_event() to take in argument parameters and register
> > flags to accomodate future multi-format event scenarios. Have find
> > validate argument matching and return error pointers to cover address
> > in use cases, or allocation errors. Update callers to handle error
> > pointer logic.
> 
> Understand, that is similar to what probe events do.
> 
> > 
> > Move delete_user_event() to use hash walking directly now that find has
> > changed. Delete all events found that match the register name, stop
> > if an error occurs and report back to the user.
> 
> What happen if we run 2 different version of the applications and terminate
> one of them? The event which is used by others will be kept?
> 

Each unique version of a user_event has its own ref-count. If one
version is unused, but another version is in use, only the unused version
will get deleted. For the other version that is in use, the delete will
return -EBUSY when it gets to that version via enumeration.

While we only have a single tracepoint per version, we have several
user_event structures in memory that have the same name, yet different
formats. Each of these has its own lifetime, enablers and ref-counts
to keep them isolated from each other.

Thanks,
-Beau

> Thank you,
> 
> > 
> > Update user_fields_match() to cover list_empty() scenarios instead of
> > each callsite doing it now that find_user_event() uses it directly.
> > 
> > Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
> > ---
> >  kernel/trace/trace_events_user.c | 106 +++++++++++++++++--------------
> >  1 file changed, 58 insertions(+), 48 deletions(-)
> > 
> > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
> > index 9365ce407426..0480579ba563 100644
> > --- a/kernel/trace/trace_events_user.c
> > +++ b/kernel/trace/trace_events_user.c
> > @@ -202,6 +202,8 @@ static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
> >  static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
> >  static void user_event_mm_put(struct user_event_mm *mm);
> >  static int destroy_user_event(struct user_event *user);
> > +static bool user_fields_match(struct user_event *user, int argc,
> > +			      const char **argv);
> >  
> >  static u32 user_event_key(char *name)
> >  {
> > @@ -1493,17 +1495,24 @@ static int destroy_user_event(struct user_event *user)
> >  }
> >  
> >  static struct user_event *find_user_event(struct user_event_group *group,
> > -					  char *name, u32 *outkey)
> > +					  char *name, int argc, const char **argv,
> > +					  u32 flags, u32 *outkey)
> >  {
> >  	struct user_event *user;
> >  	u32 key = user_event_key(name);
> >  
> >  	*outkey = key;
> >  
> > -	hash_for_each_possible(group->register_table, user, node, key)
> > -		if (!strcmp(EVENT_NAME(user), name))
> > +	hash_for_each_possible(group->register_table, user, node, key) {
> > +		if (strcmp(EVENT_NAME(user), name))
> > +			continue;
> > +
> > +		if (user_fields_match(user, argc, argv))
> >  			return user_event_get(user);
> >  
> > +		return ERR_PTR(-EADDRINUSE);
> > +	}
> > +
> >  	return NULL;
> >  }
> >  
> > @@ -1860,6 +1869,9 @@ static bool user_fields_match(struct user_event *user, int argc,
> >  	struct list_head *head = &user->fields;
> >  	int i = 0;
> >  
> > +	if (argc == 0)
> > +		return list_empty(head);
> > +
> >  	list_for_each_entry_reverse(field, head, link) {
> >  		if (!user_field_match(field, argc, argv, &i))
> >  			return false;
> > @@ -1880,10 +1892,8 @@ static bool user_event_match(const char *system, const char *event,
> >  	match = strcmp(EVENT_NAME(user), event) == 0 &&
> >  		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
> >  
> > -	if (match && argc > 0)
> > +	if (match)
> >  		match = user_fields_match(user, argc, argv);
> > -	else if (match && argc == 0)
> > -		match = list_empty(&user->fields);
> >  
> >  	return match;
> >  }
> > @@ -1922,11 +1932,11 @@ static int user_event_parse(struct user_event_group *group, char *name,
> >  			    char *args, char *flags,
> >  			    struct user_event **newuser, int reg_flags)
> >  {
> > -	int ret;
> > -	u32 key;
> >  	struct user_event *user;
> > +	char **argv = NULL;
> >  	int argc = 0;
> > -	char **argv;
> > +	int ret;
> > +	u32 key;
> >  
> >  	/* Currently don't support any text based flags */
> >  	if (flags != NULL)
> > @@ -1935,41 +1945,34 @@ static int user_event_parse(struct user_event_group *group, char *name,
> >  	if (!user_event_capable(reg_flags))
> >  		return -EPERM;
> >  
> > +	if (args) {
> > +		argv = argv_split(GFP_KERNEL, args, &argc);
> > +
> > +		if (!argv)
> > +			return -ENOMEM;
> > +	}
> > +
> >  	/* Prevent dyn_event from racing */
> >  	mutex_lock(&event_mutex);
> > -	user = find_user_event(group, name, &key);
> > +	user = find_user_event(group, name, argc, (const char **)argv,
> > +			       reg_flags, &key);
> >  	mutex_unlock(&event_mutex);
> >  
> > -	if (user) {
> > -		if (args) {
> > -			argv = argv_split(GFP_KERNEL, args, &argc);
> > -			if (!argv) {
> > -				ret = -ENOMEM;
> > -				goto error;
> > -			}
> > +	if (argv)
> > +		argv_free(argv);
> >  
> > -			ret = user_fields_match(user, argc, (const char **)argv);
> > -			argv_free(argv);
> > -
> > -		} else
> > -			ret = list_empty(&user->fields);
> > -
> > -		if (ret) {
> > -			*newuser = user;
> > -			/*
> > -			 * Name is allocated by caller, free it since it already exists.
> > -			 * Caller only worries about failure cases for freeing.
> > -			 */
> > -			kfree(name);
> > -		} else {
> > -			ret = -EADDRINUSE;
> > -			goto error;
> > -		}
> > +	if (IS_ERR(user))
> > +		return PTR_ERR(user);
> > +
> > +	if (user) {
> > +		*newuser = user;
> > +		/*
> > +		 * Name is allocated by caller, free it since it already exists.
> > +		 * Caller only worries about failure cases for freeing.
> > +		 */
> > +		kfree(name);
> >  
> >  		return 0;
> > -error:
> > -		user_event_put(user, false);
> > -		return ret;
> >  	}
> >  
> >  	user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
> > @@ -2052,25 +2055,32 @@ static int user_event_parse(struct user_event_group *group, char *name,
> >  }
> >  
> >  /*
> > - * Deletes a previously created event if it is no longer being used.
> > + * Deletes previously created events if they are no longer being used.
> >   */
> >  static int delete_user_event(struct user_event_group *group, char *name)
> >  {
> > -	u32 key;
> > -	struct user_event *user = find_user_event(group, name, &key);
> > +	struct user_event *user;
> > +	u32 key = user_event_key(name);
> > +	int ret = -ENOENT;
> >  
> > -	if (!user)
> > -		return -ENOENT;
> > +	/* Attempt to delete all event(s) with the name passed in */
> > +	hash_for_each_possible(group->register_table, user, node, key) {
> > +		if (strcmp(EVENT_NAME(user), name))
> > +			continue;
> >  
> > -	user_event_put(user, true);
> > +		if (!user_event_last_ref(user))
> > +			return -EBUSY;
> >  
> > -	if (!user_event_last_ref(user))
> > -		return -EBUSY;
> > +		if (!user_event_capable(user->reg_flags))
> > +			return -EPERM;
> >  
> > -	if (!user_event_capable(user->reg_flags))
> > -		return -EPERM;
> > +		ret = destroy_user_event(user);
> >  
> > -	return destroy_user_event(user);
> > +		if (ret)
> > +			goto out;
> > +	}
> > +out:
> > +	return ret;
> >  }
> >  
> >  /*
> > -- 
> > 2.34.1
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) <mhiramat@kernel.org>
diff mbox series

Patch

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 9365ce407426..0480579ba563 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -202,6 +202,8 @@  static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
 static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
 static void user_event_mm_put(struct user_event_mm *mm);
 static int destroy_user_event(struct user_event *user);
+static bool user_fields_match(struct user_event *user, int argc,
+			      const char **argv);
 
 static u32 user_event_key(char *name)
 {
@@ -1493,17 +1495,24 @@  static int destroy_user_event(struct user_event *user)
 }
 
 static struct user_event *find_user_event(struct user_event_group *group,
-					  char *name, u32 *outkey)
+					  char *name, int argc, const char **argv,
+					  u32 flags, u32 *outkey)
 {
 	struct user_event *user;
 	u32 key = user_event_key(name);
 
 	*outkey = key;
 
-	hash_for_each_possible(group->register_table, user, node, key)
-		if (!strcmp(EVENT_NAME(user), name))
+	hash_for_each_possible(group->register_table, user, node, key) {
+		if (strcmp(EVENT_NAME(user), name))
+			continue;
+
+		if (user_fields_match(user, argc, argv))
 			return user_event_get(user);
 
+		return ERR_PTR(-EADDRINUSE);
+	}
+
 	return NULL;
 }
 
@@ -1860,6 +1869,9 @@  static bool user_fields_match(struct user_event *user, int argc,
 	struct list_head *head = &user->fields;
 	int i = 0;
 
+	if (argc == 0)
+		return list_empty(head);
+
 	list_for_each_entry_reverse(field, head, link) {
 		if (!user_field_match(field, argc, argv, &i))
 			return false;
@@ -1880,10 +1892,8 @@  static bool user_event_match(const char *system, const char *event,
 	match = strcmp(EVENT_NAME(user), event) == 0 &&
 		(!system || strcmp(system, USER_EVENTS_SYSTEM) == 0);
 
-	if (match && argc > 0)
+	if (match)
 		match = user_fields_match(user, argc, argv);
-	else if (match && argc == 0)
-		match = list_empty(&user->fields);
 
 	return match;
 }
@@ -1922,11 +1932,11 @@  static int user_event_parse(struct user_event_group *group, char *name,
 			    char *args, char *flags,
 			    struct user_event **newuser, int reg_flags)
 {
-	int ret;
-	u32 key;
 	struct user_event *user;
+	char **argv = NULL;
 	int argc = 0;
-	char **argv;
+	int ret;
+	u32 key;
 
 	/* Currently don't support any text based flags */
 	if (flags != NULL)
@@ -1935,41 +1945,34 @@  static int user_event_parse(struct user_event_group *group, char *name,
 	if (!user_event_capable(reg_flags))
 		return -EPERM;
 
+	if (args) {
+		argv = argv_split(GFP_KERNEL, args, &argc);
+
+		if (!argv)
+			return -ENOMEM;
+	}
+
 	/* Prevent dyn_event from racing */
 	mutex_lock(&event_mutex);
-	user = find_user_event(group, name, &key);
+	user = find_user_event(group, name, argc, (const char **)argv,
+			       reg_flags, &key);
 	mutex_unlock(&event_mutex);
 
-	if (user) {
-		if (args) {
-			argv = argv_split(GFP_KERNEL, args, &argc);
-			if (!argv) {
-				ret = -ENOMEM;
-				goto error;
-			}
+	if (argv)
+		argv_free(argv);
 
-			ret = user_fields_match(user, argc, (const char **)argv);
-			argv_free(argv);
-
-		} else
-			ret = list_empty(&user->fields);
-
-		if (ret) {
-			*newuser = user;
-			/*
-			 * Name is allocated by caller, free it since it already exists.
-			 * Caller only worries about failure cases for freeing.
-			 */
-			kfree(name);
-		} else {
-			ret = -EADDRINUSE;
-			goto error;
-		}
+	if (IS_ERR(user))
+		return PTR_ERR(user);
+
+	if (user) {
+		*newuser = user;
+		/*
+		 * Name is allocated by caller, free it since it already exists.
+		 * Caller only worries about failure cases for freeing.
+		 */
+		kfree(name);
 
 		return 0;
-error:
-		user_event_put(user, false);
-		return ret;
 	}
 
 	user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
@@ -2052,25 +2055,32 @@  static int user_event_parse(struct user_event_group *group, char *name,
 }
 
 /*
- * Deletes a previously created event if it is no longer being used.
+ * Deletes previously created events if they are no longer being used.
  */
 static int delete_user_event(struct user_event_group *group, char *name)
 {
-	u32 key;
-	struct user_event *user = find_user_event(group, name, &key);
+	struct user_event *user;
+	u32 key = user_event_key(name);
+	int ret = -ENOENT;
 
-	if (!user)
-		return -ENOENT;
+	/* Attempt to delete all event(s) with the name passed in */
+	hash_for_each_possible(group->register_table, user, node, key) {
+		if (strcmp(EVENT_NAME(user), name))
+			continue;
 
-	user_event_put(user, true);
+		if (!user_event_last_ref(user))
+			return -EBUSY;
 
-	if (!user_event_last_ref(user))
-		return -EBUSY;
+		if (!user_event_capable(user->reg_flags))
+			return -EPERM;
 
-	if (!user_event_capable(user->reg_flags))
-		return -EPERM;
+		ret = destroy_user_event(user);
 
-	return destroy_user_event(user);
+		if (ret)
+			goto out;
+	}
+out:
+	return ret;
 }
 
 /*