diff mbox series

[v2,1/3] exec: Dynamically allocate memory to store task's full name

Message ID 20250331121820.455916-2-bhupesh@igalia.com (mailing list archive)
State New
Headers show
Series Dynamically allocate memory to store task's full name | expand

Commit Message

Bhupesh March 31, 2025, 12:18 p.m. UTC
Provide a parallel implementation for get_task_comm() called
get_task_full_name() which allows the dynamically allocated
and filled-in task's full name to be passed to interested
users such as 'gdb'.

Currently while running 'gdb', the 'task->comm' value of a long
task name is truncated due to the limitation of TASK_COMM_LEN.

For example, using gdb to debug a simple app which generates
threads with long task names currently shows:
  # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
  # cat log

  NameThatIsTooLo

This patch does not touch 'TASK_COMM_LEN' at all, i.e.
'TASK_COMM_LEN' and the 16-byte design remain untouched, which means
that all legacy / existing ABI users continue to work as before using
'/proc/$pid/task/$tid/comm'.

This patch only adds a parallel, dynamically-allocated
'task->full_name' which can be used by interested users
via '/proc/$pid/task/$tid/full_name'.

After this change, gdb is able to show full name of the task:
  # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
  # cat log

  NameThatIsTooLongForComm[4662]

Signed-off-by: Bhupesh <bhupesh@igalia.com>
---
 fs/exec.c             | 21 ++++++++++++++++++---
 include/linux/sched.h |  9 +++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

Comments

Yafang Shao April 1, 2025, 2:07 a.m. UTC | #1
On Mon, Mar 31, 2025 at 8:18 PM Bhupesh <bhupesh@igalia.com> wrote:
>
> Provide a parallel implementation for get_task_comm() called
> get_task_full_name() which allows the dynamically allocated
> and filled-in task's full name to be passed to interested
> users such as 'gdb'.
>
> Currently while running 'gdb', the 'task->comm' value of a long
> task name is truncated due to the limitation of TASK_COMM_LEN.
>
> For example using gdb to debug a simple app currently which generate
> threads with long task names:
>   # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
>   # cat log
>
>   NameThatIsTooLo
>
> This patch does not touch 'TASK_COMM_LEN' at all, i.e.
> 'TASK_COMM_LEN' and the 16-byte design remains untouched. Which means
> that all the legacy / existing ABI, continue to work as before using
> '/proc/$pid/task/$tid/comm'.
>
> This patch only adds a parallel, dynamically-allocated
> 'task->full_name' which can be used by interested users
> via '/proc/$pid/task/$tid/full_name'.
>
> After this change, gdb is able to show full name of the task:
>   # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
>   # cat log
>
>   NameThatIsTooLongForComm[4662]
>
> Signed-off-by: Bhupesh <bhupesh@igalia.com>
> ---
>  fs/exec.c             | 21 ++++++++++++++++++---
>  include/linux/sched.h |  9 +++++++++
>  2 files changed, 27 insertions(+), 3 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index f45859ad13ac..4219d77a519c 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1208,6 +1208,9 @@ int begin_new_exec(struct linux_binprm * bprm)
>  {
>         struct task_struct *me = current;
>         int retval;
> +       va_list args;
> +       char *name;
> +       const char *fmt;
>
>         /* Once we are committed compute the creds */
>         retval = bprm_creds_from_file(bprm);
> @@ -1348,11 +1351,22 @@ int begin_new_exec(struct linux_binprm * bprm)
>                  * detecting a concurrent rename and just want a terminated name.
>                  */
>                 rcu_read_lock();
> -               __set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
> -                               true);
> +               fmt = smp_load_acquire(&bprm->file->f_path.dentry->d_name.name);
> +               name = kvasprintf(GFP_KERNEL, fmt, args);
> +               if (!name)
> +                       return -ENOMEM;
> +
> +               me->full_name = name;
> +               __set_task_comm(me, fmt, true);
>                 rcu_read_unlock();
>         } else {
> -               __set_task_comm(me, kbasename(bprm->filename), true);
> +               fmt = kbasename(bprm->filename);
> +               name = kvasprintf(GFP_KERNEL, fmt, args);
> +               if (!name)
> +                       return -ENOMEM;
> +
> +               me->full_name = name;
> +               __set_task_comm(me, fmt, true);
>         }
>
>         /* An exec changes our domain. We are no longer part of the thread
> @@ -1399,6 +1413,7 @@ int begin_new_exec(struct linux_binprm * bprm)
>         return 0;
>
>  out_unlock:
> +       kfree(me->full_name);
>         up_write(&me->signal->exec_update_lock);
>         if (!bprm->cred)
>                 mutex_unlock(&me->signal->cred_guard_mutex);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 56ddeb37b5cd..053b52606652 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1166,6 +1166,9 @@ struct task_struct {
>          */
>         char                            comm[TASK_COMM_LEN];
>
> +       /* To store the full name if task comm is truncated. */
> +       char                            *full_name;
> +

Adding another field to store the task name isn’t ideal. What about
combining them into a single field, as Linus suggested [0]?

[0]. https://lore.kernel.org/all/CAHk-=wjAmmHUg6vho1KjzQi2=psR30+CogFd4aXrThr2gsiS4g@mail.gmail.com/
Harry Yoo April 1, 2025, 4:02 a.m. UTC | #2
On Mon, Mar 31, 2025 at 05:48:18PM +0530, Bhupesh wrote:
> Provide a parallel implementation for get_task_comm() called
> get_task_full_name() which allows the dynamically allocated
> and filled-in task's full name to be passed to interested
> users such as 'gdb'.
> 
> Currently while running 'gdb', the 'task->comm' value of a long
> task name is truncated due to the limitation of TASK_COMM_LEN.
> 
> For example using gdb to debug a simple app currently which generate
> threads with long task names:
>   # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
>   # cat log
> 
>   NameThatIsTooLo
> 
> This patch does not touch 'TASK_COMM_LEN' at all, i.e.
> 'TASK_COMM_LEN' and the 16-byte design remains untouched. Which means
> that all the legacy / existing ABI, continue to work as before using
> '/proc/$pid/task/$tid/comm'.
> 
> This patch only adds a parallel, dynamically-allocated
> 'task->full_name' which can be used by interested users
> via '/proc/$pid/task/$tid/full_name'.
> 
> After this change, gdb is able to show full name of the task:
>   # gdb ./threadnames -ex "run info thread" -ex "detach" -ex "quit" > log
>   # cat log
> 
>   NameThatIsTooLongForComm[4662]
> 
> Signed-off-by: Bhupesh <bhupesh@igalia.com>
> ---
>  fs/exec.c             | 21 ++++++++++++++++++---
>  include/linux/sched.h |  9 +++++++++
>  2 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index f45859ad13ac..4219d77a519c 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1208,6 +1208,9 @@ int begin_new_exec(struct linux_binprm * bprm)
>  {
>  	struct task_struct *me = current;
>  	int retval;
> +	va_list args;
> +	char *name;
> +	const char *fmt;
>  
>  	/* Once we are committed compute the creds */
>  	retval = bprm_creds_from_file(bprm);
> @@ -1348,11 +1351,22 @@ int begin_new_exec(struct linux_binprm * bprm)
>  		 * detecting a concurrent rename and just want a terminated name.
>  		 */
>  		rcu_read_lock();
> -		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
> -				true);
> +		fmt = smp_load_acquire(&bprm->file->f_path.dentry->d_name.name);
> +		name = kvasprintf(GFP_KERNEL, fmt, args);
> +		if (!name)
> +			return -ENOMEM;

Is it safe to return an error here, instead of jumping to the 'out_unlock'
label and then releasing the locks?

> +		me->full_name = name;
> +		__set_task_comm(me, fmt, true);
>  		rcu_read_unlock();
>  	} else {
> -		__set_task_comm(me, kbasename(bprm->filename), true);
> +		fmt = kbasename(bprm->filename);
> +		name = kvasprintf(GFP_KERNEL, fmt, args);
> +		if (!name)
> +			return -ENOMEM;
> +
> +		me->full_name = name;
> +		__set_task_comm(me, fmt, true);
>  	}
>  
>  	/* An exec changes our domain. We are no longer part of the thread
> @@ -1399,6 +1413,7 @@ int begin_new_exec(struct linux_binprm * bprm)
>  	return 0;
>  
>  out_unlock:
> +	kfree(me->full_name);
>  	up_write(&me->signal->exec_update_lock);
>  	if (!bprm->cred)
>  		mutex_unlock(&me->signal->cred_guard_mutex);
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 56ddeb37b5cd..053b52606652 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1166,6 +1166,9 @@ struct task_struct {
>  	 */
>  	char				comm[TASK_COMM_LEN];
>  
> +	/* To store the full name if task comm is truncated. */
> +	char				*full_name;
> +
>  	struct nameidata		*nameidata;
>  
>  #ifdef CONFIG_SYSVIPC
> @@ -2007,6 +2010,12 @@ extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec
>  	buf;						\
>  })
>  
> +#define get_task_full_name(buf, buf_size, tsk) ({	\
> +	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
> +	strscpy_pad(buf, (tsk)->full_name, buf_size);	\
> +	buf;						\
> +})
> +
>  #ifdef CONFIG_SMP
>  static __always_inline void scheduler_ipi(void)
>  {
> -- 
> 2.38.1
> 
>
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index f45859ad13ac..4219d77a519c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1208,6 +1208,9 @@  int begin_new_exec(struct linux_binprm * bprm)
 {
 	struct task_struct *me = current;
 	int retval;
+	va_list args;
+	char *name;
+	const char *fmt;
 
 	/* Once we are committed compute the creds */
 	retval = bprm_creds_from_file(bprm);
@@ -1348,11 +1351,22 @@  int begin_new_exec(struct linux_binprm * bprm)
 		 * detecting a concurrent rename and just want a terminated name.
 		 */
 		rcu_read_lock();
-		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
-				true);
+		fmt = smp_load_acquire(&bprm->file->f_path.dentry->d_name.name);
+		name = kvasprintf(GFP_KERNEL, fmt, args);
+		if (!name)
+			return -ENOMEM;
+
+		me->full_name = name;
+		__set_task_comm(me, fmt, true);
 		rcu_read_unlock();
 	} else {
-		__set_task_comm(me, kbasename(bprm->filename), true);
+		fmt = kbasename(bprm->filename);
+		name = kvasprintf(GFP_KERNEL, fmt, args);
+		if (!name)
+			return -ENOMEM;
+
+		me->full_name = name;
+		__set_task_comm(me, fmt, true);
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
@@ -1399,6 +1413,7 @@  int begin_new_exec(struct linux_binprm * bprm)
 	return 0;
 
 out_unlock:
+	kfree(me->full_name);
 	up_write(&me->signal->exec_update_lock);
 	if (!bprm->cred)
 		mutex_unlock(&me->signal->cred_guard_mutex);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 56ddeb37b5cd..053b52606652 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1166,6 +1166,9 @@  struct task_struct {
 	 */
 	char				comm[TASK_COMM_LEN];
 
+	/* To store the full name if task comm is truncated. */
+	char				*full_name;
+
 	struct nameidata		*nameidata;
 
 #ifdef CONFIG_SYSVIPC
@@ -2007,6 +2010,12 @@  extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec
 	buf;						\
 })
 
+#define get_task_full_name(buf, buf_size, tsk) ({	\
+	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
+	strscpy_pad(buf, (tsk)->full_name, buf_size);	\
+	buf;						\
+})
+
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {