diff mbox series

exec: fix up /proc/pid/comm in the execveat(AT_EMPTY_PATH) case

Message ID 20241130045437.work.390-kees@kernel.org (mailing list archive)
State New
Headers show
Series exec: fix up /proc/pid/comm in the execveat(AT_EMPTY_PATH) case | expand

Commit Message

Kees Cook Nov. 30, 2024, 4:54 a.m. UTC
Zbigniew mentioned at Linux Plumber's that systemd is interested in
switching to execveat() for service execution, but can't, because the
contents of /proc/pid/comm are the file descriptor which was used,
instead of the path to the binary. This makes the output of tools like
top and ps useless, especially in a world where most fds are opened
CLOEXEC so the number is truly meaningless.

When the filename passed in is empty (e.g. with AT_EMPTY_PATH), use the
dentry's filename for "comm" instead of using the useless numeral from
the synthetic fdpath construction. This way the actual exec machinery
is unchanged, but cosmetically the comm looks reasonable to admins
investigating things.

Instead of adding TASK_COMM_LEN more bytes to bprm, use one of the unused
flag bits to indicate that we need to set "comm" from the dentry.

Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Suggested-by: Tycho Andersen <tandersen@netflix.com>
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
CC: Aleksa Sarai <cyphar@cyphar.com>
Link: https://github.com/uapi-group/kernel-features#set-comm-field-before-exec
Signed-off-by: Kees Cook <kees@kernel.org>
---
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: linux-mm@kvack.org
Cc: linux-fsdevel@vger.kernel.org

Here's what I've put together from the various suggestions. I didn't
want to needlessly grow bprm, so I just added a flag instead. Otherwise,
this is very similar to what Linus and Al suggested.
---
 fs/exec.c               | 22 +++++++++++++++++++---
 include/linux/binfmts.h |  4 +++-
 2 files changed, 22 insertions(+), 4 deletions(-)

Comments

Aleksa Sarai Nov. 30, 2024, 5:55 a.m. UTC | #1
On 2024-11-29, Kees Cook <kees@kernel.org> wrote:
> Zbigniew mentioned at Linux Plumber's that systemd is interested in
> switching to execveat() for service execution, but can't, because the
> contents of /proc/pid/comm are the file descriptor which was used,
> instead of the path to the binary. This makes the output of tools like
> top and ps useless, especially in a world where most fds are opened
> CLOEXEC so the number is truly meaningless.
> 
> When the filename passed in is empty (e.g. with AT_EMPTY_PATH), use the
> dentry's filename for "comm" instead of using the useless numeral from
> the synthetic fdpath construction. This way the actual exec machinery
> is unchanged, but cosmetically the comm looks reasonable to admins
> investigating things.
> 
> Instead of adding TASK_COMM_LEN more bytes to bprm, use one of the unused
> flag bits to indicate that we need to set "comm" from the dentry.

Looks reasonable to me, feel free to take my

Reviewed-by: Aleksa Sarai <cyphar@cyphar.com>

> 
> Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
> Suggested-by: Tycho Andersen <tandersen@netflix.com>
> Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Aleksa Sarai <cyphar@cyphar.com>
> Link: https://github.com/uapi-group/kernel-features#set-comm-field-before-exec
> Signed-off-by: Kees Cook <kees@kernel.org>
> ---
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Eric Biederman <ebiederm@xmission.com>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: linux-mm@kvack.org
> Cc: linux-fsdevel@vger.kernel.org
> 
> Here's what I've put together from the various suggestions. I didn't
> want to needlessly grow bprm, so I just added a flag instead. Otherwise,
> this is very similar to what Linus and Al suggested.
> ---
>  fs/exec.c               | 22 +++++++++++++++++++---
>  include/linux/binfmts.h |  4 +++-
>  2 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 5f16500ac325..d897d60ca5c2 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1347,7 +1347,21 @@ int begin_new_exec(struct linux_binprm * bprm)
>  		set_dumpable(current->mm, SUID_DUMP_USER);
>  
>  	perf_event_exec();
> -	__set_task_comm(me, kbasename(bprm->filename), true);
> +
> +	/*
> +	 * If the original filename was empty, alloc_bprm() made up a path
> +	 * that will probably not be useful to admins running ps or similar.
> +	 * Let's fix it up to be something reasonable.
> +	 */
> +	if (bprm->comm_from_dentry) {
> +		rcu_read_lock();
> +		/* The dentry name won't change while we hold the rcu read lock. */
> +		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
> +				true);
> +		rcu_read_unlock();
> +	} else {
> +		__set_task_comm(me, kbasename(bprm->filename), true);
> +	}
>  
>  	/* An exec changes our domain. We are no longer part of the thread
>  	   group */
> @@ -1521,11 +1535,13 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
>  	if (fd == AT_FDCWD || filename->name[0] == '/') {
>  		bprm->filename = filename->name;
>  	} else {
> -		if (filename->name[0] == '\0')
> +		if (filename->name[0] == '\0') {
>  			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
> -		else
> +			bprm->comm_from_dentry = 1;
> +		} else {
>  			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
>  						  fd, filename->name);
> +		}
>  		if (!bprm->fdpath)
>  			goto out_free;
>  
> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
> index e6c00e860951..3305c849abd6 100644
> --- a/include/linux/binfmts.h
> +++ b/include/linux/binfmts.h
> @@ -42,7 +42,9 @@ struct linux_binprm {
>  		 * Set when errors can no longer be returned to the
>  		 * original userspace.
>  		 */
> -		point_of_no_return:1;
> +		point_of_no_return:1,
> +		/* Set when "comm" must come from the dentry. */
> +		comm_from_dentry:1;
>  	struct file *executable; /* Executable to pass to the interpreter */
>  	struct file *interpreter;
>  	struct file *file;
> -- 
> 2.34.1
>
Christian Brauner Nov. 30, 2024, 12:29 p.m. UTC | #2
On Fri, Nov 29, 2024 at 08:54:38PM -0800, Kees Cook wrote:
> Zbigniew mentioned at Linux Plumber's that systemd is interested in
> switching to execveat() for service execution, but can't, because the
> contents of /proc/pid/comm are the file descriptor which was used,
> instead of the path to the binary. This makes the output of tools like
> top and ps useless, especially in a world where most fds are opened
> CLOEXEC so the number is truly meaningless.
> 
> When the filename passed in is empty (e.g. with AT_EMPTY_PATH), use the
> dentry's filename for "comm" instead of using the useless numeral from
> the synthetic fdpath construction. This way the actual exec machinery
> is unchanged, but cosmetically the comm looks reasonable to admins
> investigating things.
> 
> Instead of adding TASK_COMM_LEN more bytes to bprm, use one of the unused
> flag bits to indicate that we need to set "comm" from the dentry.
> 
> Suggested-by: Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
> Suggested-by: Tycho Andersen <tandersen@netflix.com>
> Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> CC: Aleksa Sarai <cyphar@cyphar.com>
> Link: https://github.com/uapi-group/kernel-features#set-comm-field-before-exec
> Signed-off-by: Kees Cook <kees@kernel.org>
> ---
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Eric Biederman <ebiederm@xmission.com>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: linux-mm@kvack.org
> Cc: linux-fsdevel@vger.kernel.org
> 
> Here's what I've put together from the various suggestions. I didn't
> want to needlessly grow bprm, so I just added a flag instead. Otherwise,
> this is very similar to what Linus and Al suggested.
> ---
>  fs/exec.c               | 22 +++++++++++++++++++---
>  include/linux/binfmts.h |  4 +++-
>  2 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 5f16500ac325..d897d60ca5c2 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1347,7 +1347,21 @@ int begin_new_exec(struct linux_binprm * bprm)
>  		set_dumpable(current->mm, SUID_DUMP_USER);
>  
>  	perf_event_exec();
> -	__set_task_comm(me, kbasename(bprm->filename), true);
> +
> +	/*
> +	 * If the original filename was empty, alloc_bprm() made up a path
> +	 * that will probably not be useful to admins running ps or similar.
> +	 * Let's fix it up to be something reasonable.
> +	 */
> +	if (bprm->comm_from_dentry) {
> +		rcu_read_lock();
> +		/* The dentry name won't change while we hold the rcu read lock. */
> +		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),

What does the smp_load_acquire() pair with?
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index 5f16500ac325..d897d60ca5c2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1347,7 +1347,21 @@  int begin_new_exec(struct linux_binprm * bprm)
 		set_dumpable(current->mm, SUID_DUMP_USER);
 
 	perf_event_exec();
-	__set_task_comm(me, kbasename(bprm->filename), true);
+
+	/*
+	 * If the original filename was empty, alloc_bprm() made up a path
+	 * that will probably not be useful to admins running ps or similar.
+	 * Let's fix it up to be something reasonable.
+	 */
+	if (bprm->comm_from_dentry) {
+		rcu_read_lock();
+		/* The dentry name won't change while we hold the rcu read lock. */
+		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
+				true);
+		rcu_read_unlock();
+	} else {
+		__set_task_comm(me, kbasename(bprm->filename), true);
+	}
 
 	/* An exec changes our domain. We are no longer part of the thread
 	   group */
@@ -1521,11 +1535,13 @@  static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl
 	if (fd == AT_FDCWD || filename->name[0] == '/') {
 		bprm->filename = filename->name;
 	} else {
-		if (filename->name[0] == '\0')
+		if (filename->name[0] == '\0') {
 			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
-		else
+			bprm->comm_from_dentry = 1;
+		} else {
 			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
 						  fd, filename->name);
+		}
 		if (!bprm->fdpath)
 			goto out_free;
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index e6c00e860951..3305c849abd6 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -42,7 +42,9 @@  struct linux_binprm {
 		 * Set when errors can no longer be returned to the
 		 * original userspace.
 		 */
-		point_of_no_return:1;
+		point_of_no_return:1,
+		/* Set when "comm" must come from the dentry. */
+		comm_from_dentry:1;
 	struct file *executable; /* Executable to pass to the interpreter */
 	struct file *interpreter;
 	struct file *file;