@@ -1238,6 +1238,10 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval)
goto out;
+ retval = mutex_lock_killable(&current->signal->cred_guard_light);
+ if (retval)
+ goto out;
+
/*
* Must be called _before_ exec_mmap() as bprm->mm is
* not visibile until then. This also enables the update
@@ -1251,7 +1255,7 @@ int flush_old_exec(struct linux_binprm * bprm)
acct_arg_size(bprm, 0);
retval = exec_mmap(bprm->mm);
if (retval)
- goto out;
+ goto out_unlock;
bprm->mm = NULL; /* We're using it now */
@@ -1263,6 +1267,8 @@ int flush_old_exec(struct linux_binprm * bprm)
return 0;
+out_unlock:
+ mutex_unlock(&current->signal->cred_guard_light);
out:
return retval;
}
@@ -1386,6 +1392,7 @@ void install_exec_creds(struct linux_binprm *bprm)
* credentials; any time after this it may be unlocked.
*/
security_bprm_committed_creds(bprm);
+ mutex_unlock(&current->signal->cred_guard_light);
mutex_unlock(&current->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
@@ -1753,6 +1760,12 @@ static int do_execveat_common(int fd, struct filename *filename,
return retval;
out:
+ if (!bprm->mm && bprm->cred) {
+ /* failure after flush_old_exec(), but before
+ * install_exec_creds()
+ */
+ mutex_unlock(&current->signal->cred_guard_light);
+ }
if (bprm->mm) {
acct_arg_size(bprm, 0);
mmput(bprm->mm);
@@ -58,6 +58,7 @@ extern struct fs_struct init_fs;
INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \
+ .cred_guard_light = __MUTEX_INITIALIZER(sig.cred_guard_light) \
}
extern struct nsproxy init_nsproxy;
@@ -808,6 +808,16 @@ struct signal_struct {
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
* (notably. ptrace) */
+ /*
+ * Lightweight version of cred_guard_mutex; used to prevent race
+ * conditions where a user can gain information about the post-execve
+ * state of a task to which access should only be granted pre-execve.
+ * Hold this mutex while performing remote task inspection associated
+ * with a security check.
+ * This mutex MUST NOT be used in cases where anything changes about
+ * the security properties of a running execve().
+ */
+ struct mutex cred_guard_light;
};
/*
@@ -1215,6 +1215,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
current->signal->is_child_subreaper;
mutex_init(&sig->cred_guard_mutex);
+ mutex_init(&sig->cred_guard_light);
return 0;
}
@@ -283,6 +283,16 @@ ok:
return security_ptrace_access_check(task, mode);
}
+/*
+ * NOTE: When you call this function, you need to ensure that the target task
+ * can't acquire (via setuid execve) credentials between the ptrace access
+ * check and the privileged access. The recommended way to do this is to hold
+ * one of task->signal->{cred_guard_mutex,cred_guard_light} while calling this
+ * function and performing the requested access.
+ *
+ * This function may only be used if access is requested in the name of
+ * current_cred().
+ */
bool ptrace_may_access(struct task_struct *task, unsigned int mode)
{
int err;
This is a new per-threadgroup lock that can often be taken instead of
cred_guard_mutex and has less deadlock potential. I'm doing this because
Oleg Nesterov mentioned the potential for deadlocks, in particular if a
debugged task is stuck in execve, trying to get rid of a ptrace-stopped
thread, and the debugger attempts to inspect procfs files of the debugged
task.

The binfmt handlers (in particular for elf_fdpic and flat) might still
call VFS read and mmap operations on the binary with the lock held, but
not open operations (as is the case with cred_guard_mutex).

An rwlock would be more appropriate here, but apparently those don't
have _killable variants of the locking functions?

This is a preparation patch for using proper locking in more places.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jann Horn <jann@thejh.net>
---
 fs/exec.c                 | 15 ++++++++++++++-
 include/linux/init_task.h |  1 +
 include/linux/sched.h     | 10 ++++++++++
 kernel/fork.c             |  1 +
 kernel/ptrace.c           | 10 ++++++++++
 5 files changed, 36 insertions(+), 1 deletion(-)