Message ID | AM6PR03MB5170353DF3575FF7742BB155E4FB0@AM6PR03MB5170.eurprd03.prod.outlook.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/4] exec: Fix a deadlock in ptrace | expand |
On 14.03.2020 12:11, Bernd Edlinger wrote: > The cred_guard_mutex is problematic. The cred_guard_mutex is held > over the userspace accesses as the arguments from userspace are read. > The cred_guard_mutex is held of PTRACE_EVENT_EXIT as the the other > threads are killed. The cred_guard_mutex is held over > "put_user(0, tsk->clear_child_tid)" in exit_mm(). > > Any of those can result in deadlock, as the cred_guard_mutex is held > over a possible indefinite userspace waits for userspace. > > Add exec_update_mutex that is only held over exec updating process > with the new contents of exec, so that code that needs not to be > confused by exec changing the mm and the cred in ways that can not > happen during ordinary execution of a process. > > The plan is to switch the users of cred_guard_mutex to > exec_udpate_mutex one by one. This lets us move forward while still > being careful and not introducing any regressions. > > Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ > Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ > Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ > Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ > Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ > Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") > Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de> > --- > fs/exec.c | 17 ++++++++++++++--- > include/linux/binfmts.h | 8 +++++++- > include/linux/sched/signal.h | 9 ++++++++- > init/init_task.c | 1 + > kernel/fork.c | 1 + > 5 files changed, 31 insertions(+), 5 deletions(-) > > v3: this update fixes lock-order and adds an explicit data member in linux_binprm > > diff --git a/fs/exec.c b/fs/exec.c > index d820a72..11974a1 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) > { > struct task_struct *tsk; > struct mm_struct *old_mm, *active_mm; > + int ret; > > /* Notify parent that we're no longer interested in the old VM */ > tsk = current; > old_mm = current->mm; > exec_mm_release(tsk, old_mm); > > + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); > + if (ret) > + return ret; > + > if (old_mm) { > sync_mm_rss(old_mm); > /* > @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) > down_read(&old_mm->mmap_sem); > if (unlikely(old_mm->core_state)) { > up_read(&old_mm->mmap_sem); > + mutex_unlock(&tsk->signal->exec_update_mutex); > return -EINTR; > } > } > + > task_lock(tsk); > active_mm = tsk->active_mm; > membarrier_exec_mmap(mm); > @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) > goto out; > > /* > - * After clearing bprm->mm (to mark that current is using the > - * prepared mm now), we have nothing left of the original > + * After setting bprm->called_exec_mmap (to mark that current is > + * using the prepared mm now), we have nothing left of the original > * process. If anything from here on returns an error, the check > * in search_binary_handler() will SEGV current. > */ > + bprm->called_exec_mmap = 1; The two below is non-breaking pair: exec_mmap(bprm->mm); bprm->called_exec_mmap = 1; Why not move this into exec_mmap(), so nobody definitely inserts something between them? > bprm->mm = NULL; > > #ifdef CONFIG_POSIX_TIMERS > @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) > { > free_arg_pages(bprm); > if (bprm->cred) { > + if (bprm->called_exec_mmap) > + mutex_unlock(¤t->signal->exec_update_mutex); > mutex_unlock(¤t->signal->cred_guard_mutex); > abort_creds(bprm->cred); > } > @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) > * credentials; any time after this it may be unlocked. > */ > security_bprm_committed_creds(bprm); > + mutex_unlock(¤t->signal->exec_update_mutex); > mutex_unlock(¤t->signal->cred_guard_mutex); > } > EXPORT_SYMBOL(install_exec_creds); > @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) > > read_lock(&binfmt_lock); > put_binfmt(fmt); > - if (retval < 0 && !bprm->mm) { > + if (retval < 0 && bprm->called_exec_mmap) { > /* we got to flush_old_exec() and failed after it */ > read_unlock(&binfmt_lock); > force_sigsegv(SIGSEGV); > diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h > index b40fc63..a345d9f 100644 > --- a/include/linux/binfmts.h > +++ b/include/linux/binfmts.h > @@ -44,7 +44,13 @@ struct linux_binprm { > * exec has happened. Used to sanitize execution environment > * and to set AT_SECURE auxv for glibc. > */ > - secureexec:1; > + secureexec:1, > + /* > + * Set by flush_old_exec, when exec_mmap has been called. > + * This is past the point of no return, when the > + * exec_update_mutex has been taken. > + */ > + called_exec_mmap:1; > #ifdef __alpha__ > unsigned int taso:1; > #endif > diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h > index 8805025..a29df79 100644 > --- a/include/linux/sched/signal.h > +++ b/include/linux/sched/signal.h > @@ -224,7 +224,14 @@ struct signal_struct { > > struct mutex cred_guard_mutex; /* guard against foreign influences on > * credential calculations > - * (notably. ptrace) */ > + * (notably. ptrace) > + * Deprecated do not use in new code. > + * Use exec_update_mutex instead. > + */ > + struct mutex exec_update_mutex; /* Held while task_struct is being > + * updated during exec, and may have > + * inconsistent permissions. > + */ > } __randomize_layout; > > /* > diff --git a/init/init_task.c b/init/init_task.c > index 9e5cbe5..bd403ed 100644 > --- a/init/init_task.c > +++ b/init/init_task.c > @@ -26,6 +26,7 @@ > .multiprocess = HLIST_HEAD_INIT, > .rlim = INIT_RLIMITS, > .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), > + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), > #ifdef CONFIG_POSIX_TIMERS > .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), > .cputimer = { > diff --git a/kernel/fork.c b/kernel/fork.c > index 8642530..036b692 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) > sig->oom_score_adj_min = current->signal->oom_score_adj_min; > > mutex_init(&sig->cred_guard_mutex); > + mutex_init(&sig->exec_update_mutex); > > return 0; > } >
On 3/17/20 9:56 AM, Kirill Tkhai wrote: > On 14.03.2020 12:11, Bernd Edlinger wrote: >> The cred_guard_mutex is problematic. The cred_guard_mutex is held >> over the userspace accesses as the arguments from userspace are read. >> The cred_guard_mutex is held of PTRACE_EVENT_EXIT as the the other >> threads are killed. The cred_guard_mutex is held over >> "put_user(0, tsk->clear_child_tid)" in exit_mm(). >> >> Any of those can result in deadlock, as the cred_guard_mutex is held >> over a possible indefinite userspace waits for userspace. >> >> Add exec_update_mutex that is only held over exec updating process >> with the new contents of exec, so that code that needs not to be >> confused by exec changing the mm and the cred in ways that can not >> happen during ordinary execution of a process. >> >> The plan is to switch the users of cred_guard_mutex to >> exec_udpate_mutex one by one. This lets us move forward while still >> being careful and not introducing any regressions. >> >> Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ >> Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ >> Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ >> Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ >> Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ >> Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") >> Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >> Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de> >> --- >> fs/exec.c | 17 ++++++++++++++--- >> include/linux/binfmts.h | 8 +++++++- >> include/linux/sched/signal.h | 9 ++++++++- >> init/init_task.c | 1 + >> kernel/fork.c | 1 + >> 5 files changed, 31 insertions(+), 5 deletions(-) >> >> v3: this update fixes lock-order and adds an explicit data member in linux_binprm >> >> diff --git a/fs/exec.c b/fs/exec.c >> index d820a72..11974a1 100644 >> --- a/fs/exec.c >> +++ b/fs/exec.c >> @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) >> { >> struct task_struct *tsk; >> struct mm_struct *old_mm, *active_mm; >> + int ret; >> >> /* Notify parent that we're no longer interested in the old VM */ >> tsk = current; >> old_mm = current->mm; >> exec_mm_release(tsk, old_mm); >> >> + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); >> + if (ret) >> + return ret; >> + >> if (old_mm) { >> sync_mm_rss(old_mm); >> /* >> @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) >> down_read(&old_mm->mmap_sem); >> if (unlikely(old_mm->core_state)) { >> up_read(&old_mm->mmap_sem); >> + mutex_unlock(&tsk->signal->exec_update_mutex); >> return -EINTR; >> } >> } >> + >> task_lock(tsk); >> active_mm = tsk->active_mm; >> membarrier_exec_mmap(mm); >> @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) >> goto out; >> >> /* >> - * After clearing bprm->mm (to mark that current is using the >> - * prepared mm now), we have nothing left of the original >> + * After setting bprm->called_exec_mmap (to mark that current is >> + * using the prepared mm now), we have nothing left of the original >> * process. If anything from here on returns an error, the check >> * in search_binary_handler() will SEGV current. >> */ >> + bprm->called_exec_mmap = 1; > > The two below is non-breaking pair: > > exec_mmap(bprm->mm); > bprm->called_exec_mmap = 1; > > Why not move this into exec_mmap(), so nobody definitely inserts something > between them? > Hmm, could be done, but then I would probably need a different name than "called_exec_mmap". How about adding a nice function comment to exec_mmap that calls out the changed behaviour that the exec_update_mutex is taken unless the function fails? Bernd. >> bprm->mm = NULL; >> >> #ifdef CONFIG_POSIX_TIMERS >> @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) >> { >> free_arg_pages(bprm); >> if (bprm->cred) { >> + if (bprm->called_exec_mmap) >> + mutex_unlock(¤t->signal->exec_update_mutex); >> mutex_unlock(¤t->signal->cred_guard_mutex); >> abort_creds(bprm->cred); >> } >> @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) >> * credentials; any time after this it may be unlocked. >> */ >> security_bprm_committed_creds(bprm); >> + mutex_unlock(¤t->signal->exec_update_mutex); >> mutex_unlock(¤t->signal->cred_guard_mutex); >> } >> EXPORT_SYMBOL(install_exec_creds); >> @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) >> >> read_lock(&binfmt_lock); >> put_binfmt(fmt); >> - if (retval < 0 && !bprm->mm) { >> + if (retval < 0 && bprm->called_exec_mmap) { >> /* we got to flush_old_exec() and failed after it */ >> read_unlock(&binfmt_lock); >> force_sigsegv(SIGSEGV); >> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h >> index b40fc63..a345d9f 100644 >> --- a/include/linux/binfmts.h >> +++ b/include/linux/binfmts.h >> @@ -44,7 +44,13 @@ struct linux_binprm { >> * exec has happened. Used to sanitize execution environment >> * and to set AT_SECURE auxv for glibc. >> */ >> - secureexec:1; >> + secureexec:1, >> + /* >> + * Set by flush_old_exec, when exec_mmap has been called. >> + * This is past the point of no return, when the >> + * exec_update_mutex has been taken. >> + */ >> + called_exec_mmap:1; >> #ifdef __alpha__ >> unsigned int taso:1; >> #endif >> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h >> index 8805025..a29df79 100644 >> --- a/include/linux/sched/signal.h >> +++ b/include/linux/sched/signal.h >> @@ -224,7 +224,14 @@ struct signal_struct { >> >> struct mutex cred_guard_mutex; /* guard against foreign influences on >> * credential calculations >> - * (notably. ptrace) */ >> + * (notably. ptrace) >> + * Deprecated do not use in new code. >> + * Use exec_update_mutex instead. >> + */ >> + struct mutex exec_update_mutex; /* Held while task_struct is being >> + * updated during exec, and may have >> + * inconsistent permissions. >> + */ >> } __randomize_layout; >> >> /* >> diff --git a/init/init_task.c b/init/init_task.c >> index 9e5cbe5..bd403ed 100644 >> --- a/init/init_task.c >> +++ b/init/init_task.c >> @@ -26,6 +26,7 @@ >> .multiprocess = HLIST_HEAD_INIT, >> .rlim = INIT_RLIMITS, >> .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), >> + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), >> #ifdef CONFIG_POSIX_TIMERS >> .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), >> .cputimer = { >> diff --git a/kernel/fork.c b/kernel/fork.c >> index 8642530..036b692 100644 >> --- a/kernel/fork.c >> +++ b/kernel/fork.c >> @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) >> sig->oom_score_adj_min = current->signal->oom_score_adj_min; >> >> mutex_init(&sig->cred_guard_mutex); >> + mutex_init(&sig->exec_update_mutex); >> >> return 0; >> } >> >
On 18.03.2020 00:53, Bernd Edlinger wrote: > On 3/17/20 9:56 AM, Kirill Tkhai wrote: >> On 14.03.2020 12:11, Bernd Edlinger wrote: >>> The cred_guard_mutex is problematic. The cred_guard_mutex is held >>> over the userspace accesses as the arguments from userspace are read. >>> The cred_guard_mutex is held of PTRACE_EVENT_EXIT as the the other >>> threads are killed. The cred_guard_mutex is held over >>> "put_user(0, tsk->clear_child_tid)" in exit_mm(). >>> >>> Any of those can result in deadlock, as the cred_guard_mutex is held >>> over a possible indefinite userspace waits for userspace. >>> >>> Add exec_update_mutex that is only held over exec updating process >>> with the new contents of exec, so that code that needs not to be >>> confused by exec changing the mm and the cred in ways that can not >>> happen during ordinary execution of a process. >>> >>> The plan is to switch the users of cred_guard_mutex to >>> exec_udpate_mutex one by one. This lets us move forward while still >>> being careful and not introducing any regressions. >>> >>> Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ >>> Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ >>> Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ >>> Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ >>> Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ >>> Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") >>> Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") >>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >>> Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de> >>> --- >>> fs/exec.c | 17 ++++++++++++++--- >>> include/linux/binfmts.h | 8 +++++++- >>> include/linux/sched/signal.h | 9 ++++++++- >>> init/init_task.c | 1 + >>> kernel/fork.c | 1 + >>> 5 files changed, 31 insertions(+), 5 deletions(-) >>> >>> v3: this update fixes lock-order and adds an explicit data member in linux_binprm >>> >>> diff --git a/fs/exec.c b/fs/exec.c >>> index d820a72..11974a1 100644 >>> --- a/fs/exec.c >>> +++ b/fs/exec.c >>> @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) >>> { >>> struct task_struct *tsk; >>> struct mm_struct *old_mm, *active_mm; >>> + int ret; >>> >>> /* Notify parent that we're no longer interested in the old VM */ >>> tsk = current; >>> old_mm = current->mm; >>> exec_mm_release(tsk, old_mm); >>> >>> + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); >>> + if (ret) >>> + return ret; >>> + >>> if (old_mm) { >>> sync_mm_rss(old_mm); >>> /* >>> @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) >>> down_read(&old_mm->mmap_sem); >>> if (unlikely(old_mm->core_state)) { >>> up_read(&old_mm->mmap_sem); >>> + mutex_unlock(&tsk->signal->exec_update_mutex); >>> return -EINTR; >>> } >>> } >>> + >>> task_lock(tsk); >>> active_mm = tsk->active_mm; >>> membarrier_exec_mmap(mm); >>> @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) >>> goto out; >>> >>> /* >>> - * After clearing bprm->mm (to mark that current is using the >>> - * prepared mm now), we have nothing left of the original >>> + * After setting bprm->called_exec_mmap (to mark that current is >>> + * using the prepared mm now), we have nothing left of the original >>> * process. If anything from here on returns an error, the check >>> * in search_binary_handler() will SEGV current. >>> */ >>> + bprm->called_exec_mmap = 1; >> >> The two below is non-breaking pair: >> >> exec_mmap(bprm->mm); >> bprm->called_exec_mmap = 1; >> >> Why not move this into exec_mmap(), so nobody definitely inserts something >> between them? >> > > Hmm, could be done, but then I would probably need a different name than > "called_exec_mmap". > > How about adding a nice function comment to exec_mmap that calls out the > changed behaviour that the exec_update_mutex is taken unless the function > fails? Not sure, I understand correct. Could you post this like a small patch hunk (on top of anything you want)? > Bernd. > > >>> bprm->mm = NULL; >>> >>> #ifdef CONFIG_POSIX_TIMERS >>> @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) >>> { >>> free_arg_pages(bprm); >>> if (bprm->cred) { >>> + if (bprm->called_exec_mmap) >>> + mutex_unlock(¤t->signal->exec_update_mutex); >>> mutex_unlock(¤t->signal->cred_guard_mutex); >>> abort_creds(bprm->cred); >>> } >>> @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) >>> * credentials; any time after this it may be unlocked. >>> */ >>> security_bprm_committed_creds(bprm); >>> + mutex_unlock(¤t->signal->exec_update_mutex); >>> mutex_unlock(¤t->signal->cred_guard_mutex); >>> } >>> EXPORT_SYMBOL(install_exec_creds); >>> @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) >>> >>> read_lock(&binfmt_lock); >>> put_binfmt(fmt); >>> - if (retval < 0 && !bprm->mm) { >>> + if (retval < 0 && bprm->called_exec_mmap) { >>> /* we got to flush_old_exec() and failed after it */ >>> read_unlock(&binfmt_lock); >>> force_sigsegv(SIGSEGV); >>> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h >>> index b40fc63..a345d9f 100644 >>> --- a/include/linux/binfmts.h >>> +++ b/include/linux/binfmts.h >>> @@ -44,7 +44,13 @@ struct linux_binprm { >>> * exec has happened. Used to sanitize execution environment >>> * and to set AT_SECURE auxv for glibc. >>> */ >>> - secureexec:1; >>> + secureexec:1, >>> + /* >>> + * Set by flush_old_exec, when exec_mmap has been called. >>> + * This is past the point of no return, when the >>> + * exec_update_mutex has been taken. >>> + */ >>> + called_exec_mmap:1; >>> #ifdef __alpha__ >>> unsigned int taso:1; >>> #endif >>> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h >>> index 8805025..a29df79 100644 >>> --- a/include/linux/sched/signal.h >>> +++ b/include/linux/sched/signal.h >>> @@ -224,7 +224,14 @@ struct signal_struct { >>> >>> struct mutex cred_guard_mutex; /* guard against foreign influences on >>> * credential calculations >>> - * (notably. ptrace) */ >>> + * (notably. ptrace) >>> + * Deprecated do not use in new code. >>> + * Use exec_update_mutex instead. >>> + */ >>> + struct mutex exec_update_mutex; /* Held while task_struct is being >>> + * updated during exec, and may have >>> + * inconsistent permissions. >>> + */ >>> } __randomize_layout; >>> >>> /* >>> diff --git a/init/init_task.c b/init/init_task.c >>> index 9e5cbe5..bd403ed 100644 >>> --- a/init/init_task.c >>> +++ b/init/init_task.c >>> @@ -26,6 +26,7 @@ >>> .multiprocess = HLIST_HEAD_INIT, >>> .rlim = INIT_RLIMITS, >>> .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), >>> + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), >>> #ifdef CONFIG_POSIX_TIMERS >>> .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), >>> .cputimer = { >>> diff --git a/kernel/fork.c b/kernel/fork.c >>> index 8642530..036b692 100644 >>> --- a/kernel/fork.c >>> +++ b/kernel/fork.c >>> @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) >>> sig->oom_score_adj_min = current->signal->oom_score_adj_min; >>> >>> mutex_init(&sig->cred_guard_mutex); >>> + mutex_init(&sig->exec_update_mutex); >>> >>> return 0; >>> } >>> >>
On 3/18/20 1:22 PM, Kirill Tkhai wrote: > On 18.03.2020 00:53, Bernd Edlinger wrote: >> On 3/17/20 9:56 AM, Kirill Tkhai wrote: >>> On 14.03.2020 12:11, Bernd Edlinger wrote: >>>> The cred_guard_mutex is problematic. The cred_guard_mutex is held >>>> over the userspace accesses as the arguments from userspace are read. >>>> The cred_guard_mutex is held of PTRACE_EVENT_EXIT as the the other >>>> threads are killed. The cred_guard_mutex is held over >>>> "put_user(0, tsk->clear_child_tid)" in exit_mm(). >>>> >>>> Any of those can result in deadlock, as the cred_guard_mutex is held >>>> over a possible indefinite userspace waits for userspace. >>>> >>>> Add exec_update_mutex that is only held over exec updating process >>>> with the new contents of exec, so that code that needs not to be >>>> confused by exec changing the mm and the cred in ways that can not >>>> happen during ordinary execution of a process. >>>> >>>> The plan is to switch the users of cred_guard_mutex to >>>> exec_udpate_mutex one by one. This lets us move forward while still >>>> being careful and not introducing any regressions. >>>> >>>> Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ >>>> Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ >>>> Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ >>>> Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ >>>> Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ >>>> Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") >>>> Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") >>>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >>>> Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de> >>>> --- >>>> fs/exec.c | 17 ++++++++++++++--- >>>> include/linux/binfmts.h | 8 +++++++- >>>> include/linux/sched/signal.h | 9 ++++++++- >>>> init/init_task.c | 1 + >>>> kernel/fork.c | 1 + >>>> 5 files changed, 31 insertions(+), 5 deletions(-) >>>> >>>> v3: this update fixes lock-order and adds an explicit data member in linux_binprm >>>> >>>> diff --git a/fs/exec.c b/fs/exec.c >>>> index d820a72..11974a1 100644 >>>> --- a/fs/exec.c >>>> +++ b/fs/exec.c >>>> @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) >>>> { >>>> struct task_struct *tsk; >>>> struct mm_struct *old_mm, *active_mm; >>>> + int ret; >>>> >>>> /* Notify parent that we're no longer interested in the old VM */ >>>> tsk = current; >>>> old_mm = current->mm; >>>> exec_mm_release(tsk, old_mm); >>>> >>>> + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); >>>> + if (ret) >>>> + return ret; >>>> + >>>> if (old_mm) { >>>> sync_mm_rss(old_mm); >>>> /* >>>> @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) >>>> down_read(&old_mm->mmap_sem); >>>> if (unlikely(old_mm->core_state)) { >>>> up_read(&old_mm->mmap_sem); >>>> + mutex_unlock(&tsk->signal->exec_update_mutex); >>>> return -EINTR; >>>> } >>>> } >>>> + >>>> task_lock(tsk); >>>> active_mm = tsk->active_mm; >>>> membarrier_exec_mmap(mm); >>>> @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) >>>> goto out; >>>> >>>> /* >>>> - * After clearing bprm->mm (to mark that current is using the >>>> - * prepared mm now), we have nothing left of the original >>>> + * After setting bprm->called_exec_mmap (to mark that current is >>>> + * using the prepared mm now), we have nothing left of the original >>>> * process. If anything from here on returns an error, the check >>>> * in search_binary_handler() will SEGV current. >>>> */ >>>> + bprm->called_exec_mmap = 1; >>> >>> The two below is non-breaking pair: >>> >>> exec_mmap(bprm->mm); >>> bprm->called_exec_mmap = 1; >>> >>> Why not move this into exec_mmap(), so nobody definitely inserts something >>> between them? >>> >> >> Hmm, could be done, but then I would probably need a different name than >> "called_exec_mmap". >> >> How about adding a nice function comment to exec_mmap that calls out the >> changed behaviour that the exec_update_mutex is taken unless the function >> fails? > > Not sure, I understand correct. > > Could you post this like a small patch hunk (on top of anything you want)? > I was thinking of something like that: --- a/fs/exec.c +++ b/fs/exec.c @@ -1010,6 +1010,11 @@ ssize_t read_code(struct file *file, unsigned long addr, } EXPORT_SYMBOL(read_code); +/* + * Maps the mm_struct mm into the current task struct. + * On success, this function returns with the mutex + * exec_update_mutex locked. + */ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; >> Bernd. >> >> >>>> bprm->mm = NULL; >>>> >>>> #ifdef CONFIG_POSIX_TIMERS >>>> @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) >>>> { >>>> free_arg_pages(bprm); >>>> if (bprm->cred) { >>>> + if (bprm->called_exec_mmap) >>>> + mutex_unlock(¤t->signal->exec_update_mutex); >>>> mutex_unlock(¤t->signal->cred_guard_mutex); >>>> abort_creds(bprm->cred); >>>> } >>>> @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) >>>> * credentials; any time after this it may be unlocked. >>>> */ >>>> security_bprm_committed_creds(bprm); >>>> + mutex_unlock(¤t->signal->exec_update_mutex); >>>> mutex_unlock(¤t->signal->cred_guard_mutex); >>>> } >>>> EXPORT_SYMBOL(install_exec_creds); >>>> @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) >>>> >>>> read_lock(&binfmt_lock); >>>> put_binfmt(fmt); >>>> - if (retval < 0 && !bprm->mm) { >>>> + if (retval < 0 && bprm->called_exec_mmap) { >>>> /* we got to flush_old_exec() and failed after it */ >>>> read_unlock(&binfmt_lock); >>>> force_sigsegv(SIGSEGV); >>>> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h >>>> index b40fc63..a345d9f 100644 >>>> --- a/include/linux/binfmts.h >>>> +++ b/include/linux/binfmts.h >>>> @@ -44,7 +44,13 @@ struct linux_binprm { >>>> * exec has happened. Used to sanitize execution environment >>>> * and to set AT_SECURE auxv for glibc. >>>> */ >>>> - secureexec:1; >>>> + secureexec:1, >>>> + /* >>>> + * Set by flush_old_exec, when exec_mmap has been called. >>>> + * This is past the point of no return, when the >>>> + * exec_update_mutex has been taken. >>>> + */ >>>> + called_exec_mmap:1; >>>> #ifdef __alpha__ >>>> unsigned int taso:1; >>>> #endif >>>> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h >>>> index 8805025..a29df79 100644 >>>> --- a/include/linux/sched/signal.h >>>> +++ b/include/linux/sched/signal.h >>>> @@ -224,7 +224,14 @@ struct signal_struct { >>>> >>>> struct mutex cred_guard_mutex; /* guard against foreign influences on >>>> * credential calculations >>>> - * (notably. ptrace) */ >>>> + * (notably. ptrace) >>>> + * Deprecated do not use in new code. >>>> + * Use exec_update_mutex instead. >>>> + */ >>>> + struct mutex exec_update_mutex; /* Held while task_struct is being >>>> + * updated during exec, and may have >>>> + * inconsistent permissions. >>>> + */ >>>> } __randomize_layout; >>>> >>>> /* >>>> diff --git a/init/init_task.c b/init/init_task.c >>>> index 9e5cbe5..bd403ed 100644 >>>> --- a/init/init_task.c >>>> +++ b/init/init_task.c >>>> @@ -26,6 +26,7 @@ >>>> .multiprocess = HLIST_HEAD_INIT, >>>> .rlim = INIT_RLIMITS, >>>> .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), >>>> + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), >>>> #ifdef CONFIG_POSIX_TIMERS >>>> .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), >>>> .cputimer = { >>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>> index 8642530..036b692 100644 >>>> --- a/kernel/fork.c >>>> +++ b/kernel/fork.c >>>> @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) >>>> sig->oom_score_adj_min = current->signal->oom_score_adj_min; >>>> >>>> mutex_init(&sig->cred_guard_mutex); >>>> + mutex_init(&sig->exec_update_mutex); >>>> >>>> return 0; >>>> } >>>> >>> >
On 18.03.2020 23:06, Bernd Edlinger wrote: > On 3/18/20 1:22 PM, Kirill Tkhai wrote: >> On 18.03.2020 00:53, Bernd Edlinger wrote: >>> On 3/17/20 9:56 AM, Kirill Tkhai wrote: >>>> On 14.03.2020 12:11, Bernd Edlinger wrote: >>>>> The cred_guard_mutex is problematic. The cred_guard_mutex is held >>>>> over the userspace accesses as the arguments from userspace are read. >>>>> The cred_guard_mutex is held of PTRACE_EVENT_EXIT as the the other >>>>> threads are killed. The cred_guard_mutex is held over >>>>> "put_user(0, tsk->clear_child_tid)" in exit_mm(). >>>>> >>>>> Any of those can result in deadlock, as the cred_guard_mutex is held >>>>> over a possible indefinite userspace waits for userspace. >>>>> >>>>> Add exec_update_mutex that is only held over exec updating process >>>>> with the new contents of exec, so that code that needs not to be >>>>> confused by exec changing the mm and the cred in ways that can not >>>>> happen during ordinary execution of a process. >>>>> >>>>> The plan is to switch the users of cred_guard_mutex to >>>>> exec_udpate_mutex one by one. This lets us move forward while still >>>>> being careful and not introducing any regressions. >>>>> >>>>> Link: https://lore.kernel.org/lkml/20160921152946.GA24210@dhcp22.suse.cz/ >>>>> Link: https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/ >>>>> Link: https://lore.kernel.org/linux-fsdevel/20161102181806.GB1112@redhat.com/ >>>>> Link: https://lore.kernel.org/lkml/20160923095031.GA14923@redhat.com/ >>>>> Link: https://lore.kernel.org/lkml/20170213141452.GA30203@redhat.com/ >>>>> Ref: 45c1a159b85b ("Add PTRACE_O_TRACEVFORKDONE and PTRACE_O_TRACEEXIT facilities.") >>>>> Ref: 456f17cd1a28 ("[PATCH] user-vm-unlock-2.5.31-A2") >>>>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >>>>> Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de> >>>>> --- >>>>> fs/exec.c | 17 ++++++++++++++--- >>>>> include/linux/binfmts.h | 8 +++++++- >>>>> include/linux/sched/signal.h | 9 ++++++++- >>>>> init/init_task.c | 1 + >>>>> kernel/fork.c | 1 + >>>>> 5 files changed, 31 insertions(+), 5 deletions(-) >>>>> >>>>> v3: this update fixes lock-order and adds an explicit data member in linux_binprm >>>>> >>>>> diff --git a/fs/exec.c b/fs/exec.c >>>>> index d820a72..11974a1 100644 >>>>> --- a/fs/exec.c >>>>> +++ b/fs/exec.c >>>>> @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) >>>>> { >>>>> struct task_struct *tsk; >>>>> struct mm_struct *old_mm, *active_mm; >>>>> + int ret; >>>>> >>>>> /* Notify parent that we're no longer interested in the old VM */ >>>>> tsk = current; >>>>> old_mm = current->mm; >>>>> exec_mm_release(tsk, old_mm); >>>>> >>>>> + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); >>>>> + if (ret) >>>>> + return ret; >>>>> + >>>>> if (old_mm) { >>>>> sync_mm_rss(old_mm); >>>>> /* >>>>> @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) >>>>> down_read(&old_mm->mmap_sem); >>>>> if (unlikely(old_mm->core_state)) { >>>>> up_read(&old_mm->mmap_sem); >>>>> + mutex_unlock(&tsk->signal->exec_update_mutex); >>>>> return -EINTR; >>>>> } >>>>> } >>>>> + >>>>> task_lock(tsk); >>>>> active_mm = tsk->active_mm; >>>>> membarrier_exec_mmap(mm); >>>>> @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) >>>>> goto out; >>>>> >>>>> /* >>>>> - * After clearing bprm->mm (to mark that current is using the >>>>> - * prepared mm now), we have nothing left of the original >>>>> + * After setting bprm->called_exec_mmap (to mark that current is >>>>> + * using the prepared mm now), we have nothing left of the original >>>>> * process. If anything from here on returns an error, the check >>>>> * in search_binary_handler() will SEGV current. >>>>> */ >>>>> + bprm->called_exec_mmap = 1; >>>> >>>> The two below is non-breaking pair: >>>> >>>> exec_mmap(bprm->mm); >>>> bprm->called_exec_mmap = 1; >>>> >>>> Why not move this into exec_mmap(), so nobody definitely inserts something >>>> between them? >>>> >>> >>> Hmm, could be done, but then I would probably need a different name than >>> "called_exec_mmap". >>> >>> How about adding a nice function comment to exec_mmap that calls out the >>> changed behaviour that the exec_update_mutex is taken unless the function >>> fails? >> >> Not sure, I understand correct. >> >> Could you post this like a small patch hunk (on top of anything you want)? >> > > I was thinking of something like that: > > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1010,6 +1010,11 @@ ssize_t read_code(struct file *file, unsigned long addr, > } > EXPORT_SYMBOL(read_code); > > +/* > + * Maps the mm_struct mm into the current task struct. > + * On success, this function returns with the mutex > + * exec_update_mutex locked. > + */ Looks OK for me. > static int exec_mmap(struct mm_struct *mm) > { > struct task_struct *tsk; > > >>> Bernd. >>> >>> >>>>> bprm->mm = NULL; >>>>> >>>>> #ifdef CONFIG_POSIX_TIMERS >>>>> @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) >>>>> { >>>>> free_arg_pages(bprm); >>>>> if (bprm->cred) { >>>>> + if (bprm->called_exec_mmap) >>>>> + mutex_unlock(¤t->signal->exec_update_mutex); >>>>> mutex_unlock(¤t->signal->cred_guard_mutex); >>>>> abort_creds(bprm->cred); >>>>> } >>>>> @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) >>>>> * credentials; any time after this it may be unlocked. >>>>> */ >>>>> security_bprm_committed_creds(bprm); >>>>> + mutex_unlock(¤t->signal->exec_update_mutex); >>>>> mutex_unlock(¤t->signal->cred_guard_mutex); >>>>> } >>>>> EXPORT_SYMBOL(install_exec_creds); >>>>> @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) >>>>> >>>>> read_lock(&binfmt_lock); >>>>> put_binfmt(fmt); >>>>> - if (retval < 0 && !bprm->mm) { >>>>> + if (retval < 0 && bprm->called_exec_mmap) { >>>>> /* we got to flush_old_exec() and failed after it */ >>>>> read_unlock(&binfmt_lock); >>>>> force_sigsegv(SIGSEGV); >>>>> diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h >>>>> index b40fc63..a345d9f 100644 >>>>> --- a/include/linux/binfmts.h >>>>> +++ b/include/linux/binfmts.h >>>>> @@ -44,7 +44,13 @@ struct linux_binprm { >>>>> * exec has happened. Used to sanitize execution environment >>>>> * and to set AT_SECURE auxv for glibc. >>>>> */ >>>>> - secureexec:1; >>>>> + secureexec:1, >>>>> + /* >>>>> + * Set by flush_old_exec, when exec_mmap has been called. >>>>> + * This is past the point of no return, when the >>>>> + * exec_update_mutex has been taken. >>>>> + */ >>>>> + called_exec_mmap:1; >>>>> #ifdef __alpha__ >>>>> unsigned int taso:1; >>>>> #endif >>>>> diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h >>>>> index 8805025..a29df79 100644 >>>>> --- a/include/linux/sched/signal.h >>>>> +++ b/include/linux/sched/signal.h >>>>> @@ -224,7 +224,14 @@ struct signal_struct { >>>>> >>>>> struct mutex cred_guard_mutex; /* guard against foreign influences on >>>>> * credential calculations >>>>> - * (notably. ptrace) */ >>>>> + * (notably. ptrace) >>>>> + * Deprecated do not use in new code. >>>>> + * Use exec_update_mutex instead. >>>>> + */ >>>>> + struct mutex exec_update_mutex; /* Held while task_struct is being >>>>> + * updated during exec, and may have >>>>> + * inconsistent permissions. >>>>> + */ >>>>> } __randomize_layout; >>>>> >>>>> /* >>>>> diff --git a/init/init_task.c b/init/init_task.c >>>>> index 9e5cbe5..bd403ed 100644 >>>>> --- a/init/init_task.c >>>>> +++ b/init/init_task.c >>>>> @@ -26,6 +26,7 @@ >>>>> .multiprocess = HLIST_HEAD_INIT, >>>>> .rlim = INIT_RLIMITS, >>>>> .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), >>>>> + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), >>>>> #ifdef CONFIG_POSIX_TIMERS >>>>> .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), >>>>> .cputimer = { >>>>> diff --git a/kernel/fork.c b/kernel/fork.c >>>>> index 8642530..036b692 100644 >>>>> --- a/kernel/fork.c >>>>> +++ b/kernel/fork.c >>>>> @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) >>>>> sig->oom_score_adj_min = current->signal->oom_score_adj_min; >>>>> >>>>> mutex_init(&sig->cred_guard_mutex); >>>>> + mutex_init(&sig->exec_update_mutex); >>>>> >>>>> return 0; >>>>> } >>>>> >>>> >>
On 3/19/20 8:13 AM, Kirill Tkhai wrote: > On 18.03.2020 23:06, Bernd Edlinger wrote: >> >> I was thinking of something like that: >> >> --- a/fs/exec.c >> +++ b/fs/exec.c >> @@ -1010,6 +1010,11 @@ ssize_t read_code(struct file *file, unsigned long addr, >> } >> EXPORT_SYMBOL(read_code); >> >> +/* >> + * Maps the mm_struct mm into the current task struct. >> + * On success, this function returns with the mutex >> + * exec_update_mutex locked. >> + */ > > Looks OK for me. > Cool, yeah, then I will post an updated patch in a moment. Thanks Bernd.
diff --git a/fs/exec.c b/fs/exec.c index d820a72..11974a1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1014,12 +1014,17 @@ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; + int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; exec_mm_release(tsk, old_mm); + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); + if (ret) + return ret; + if (old_mm) { sync_mm_rss(old_mm); /* @@ -1031,9 +1036,11 @@ static int exec_mmap(struct mm_struct *mm) down_read(&old_mm->mmap_sem); if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); + mutex_unlock(&tsk->signal->exec_update_mutex); return -EINTR; } } + task_lock(tsk); active_mm = tsk->active_mm; membarrier_exec_mmap(mm); @@ -1288,11 +1295,12 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; /* - * After clearing bprm->mm (to mark that current is using the - * prepared mm now), we have nothing left of the original + * After setting bprm->called_exec_mmap (to mark that current is + * using the prepared mm now), we have nothing left of the original * process. If anything from here on returns an error, the check * in search_binary_handler() will SEGV current. */ + bprm->called_exec_mmap = 1; bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS @@ -1438,6 +1446,8 @@ static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { + if (bprm->called_exec_mmap) + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1487,6 +1497,7 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); @@ -1678,7 +1689,7 @@ int search_binary_handler(struct linux_binprm *bprm) read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval < 0 && !bprm->mm) { + if (retval < 0 && bprm->called_exec_mmap) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); force_sigsegv(SIGSEGV); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b40fc63..a345d9f 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,13 @@ struct linux_binprm { * exec has happened. Used to sanitize execution environment * and to set AT_SECURE auxv for glibc. */ - secureexec:1; + secureexec:1, + /* + * Set by flush_old_exec, when exec_mmap has been called. + * This is past the point of no return, when the + * exec_update_mutex has been taken. + */ + called_exec_mmap:1; #ifdef __alpha__ unsigned int taso:1; #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 8805025..a29df79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -224,7 +224,14 @@ struct signal_struct { struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations - * (notably. ptrace) */ + * (notably. ptrace) + * Deprecated do not use in new code. + * Use exec_update_mutex instead. + */ + struct mutex exec_update_mutex; /* Held while task_struct is being + * updated during exec, and may have + * inconsistent permissions. + */ } __randomize_layout; /* diff --git a/init/init_task.c b/init/init_task.c index 9e5cbe5..bd403ed 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -26,6 +26,7 @@ .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), + .exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex), #ifdef CONFIG_POSIX_TIMERS .posix_timers = LIST_HEAD_INIT(init_signals.posix_timers), .cputimer = { diff --git a/kernel/fork.c b/kernel/fork.c index 8642530..036b692 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1594,6 +1594,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj_min = current->signal->oom_score_adj_min; mutex_init(&sig->cred_guard_mutex); + mutex_init(&sig->exec_update_mutex); return 0; }