Message ID | 87k13u5y26.fsf_-_@x220.int.ebiederm.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Infrastructure to allow fixing exec deadlocks | expand |
On 3/8/20 10:36 PM, Eric W. Biederman wrote: > > This makes the code clearer and makes it easier to implement a mutex > that is not taken over any locations that may block indefinitely waiting > for userspace. > > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> Bernd. > --- > fs/exec.c | 39 ++++++++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/fs/exec.c b/fs/exec.c > index c3f34791f2f0..ff74b9a74d34 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) > flush_itimer_signals(); > #endif > > + BUG_ON(!thread_group_leader(tsk)); > + return 0; > + > +killed: > + /* protects against exit_notify() and __exit_signal() */ > + read_lock(&tasklist_lock); > + sig->group_exit_task = NULL; > + sig->notify_count = 0; > + read_unlock(&tasklist_lock); > + return -EAGAIN; > +} > + > + > +static int unshare_sighand(struct task_struct *me) > +{ > + struct sighand_struct *oldsighand = me->sighand; > + > if (refcount_read(&oldsighand->count) != 1) { > struct sighand_struct *newsighand; > /* > @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk) > > write_lock_irq(&tasklist_lock); > spin_lock(&oldsighand->siglock); > - rcu_assign_pointer(tsk->sighand, newsighand); > + rcu_assign_pointer(me->sighand, newsighand); > spin_unlock(&oldsighand->siglock); > write_unlock_irq(&tasklist_lock); > > __cleanup_sighand(oldsighand); > } > - > - BUG_ON(!thread_group_leader(tsk)); > return 0; > - > -killed: > - /* protects against exit_notify() and __exit_signal() */ > - read_lock(&tasklist_lock); > - sig->group_exit_task = NULL; > - sig->notify_count = 0; > - read_unlock(&tasklist_lock); > - return -EAGAIN; > } > > char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) > @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm) > int retval; > > /* > - * Make sure we have a private signal table and that > - * we are unassociated from the previous thread group. > + * Make this the only thread in the thread group. > */ > retval = de_thread(me); > if (retval) > goto out; > > + /* > + * Make the signal table private. > + */ > + retval = unshare_sighand(me); > + if (retval) > + goto out; > + > /* > * Must be called _before_ exec_mmap() as bprm->mm is > * not visibile until then. This also enables the update >
On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote: > > This makes the code clearer and makes it easier to implement a mutex > that is not taken over any locations that may block indefinitely waiting > for userspace. > > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- > fs/exec.c | 39 ++++++++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/fs/exec.c b/fs/exec.c > index c3f34791f2f0..ff74b9a74d34 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) > flush_itimer_signals(); > #endif Semi-related (existing behavior): in de_thread(), what keeps the thread group from changing? i.e.: if (thread_group_empty(tsk)) goto no_thread_group; /* * Kill all other threads in the thread group. */ spin_lock_irq(lock); ... kill other threads under lock ... Why is the thread_group_emtpy() test not under lock? > > + BUG_ON(!thread_group_leader(tsk)); > + return 0; > + > +killed: > + /* protects against exit_notify() and __exit_signal() */ I wonder if include/linux/sched/task.h's definition of tasklist_lock should explicitly gain note about group_exit_task and notify_count, or, alternatively, signal.h's section on these fields should gain a comment? tasklist_lock is unmentioned in signal.h... :( > + read_lock(&tasklist_lock); > + sig->group_exit_task = NULL; > + sig->notify_count = 0; > + read_unlock(&tasklist_lock); > + return -EAGAIN; > +} > + > + > +static int unshare_sighand(struct task_struct *me) > +{ > + struct sighand_struct *oldsighand = me->sighand; > + > if (refcount_read(&oldsighand->count) != 1) { > struct sighand_struct *newsighand; > /* > @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk) > > write_lock_irq(&tasklist_lock); > spin_lock(&oldsighand->siglock); > - rcu_assign_pointer(tsk->sighand, newsighand); > + rcu_assign_pointer(me->sighand, newsighand); > spin_unlock(&oldsighand->siglock); > write_unlock_irq(&tasklist_lock); > > __cleanup_sighand(oldsighand); > } > - > - BUG_ON(!thread_group_leader(tsk)); > return 0; > - > -killed: > - /* protects against exit_notify() and __exit_signal() */ > - read_lock(&tasklist_lock); > - sig->group_exit_task = NULL; > - sig->notify_count = 0; > - read_unlock(&tasklist_lock); > - return -EAGAIN; > } > > char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) > @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm) > int retval; > > /* > - * Make sure we have a private signal table and that > - * we are unassociated from the previous thread group. > + * Make this the only thread in the thread group. > */ > retval = de_thread(me); > if (retval) > goto out; > > + /* > + * Make the signal table private. > + */ > + retval = unshare_sighand(me); > + if (retval) > + goto out; > + > /* > * Must be called _before_ exec_mmap() as bprm->mm is > * not visibile until then. This also enables the update > -- > 2.25.0 Otherwise, yes, sensible separation. Reviewed-by: Kees Cook <keescook@chromium.org>
On 3/10/20 9:29 PM, Kees Cook wrote: > On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote: >> >> This makes the code clearer and makes it easier to implement a mutex >> that is not taken over any locations that may block indefinitely waiting >> for userspace. >> >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> >> --- >> fs/exec.c | 39 ++++++++++++++++++++++++++------------- >> 1 file changed, 26 insertions(+), 13 deletions(-) >> >> diff --git a/fs/exec.c b/fs/exec.c >> index c3f34791f2f0..ff74b9a74d34 100644 >> --- a/fs/exec.c >> +++ b/fs/exec.c >> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) >> flush_itimer_signals(); >> #endif > > Semi-related (existing behavior): in de_thread(), what keeps the thread > group from changing? i.e.: > > if (thread_group_empty(tsk)) > goto no_thread_group; > > /* > * Kill all other threads in the thread group. > */ > spin_lock_irq(lock); > ... kill other threads under lock ... > > Why is the thread_group_emtpy() test not under lock? > A new thread cannot created when only one thread is executing, right? >> >> + BUG_ON(!thread_group_leader(tsk)); >> + return 0; >> + >> +killed: >> + /* protects against exit_notify() and __exit_signal() */ > > I wonder if include/linux/sched/task.h's definition of tasklist_lock > should explicitly gain note about group_exit_task and notify_count, > or, alternatively, signal.h's section on these fields should gain a > comment? tasklist_lock is unmentioned in signal.h... :( > >> + read_lock(&tasklist_lock); >> + sig->group_exit_task = NULL; >> + sig->notify_count = 0; >> + read_unlock(&tasklist_lock); >> + return -EAGAIN; >> +} >> + >> + >> +static int unshare_sighand(struct task_struct *me) >> +{ >> + struct sighand_struct *oldsighand = me->sighand; >> + >> if (refcount_read(&oldsighand->count) != 1) { >> struct sighand_struct *newsighand; >> /* >> @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk) >> >> write_lock_irq(&tasklist_lock); >> spin_lock(&oldsighand->siglock); >> - rcu_assign_pointer(tsk->sighand, newsighand); >> + rcu_assign_pointer(me->sighand, newsighand); >> spin_unlock(&oldsighand->siglock); >> write_unlock_irq(&tasklist_lock); >> >> __cleanup_sighand(oldsighand); >> } >> - >> - BUG_ON(!thread_group_leader(tsk)); >> return 0; >> - >> -killed: >> - /* protects against exit_notify() and __exit_signal() */ >> - read_lock(&tasklist_lock); >> - sig->group_exit_task = NULL; >> - sig->notify_count = 0; >> - read_unlock(&tasklist_lock); >> - return -EAGAIN; >> } >> >> char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) >> @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm) >> int retval; >> >> /* >> - * Make sure we have a private signal table and that >> - * we are unassociated from the previous thread group. >> + * Make this the only thread in the thread group. >> */ >> retval = de_thread(me); >> if (retval) >> goto out; >> >> + /* >> + * Make the signal table private. >> + */ >> + retval = unshare_sighand(me); >> + if (retval) >> + goto out; >> + >> /* >> * Must be called _before_ exec_mmap() as bprm->mm is >> * not visibile until then. This also enables the update >> -- >> 2.25.0 > > Otherwise, yes, sensible separation. > > Reviewed-by: Kees Cook <keescook@chromium.org> >
On Tue, Mar 10, 2020 at 09:34:03PM +0100, Bernd Edlinger wrote: > On 3/10/20 9:29 PM, Kees Cook wrote: > > On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote: > >> > >> This makes the code clearer and makes it easier to implement a mutex > >> that is not taken over any locations that may block indefinitely waiting > >> for userspace. > >> > >> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > >> --- > >> fs/exec.c | 39 ++++++++++++++++++++++++++------------- > >> 1 file changed, 26 insertions(+), 13 deletions(-) > >> > >> diff --git a/fs/exec.c b/fs/exec.c > >> index c3f34791f2f0..ff74b9a74d34 100644 > >> --- a/fs/exec.c > >> +++ b/fs/exec.c > >> @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) > >> flush_itimer_signals(); > >> #endif > > > > Semi-related (existing behavior): in de_thread(), what keeps the thread > > group from changing? i.e.: > > > > if (thread_group_empty(tsk)) > > goto no_thread_group; > > > > /* > > * Kill all other threads in the thread group. > > */ > > spin_lock_irq(lock); > > ... kill other threads under lock ... > > > > Why is the thread_group_emtpy() test not under lock? > > > > A new thread cannot created when only one thread is executing, > right? *face palm* Yes, of course. :) I'm thinking too hard.
On Sun, Mar 08, 2020 at 04:36:17PM -0500, Eric W. Biederman wrote: > > This makes the code clearer and makes it easier to implement a mutex > that is not taken over any locations that may block indefinitely waiting > for userspace. > > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- > fs/exec.c | 39 ++++++++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/fs/exec.c b/fs/exec.c > index c3f34791f2f0..ff74b9a74d34 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) > flush_itimer_signals(); > #endif > > + BUG_ON(!thread_group_leader(tsk)); > + return 0; > + > +killed: > + /* protects against exit_notify() and __exit_signal() */ > + read_lock(&tasklist_lock); > + sig->group_exit_task = NULL; > + sig->notify_count = 0; > + read_unlock(&tasklist_lock); > + return -EAGAIN; > +} > + > + > +static int unshare_sighand(struct task_struct *me) > +{ > + struct sighand_struct *oldsighand = me->sighand; > + > if (refcount_read(&oldsighand->count) != 1) { > struct sighand_struct *newsighand; > /* > @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk) > > write_lock_irq(&tasklist_lock); > spin_lock(&oldsighand->siglock); > - rcu_assign_pointer(tsk->sighand, newsighand); > + rcu_assign_pointer(me->sighand, newsighand); > spin_unlock(&oldsighand->siglock); > write_unlock_irq(&tasklist_lock); > > __cleanup_sighand(oldsighand); > } This is fine for now but we share an aweful lot of code with copy_sighand(). We should earmark this to look into consolidating the core operations into a common helper called from both copy_sighand() and unshare_sighand() maybe even dumbing it down to one helper. But not needed for now. Otherwise: Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
diff --git a/fs/exec.c b/fs/exec.c index c3f34791f2f0..ff74b9a74d34 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1194,6 +1194,23 @@ static int de_thread(struct task_struct *tsk) flush_itimer_signals(); #endif + BUG_ON(!thread_group_leader(tsk)); + return 0; + +killed: + /* protects against exit_notify() and __exit_signal() */ + read_lock(&tasklist_lock); + sig->group_exit_task = NULL; + sig->notify_count = 0; + read_unlock(&tasklist_lock); + return -EAGAIN; +} + + +static int unshare_sighand(struct task_struct *me) +{ + struct sighand_struct *oldsighand = me->sighand; + if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; /* @@ -1210,23 +1227,13 @@ static int de_thread(struct task_struct *tsk) write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); - rcu_assign_pointer(tsk->sighand, newsighand); + rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); __cleanup_sighand(oldsighand); } - - BUG_ON(!thread_group_leader(tsk)); return 0; - -killed: - /* protects against exit_notify() and __exit_signal() */ - read_lock(&tasklist_lock); - sig->group_exit_task = NULL; - sig->notify_count = 0; - read_unlock(&tasklist_lock); - return -EAGAIN; } char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) @@ -1264,13 +1271,19 @@ int flush_old_exec(struct linux_binprm * bprm) int retval; /* - * Make sure we have a private signal table and that - * we are unassociated from the previous thread group. + * Make this the only thread in the thread group. */ retval = de_thread(me); if (retval) goto out; + /* + * Make the signal table private. + */ + retval = unshare_sighand(me); + if (retval) + goto out; + /* * Must be called _before_ exec_mmap() as bprm->mm is * not visibile until then. This also enables the update
This makes the code clearer and makes it easier to implement a mutex that is not taken over any locations that may block indefinitely waiting for userspace. Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- fs/exec.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-)