diff mbox series

signal: remove the wrong signal_pending() check in restore_user_sigmask()

Message ID 20190604134117.GA29963@redhat.com (mailing list archive)
State New, archived
Headers show
Series signal: remove the wrong signal_pending() check in restore_user_sigmask() | expand

Commit Message

Oleg Nesterov June 4, 2019, 1:41 p.m. UTC
This is the minimal fix for stable, I'll send cleanups later.

The commit 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add
restore_user_sigmask()") introduced the visible change which breaks
user-space: a signal temporary unblocked by set_user_sigmask() can
be delivered even if the caller returns success or timeout.

Change restore_user_sigmask() to accept the additional "interrupted"
argument which should be used instead of signal_pending() check, and
update the callers.

Reported-by: Eric Wong <e@80x24.org>
Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()")
cc: stable@vger.kernel.org (v5.0+)
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 fs/aio.c               | 28 ++++++++++++++++++++--------
 fs/eventpoll.c         |  4 ++--
 fs/io_uring.c          |  7 ++++---
 fs/select.c            | 18 ++++++------------
 include/linux/signal.h |  2 +-
 kernel/signal.c        |  5 +++--
 6 files changed, 36 insertions(+), 28 deletions(-)

Comments

Eric W. Biederman June 4, 2019, 3:31 p.m. UTC | #1
Oleg Nesterov <oleg@redhat.com> writes:

> This is the minimal fix for stable, I'll send cleanups later.
>
> The commit 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add
> restore_user_sigmask()") introduced the visible change which breaks
> user-space: a signal temporary unblocked by set_user_sigmask() can
> be delivered even if the caller returns success or timeout.
>
> Change restore_user_sigmask() to accept the additional "interrupted"
> argument which should be used instead of signal_pending() check, and
> update the callers.
>
> Reported-by: Eric Wong <e@80x24.org>
> Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()")
> cc: stable@vger.kernel.org (v5.0+)
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>

I have read through the patch and it looks good.

For clarity.  I don't think this is required by posix, or fundamentally
to remove the races in select.  It is what linux has always done and
we have applications who care so I agree this fix is needed.

Further in any case where the semantic change that this patch rolls back
(aka where allowing a signal to be delivered and the select like call to
complete) would be advantage we can do as well if not better by using
signalfd.

Michael is there any chance we can get this guarantee of the linux
implementation of pselect and friends clearly documented.  The guarantee
that if the system call completes successfully we are guaranteed that
no signal that is unblocked by using sigmask will be delivered?

Eric

> ---
>  fs/aio.c               | 28 ++++++++++++++++++++--------
>  fs/eventpoll.c         |  4 ++--
>  fs/io_uring.c          |  7 ++++---
>  fs/select.c            | 18 ++++++------------
>  include/linux/signal.h |  2 +-
>  kernel/signal.c        |  5 +++--
>  6 files changed, 36 insertions(+), 28 deletions(-)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 3490d1f..c1e581d 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -2095,6 +2095,7 @@ SYSCALL_DEFINE6(io_pgetevents,
>  	struct __aio_sigset	ksig = { NULL, };
>  	sigset_t		ksigmask, sigsaved;
>  	struct timespec64	ts;
> +	bool interrupted;
>  	int ret;
>  
>  	if (timeout && unlikely(get_timespec64(&ts, timeout)))
> @@ -2108,8 +2109,10 @@ SYSCALL_DEFINE6(io_pgetevents,
>  		return ret;
>  
>  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
> -	restore_user_sigmask(ksig.sigmask, &sigsaved);
> -	if (signal_pending(current) && !ret)
> +
> +	interrupted = signal_pending(current);
> +	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> +	if (interrupted && !ret)
>  		ret = -ERESTARTNOHAND;
>  
>  	return ret;
> @@ -2128,6 +2131,7 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
>  	struct __aio_sigset	ksig = { NULL, };
>  	sigset_t		ksigmask, sigsaved;
>  	struct timespec64	ts;
> +	bool interrupted;
>  	int ret;
>  
>  	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
> @@ -2142,8 +2146,10 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
>  		return ret;
>  
>  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
> -	restore_user_sigmask(ksig.sigmask, &sigsaved);
> -	if (signal_pending(current) && !ret)
> +
> +	interrupted = signal_pending(current);
> +	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> +	if (interrupted && !ret)
>  		ret = -ERESTARTNOHAND;
>  
>  	return ret;
> @@ -2193,6 +2199,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
>  	struct __compat_aio_sigset ksig = { NULL, };
>  	sigset_t ksigmask, sigsaved;
>  	struct timespec64 t;
> +	bool interrupted;
>  	int ret;
>  
>  	if (timeout && get_old_timespec32(&t, timeout))
> @@ -2206,8 +2213,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
>  		return ret;
>  
>  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
> -	restore_user_sigmask(ksig.sigmask, &sigsaved);
> -	if (signal_pending(current) && !ret)
> +
> +	interrupted = signal_pending(current);
> +	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> +	if (interrupted && !ret)
>  		ret = -ERESTARTNOHAND;
>  
>  	return ret;
> @@ -2226,6 +2235,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
>  	struct __compat_aio_sigset ksig = { NULL, };
>  	sigset_t ksigmask, sigsaved;
>  	struct timespec64 t;
> +	bool interrupted;
>  	int ret;
>  
>  	if (timeout && get_timespec64(&t, timeout))
> @@ -2239,8 +2249,10 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
>  		return ret;
>  
>  	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
> -	restore_user_sigmask(ksig.sigmask, &sigsaved);
> -	if (signal_pending(current) && !ret)
> +
> +	interrupted = signal_pending(current);
> +	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> +	if (interrupted && !ret)
>  		ret = -ERESTARTNOHAND;
>  
>  	return ret;
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index c6f5131..4c74c76 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -2325,7 +2325,7 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
>  
>  	error = do_epoll_wait(epfd, events, maxevents, timeout);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> +	restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
>  
>  	return error;
>  }
> @@ -2350,7 +2350,7 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
>  
>  	err = do_epoll_wait(epfd, events, maxevents, timeout);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> +	restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
>  
>  	return err;
>  }
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 0fbb486..1147c5d 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2201,11 +2201,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  	}
>  
>  	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
> -	if (ret == -ERESTARTSYS)
> -		ret = -EINTR;
>  
>  	if (sig)
> -		restore_user_sigmask(sig, &sigsaved);
> +		restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
> +
> +	if (ret == -ERESTARTSYS)
> +		ret = -EINTR;
>  
>  	return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
>  }
> diff --git a/fs/select.c b/fs/select.c
> index 6cbc9ff..a4d8f6e 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -758,10 +758,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
>  		return ret;
>  
>  	ret = core_sys_select(n, inp, outp, exp, to);
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
>  	ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
>  	return ret;
>  }
>  
> @@ -1106,8 +1105,7 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
>  
>  	ret = do_sys_poll(ufds, nfds, to);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
>  	/* We can restart this syscall, usually */
>  	if (ret == -EINTR)
>  		ret = -ERESTARTNOHAND;
> @@ -1142,8 +1140,7 @@ SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
>  
>  	ret = do_sys_poll(ufds, nfds, to);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
>  	/* We can restart this syscall, usually */
>  	if (ret == -EINTR)
>  		ret = -ERESTARTNOHAND;
> @@ -1350,10 +1347,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
>  		return ret;
>  
>  	ret = compat_core_sys_select(n, inp, outp, exp, to);
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
>  	ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
>  	return ret;
>  }
>  
> @@ -1425,8 +1421,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
>  
>  	ret = do_sys_poll(ufds, nfds, to);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
>  	/* We can restart this syscall, usually */
>  	if (ret == -EINTR)
>  		ret = -ERESTARTNOHAND;
> @@ -1461,8 +1456,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
>  
>  	ret = do_sys_poll(ufds, nfds, to);
>  
> -	restore_user_sigmask(sigmask, &sigsaved);
> -
> +	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
>  	/* We can restart this syscall, usually */
>  	if (ret == -EINTR)
>  		ret = -ERESTARTNOHAND;
> diff --git a/include/linux/signal.h b/include/linux/signal.h
> index 9702016..78c2bb3 100644
> --- a/include/linux/signal.h
> +++ b/include/linux/signal.h
> @@ -276,7 +276,7 @@ extern int sigprocmask(int, sigset_t *, sigset_t *);
>  extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
>  	sigset_t *oldset, size_t sigsetsize);
>  extern void restore_user_sigmask(const void __user *usigmask,
> -				 sigset_t *sigsaved);
> +				 sigset_t *sigsaved, bool interrupted);
>  extern void set_current_blocked(sigset_t *);
>  extern void __set_current_blocked(const sigset_t *);
>  extern int show_unhandled_signals;
> diff --git a/kernel/signal.c b/kernel/signal.c
> index 328a01e..aa6a6f1 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask);
>   * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
>   * epoll_pwait where a new sigmask is passed in from userland for the syscalls.
>   */
> -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
> +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
> +				bool interrupted)
>  {
>  
>  	if (!usigmask)
> @@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
>  	 * Restoring sigmask here can lead to delivering signals that the above
>  	 * syscalls are intended to block because of the sigmask passed in.
>  	 */
> -	if (signal_pending(current)) {
> +	if (interrupted) {
>  		current->saved_sigmask = *sigsaved;
>  		set_restore_sigmask();
>  		return;
David Laight June 4, 2019, 3:57 p.m. UTC | #2
From: Eric W. Biederman
> Sent: 04 June 2019 16:32
...
> Michael is there any chance we can get this guarantee of the linux
> implementation of pselect and friends clearly documented.  The guarantee
> that if the system call completes successfully we are guaranteed that
> no signal that is unblocked by using sigmask will be delivered?

The behaviour certainly needs documenting - the ToG docs are unclear.
I think you need stronger statement that the one above.

Maybe "signals will only be delivered (ie the handler called) if
the system call has to wait and the wait is interrupted by a signal.
(Even then pselect might find ready fd and return success.)

There is also the 'issue' of ERESTARTNOHAND.
Some of the system calls will return EINTR if the wait is interrupted
by a signal that doesn't have a handler, others get restarted.
I'm not at all sure about why there is a difference.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Arnd Bergmann June 4, 2019, 4:37 p.m. UTC | #3
On Tue, Jun 4, 2019 at 3:41 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> This is the minimal fix for stable, I'll send cleanups later.
>
> The commit 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add
> restore_user_sigmask()") introduced the visible change which breaks
> user-space: a signal temporary unblocked by set_user_sigmask() can
> be delivered even if the caller returns success or timeout.
>
> Change restore_user_sigmask() to accept the additional "interrupted"
> argument which should be used instead of signal_pending() check, and
> update the callers.
>
> Reported-by: Eric Wong <e@80x24.org>
> Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()")
> cc: stable@vger.kernel.org (v5.0+)
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Acked-by: Arnd Bergmann <arnd@arndb.de>

I hope Eric can test this with the original reproducer, or maybe someone
could create a test case that can be added into LTP.

      Arnd
Deepa Dinamani June 4, 2019, 6:14 p.m. UTC | #4
> On Tue, Jun 4, 2019 at 3:41 PM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > This is the minimal fix for stable, I'll send cleanups later.
> >
> > The commit 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add
> > restore_user_sigmask()") introduced the visible change which breaks
> > user-space: a signal temporary unblocked by set_user_sigmask() can
> > be delivered even if the caller returns success or timeout.
> >
> > Change restore_user_sigmask() to accept the additional "interrupted"
> > argument which should be used instead of signal_pending() check, and
> > update the callers.
> >
> > Reported-by: Eric Wong <e@80x24.org>
> > Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()")
> > cc: stable@vger.kernel.org (v5.0+)
> > Signed-off-by: Oleg Nesterov <oleg@redhat.com>
>

Acked-by: Deepa Dinamani <deepa.kernel@gmail.com>

The original fix posted:
https://lore.kernel.org/patchwork/patch/1077355/ would also have been
a correct fix for this problem. But, given the cleanups that are in
the pipeline, this is a better fix.

-Deepa
Eric Wong June 4, 2019, 6:35 p.m. UTC | #5
Oleg Nesterov <oleg@redhat.com> wrote:
> This is the minimal fix for stable, I'll send cleanups later.
> 
> The commit 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add
> restore_user_sigmask()") introduced the visible change which breaks
> user-space: a signal temporary unblocked by set_user_sigmask() can
> be delivered even if the caller returns success or timeout.
> 
> Change restore_user_sigmask() to accept the additional "interrupted"
> argument which should be used instead of signal_pending() check, and
> update the callers.
> 
> Reported-by: Eric Wong <e@80x24.org>
> Fixes: 854a6ed56839a40f6b5d02a2962f48841482eec4 ("signal: Add restore_user_sigmask()")
> cc: stable@vger.kernel.org (v5.0+)
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Thanks, for epoll_pwait on top of Linux v5.1.7 and cmogstored v1.7.0:

Tested-by: Eric Wong <e@80x24.org>

(cmogstored v1.7.1 already works around this when it sees a 0
return value (but not >0, yet...))

> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 0fbb486..1147c5d 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2201,11 +2201,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
>  	}
>  
>  	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
> -	if (ret == -ERESTARTSYS)
> -		ret = -EINTR;
>  
>  	if (sig)
> -		restore_user_sigmask(sig, &sigsaved);
> +		restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
> +
> +	if (ret == -ERESTARTSYS)
> +		ret = -EINTR;
>  
>  	return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
>  }

That io_uring bit didn't apply cleanly to stable,
since stable is missing fdb288a679cdf6a71f3c1ae6f348ba4dae742681
("io_uring: use wait_event_interruptible for cq_wait conditional wait")
and related commits.

In any case, I'm not using io_uring anywhere, yet (and probably
won't, since I'll still need threads to deal with open/unlink/rename
on slow JBOD HDDs).
Linus Torvalds June 4, 2019, 9:26 p.m. UTC | #6
On Tue, Jun 4, 2019 at 6:41 AM Oleg Nesterov <oleg@redhat.com> wrote:
>
> This is the minimal fix for stable, I'll send cleanups later.

Ugh. I htink this is correct, but I wish we had a better and more
intuitive interface.

In particular, since restore_user_sigmask() basically wants to check
for "signal_pending()" anyway (to decide if the mask should be
restored by signal handling or by that function), I really get the
feeling that a lot of these patterns like

> -       restore_user_sigmask(ksig.sigmask, &sigsaved);
> -       if (signal_pending(current) && !ret)
> +
> +       interrupted = signal_pending(current);
> +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> +       if (interrupted && !ret)
>                 ret = -ERESTARTNOHAND;

are wrong to begin with, and we really should aim for an interface
which says "tell me whether you completed the system call, and I'll
give you an error return if not".

How about we make restore_user_sigmask() take two return codes: the
'ret' we already have, and the return we would get if there is a
signal pending and w're currently returning zero.

IOW, I think the above could become

        ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret, -ERESTARTHAND);

instead if we just made the right interface decision.

Hmm?

             Linus
Eric Wong June 4, 2019, 10:24 p.m. UTC | #7
Linus Torvalds <torvalds@linux-foundation.org> wrote:
> On Tue, Jun 4, 2019 at 6:41 AM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > This is the minimal fix for stable, I'll send cleanups later.
> 
> Ugh. I htink this is correct, but I wish we had a better and more
> intuitive interface.

I had the same thoughts, but am not a regular kernel hacker,
so I didn't say anything earlier.

> In particular, since restore_user_sigmask() basically wants to check
> for "signal_pending()" anyway (to decide if the mask should be
> restored by signal handling or by that function), I really get the
> feeling that a lot of these patterns like
> 
> > -       restore_user_sigmask(ksig.sigmask, &sigsaved);
> > -       if (signal_pending(current) && !ret)
> > +
> > +       interrupted = signal_pending(current);
> > +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> > +       if (interrupted && !ret)
> >                 ret = -ERESTARTNOHAND;
> 
> are wrong to begin with, and we really should aim for an interface
> which says "tell me whether you completed the system call, and I'll
> give you an error return if not".
> 
> How about we make restore_user_sigmask() take two return codes: the
> 'ret' we already have, and the return we would get if there is a
> signal pending and w're currently returning zero.
> 
> IOW, I think the above could become
> 
>         ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret, -ERESTARTHAND);
> 
> instead if we just made the right interface decision.

But that falls down if ret were ever expected to match several
similar error codes (not sure if it happens)

When I was considering fixing this on my own a few weeks ago, I
was looking for an inline that could quickly tell if `ret' was
any of the EINTR-like error codes; but couldn't find one...

It'd probably end up being switch/case statement so I'm not sure
if it'd be too big and slow or not...

The caller would just do:

	ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret);

And restore_user_sigmask would call some "was_interrupted(ret)"
inline which could return true if `ret' matched any of the
too-many-to-keep-track-of EINTR-like codes.  But I figured
there's probably a good reason it did not exist, already *shrug*

/me goes back to the wonderful world of userspace...
Eric W. Biederman June 4, 2019, 11:51 p.m. UTC | #8
Linus Torvalds <torvalds@linux-foundation.org> writes:

> On Tue, Jun 4, 2019 at 6:41 AM Oleg Nesterov <oleg@redhat.com> wrote:
>>
>> This is the minimal fix for stable, I'll send cleanups later.
>
> Ugh. I htink this is correct, but I wish we had a better and more
> intuitive interface.
>
> In particular, since restore_user_sigmask() basically wants to check
> for "signal_pending()" anyway (to decide if the mask should be
> restored by signal handling or by that function), I really get the
> feeling that a lot of these patterns like

Linus that checking for signal_pending() in restore_user_sigmask is the
bug that caused the regression.

>> -       restore_user_sigmask(ksig.sigmask, &sigsaved);
>> -       if (signal_pending(current) && !ret)
>> +
>> +       interrupted = signal_pending(current);
>> +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
>> +       if (interrupted && !ret)
>>                 ret = -ERESTARTNOHAND;
>
> are wrong to begin with, and we really should aim for an interface
> which says "tell me whether you completed the system call, and I'll
> give you an error return if not".

The pattern you are pointing out is specific to io_pgetevents and it's
variations.  It does look buggy to me but not for the reason you point
out, but instead because it does not appear to let a pending signal
cause io_pgetevents to return early.

I suspect we should fix that and have do_io_getevents return
-EINTR or -ERESTARTNOHAND like everyone else.

The concept of interrupted (aka return -EINTR to userspace) is truly
fundamental to the current semantics.  We effectively put a normally
blocked signal that was triggered back if we won't be returning -EINTR
to userspace.

> How about we make restore_user_sigmask() take two return codes: the
> 'ret' we already have, and the return we would get if there is a
> signal pending and w're currently returning zero.
>
> IOW, I think the above could become
>
>         ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret, -ERESTARTHAND);
>
> instead if we just made the right interface decision.
>
> Hmm?

At best I think that is a cleanup that will complicate creating a simple
straight forward regression fix.

Unless I am misreading things that is optimizing the interface for
dealing with broken code.

So can we please get this fix in and then look at cleaning up and
simplifying this code.

Eric

p.s. A rather compelling cleanup is to:

- Leave the signal mask alone.
- Register with signalfd_wqh for wake ups.
- Have a helper

   int signal_pending_sigmask(sigset_t *blocked)
   {
   	struct task_struct *tsk = current;
   	int ret = 0;
   	spin_lock_irq(&tsk->sighand->siglock);
        if (next_signal(&tsk->pending, blocked) ||
            next_signal(&tsk->signal->pending, blocked)) {
        	ret = -ERESTARTHAND;
                if (!sigequalsets(&tsk->blocked, blocked)) {
                	tsk->saved_sigmask = tsk->blocked;
                	__set_task_blocked(tsk, blocked);
                        set_restore_sigmask();
		}
        }
        spin_unlock_irq(&tsk->sighand->siglock);
        return ret;
   }
  
- Use that helper instead of signal_pending() in the various
  sleep functions.
- Possibly get the signal mask from tsk instead of passing it into
  all of the helpers.

Eric
Oleg Nesterov June 5, 2019, 8:56 a.m. UTC | #9
On 06/04, Linus Torvalds wrote:
>
> On Tue, Jun 4, 2019 at 6:41 AM Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > This is the minimal fix for stable, I'll send cleanups later.
>
> Ugh. I htink this is correct, but I wish we had a better and more
> intuitive interface.

Yes,

> In particular, since restore_user_sigmask() basically wants to check
> for "signal_pending()" anyway

No, the caller should check signal_pending() anyway and this is enough.

> > -       restore_user_sigmask(ksig.sigmask, &sigsaved);
> > -       if (signal_pending(current) && !ret)
> > +
> > +       interrupted = signal_pending(current);
> > +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> > +       if (interrupted && !ret)
> >                 ret = -ERESTARTNOHAND;
>
> are wrong to begin with,

This is fs/aio.c and I have already mentioned that this code doesn't look
right anyway.

> IOW, I think the above could become
>
>         ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret, -ERESTARTHAND);
>
> instead if we just made the right interface decision.

I think this particular code should simply do

		ret = do_io_getevents(...);

		if (ret == -ERESTARTSYS)
			ret = -EINTR;

		restore_user_sigmask(ret == -EINTR);

However I agree that another helper(s) which takes/returns the error code makes
sense and I was going to do this. Lets do this step by step, I think we should
kill sigmask/sigsaved first.

Oleg.
David Laight June 5, 2019, 9:02 a.m. UTC | #10
From: Linus Torvalds
> Sent: 04 June 2019 22:27
> Ugh. I htink this is correct, but I wish we had a better and more
> intuitive interface.
> 
> In particular, since restore_user_sigmask() basically wants to check
> for "signal_pending()" anyway (to decide if the mask should be
> restored by signal handling or by that function), I really get the
> feeling that a lot of these patterns like
> 
> > -       restore_user_sigmask(ksig.sigmask, &sigsaved);
> > -       if (signal_pending(current) && !ret)
> > +
> > +       interrupted = signal_pending(current);
> > +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> > +       if (interrupted && !ret)
> >                 ret = -ERESTARTNOHAND;
> 
> are wrong to begin with, and we really should aim for an interface
> which says "tell me whether you completed the system call, and I'll
> give you an error return if not".
> 
> How about we make restore_user_sigmask() take two return codes: the
> 'ret' we already have, and the return we would get if there is a
> signal pending and w're currently returning zero.
> 
> IOW, I think the above could become
> 
>         ret = restore_user_sigmask(ksig.sigmask, &sigsaved, ret, -ERESTARTHAND);
> 
> instead if we just made the right interface decision.

I think we should tell restore_user_sigmask() whether it should
cause any signal handles to be run (using the current mask)
and it should tell us whether it would run any (ie if it deferred
restoring the mask to syscall exit).

So the above would (probably) be:
	if (restore_user_sigmask(ksig.sigmask, &sigsaved, true) && !ret)
		ret = -ERESTARTNOHAND;

epoll() would have:
	if (restore_user_sigmask(xxx.sigmask, &sigsaved, !ret || ret == -EINTR))
		ret = -EINTR;

I also think it could be simplified if code that loaded the 'user sigmask'
saved the old one in 'current->saved_sigmask' (and saved that it had done it).
You'd not need 'sigsaved' nor pass the user sigmask address into
the restore function.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Oleg Nesterov June 5, 2019, 9:04 a.m. UTC | #11
On 06/04, Eric W. Biederman wrote:
>
> >> -       restore_user_sigmask(ksig.sigmask, &sigsaved);
> >> -       if (signal_pending(current) && !ret)
> >> +
> >> +       interrupted = signal_pending(current);
> >> +       restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
> >> +       if (interrupted && !ret)
> >>                 ret = -ERESTARTNOHAND;
> >
> > are wrong to begin with, and we really should aim for an interface
> > which says "tell me whether you completed the system call, and I'll
> > give you an error return if not".
>
> The pattern you are pointing out is specific to io_pgetevents and it's
> variations.  It does look buggy to me but not for the reason you point
> out, but instead because it does not appear to let a pending signal
> cause io_pgetevents to return early.
>
> I suspect we should fix that and have do_io_getevents return
> -EINTR or -ERESTARTNOHAND like everyone else.

Exactly. It should not even check signal_pending(). It can rely on
wait_event_interruptible_hrtimeout().

> So can we please get this fix in and then look at cleaning up and
> simplifying this code.

Yes ;)

Oleg.
Oleg Nesterov June 5, 2019, 9:25 a.m. UTC | #12
On 06/05, David Laight wrote:
>
> epoll() would have:
> 	if (restore_user_sigmask(xxx.sigmask, &sigsaved, !ret || ret == -EINTR))
> 		ret = -EINTR;

I don't think so but lets discuss this later.

> I also think it could be simplified if code that loaded the 'user sigmask'
> saved the old one in 'current->saved_sigmask' (and saved that it had done it).
> You'd not need 'sigsaved' nor pass the user sigmask address into
> the restore function.

Heh. apparently you do not read my emails ;)

This is what I proposed in my very 1st email, and I even showed the patch
and the code with the patch applied twice. Let me do this again.

Let me show the code with the patch applied. I am using epoll_pwait() as an
example because it looks very simple.


	static inline void set_restore_sigmask(void)
	{
// WARN_ON(!TIF_SIGPENDING) was removed by this patch
		current->restore_sigmask = true;
	}

	int set_xxx(const sigset_t __user *umask, size_t sigsetsize)
	{
		sigset_t *kmask;

		if (!umask)
			return 0;
		if (sigsetsize != sizeof(sigset_t))
			return -EINVAL;
		if (copy_from_user(kmask, umask, sizeof(sigset_t)))
			return -EFAULT;

// we can safely modify ->saved_sigmask/restore_sigmask, they has no meaning
// until the syscall returns.
		set_restore_sigmask();
		current->saved_sigmask = current->blocked;
		set_current_blocked(kmask);

		return 0;
	}


	void update_xxx(bool interrupted)
	{
// the main reason for this helper is WARN_ON(!TIF_SIGPENDING) which was "moved"
// from set_restore_sigmask() above.
		if (interrupted)
			WARN_ON(!test_thread_flag(TIF_SIGPENDING));
		else
			restore_saved_sigmask();
	}

	SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
			int, maxevents, int, timeout, const sigset_t __user *, sigmask,
			size_t, sigsetsize)
	{
		int error;

		error = set_xxx(sigmask, sigsetsize);
		if (error)
			return error;

		error = do_epoll_wait(epfd, events, maxevents, timeout);
		update_xxx(error == -EINTR);

		return error;
	}

Oleg.
David Laight June 5, 2019, 9:58 a.m. UTC | #13
From: Oleg Nesterov [mailto:oleg@redhat.com]
> Sent: 05 June 2019 10:25
> On 06/05, David Laight wrote:
> >
> > epoll() would have:
> > 	if (restore_user_sigmask(xxx.sigmask, &sigsaved, !ret || ret == -EINTR))
> > 		ret = -EINTR;
> 
> I don't think so but lets discuss this later.

I certainly think there should be some comments at least
about when/whether signal handlers get called and that
being separate from the return value.

The system call restart stuff does seem strange.
ISTR that was originally added for SIG_SUSPEND (^Z) so that those
signals wouldn't be seen by the appication.
But that makes it a property of the signal, not the system call.

> > I also think it could be simplified if code that loaded the 'user sigmask'
> > saved the old one in 'current->saved_sigmask' (and saved that it had done it).
> > You'd not need 'sigsaved' nor pass the user sigmask address into
> > the restore function.
> 
> Heh. apparently you do not read my emails ;)
> 
> This is what I proposed in my very 1st email, and I even showed the patch
> and the code with the patch applied twice. Let me do this again.

I did read that one, I've even quoted it in the past :-)
It's just not been mentioned recently.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 3490d1f..c1e581d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2095,6 +2095,7 @@  SYSCALL_DEFINE6(io_pgetevents,
 	struct __aio_sigset	ksig = { NULL, };
 	sigset_t		ksigmask, sigsaved;
 	struct timespec64	ts;
+	bool interrupted;
 	int ret;
 
 	if (timeout && unlikely(get_timespec64(&ts, timeout)))
@@ -2108,8 +2109,10 @@  SYSCALL_DEFINE6(io_pgetevents,
 		return ret;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
-	restore_user_sigmask(ksig.sigmask, &sigsaved);
-	if (signal_pending(current) && !ret)
+
+	interrupted = signal_pending(current);
+	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+	if (interrupted && !ret)
 		ret = -ERESTARTNOHAND;
 
 	return ret;
@@ -2128,6 +2131,7 @@  SYSCALL_DEFINE6(io_pgetevents_time32,
 	struct __aio_sigset	ksig = { NULL, };
 	sigset_t		ksigmask, sigsaved;
 	struct timespec64	ts;
+	bool interrupted;
 	int ret;
 
 	if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
@@ -2142,8 +2146,10 @@  SYSCALL_DEFINE6(io_pgetevents_time32,
 		return ret;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
-	restore_user_sigmask(ksig.sigmask, &sigsaved);
-	if (signal_pending(current) && !ret)
+
+	interrupted = signal_pending(current);
+	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+	if (interrupted && !ret)
 		ret = -ERESTARTNOHAND;
 
 	return ret;
@@ -2193,6 +2199,7 @@  COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 	struct __compat_aio_sigset ksig = { NULL, };
 	sigset_t ksigmask, sigsaved;
 	struct timespec64 t;
+	bool interrupted;
 	int ret;
 
 	if (timeout && get_old_timespec32(&t, timeout))
@@ -2206,8 +2213,10 @@  COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 		return ret;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
-	restore_user_sigmask(ksig.sigmask, &sigsaved);
-	if (signal_pending(current) && !ret)
+
+	interrupted = signal_pending(current);
+	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+	if (interrupted && !ret)
 		ret = -ERESTARTNOHAND;
 
 	return ret;
@@ -2226,6 +2235,7 @@  COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
 	struct __compat_aio_sigset ksig = { NULL, };
 	sigset_t ksigmask, sigsaved;
 	struct timespec64 t;
+	bool interrupted;
 	int ret;
 
 	if (timeout && get_timespec64(&t, timeout))
@@ -2239,8 +2249,10 @@  COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
 		return ret;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
-	restore_user_sigmask(ksig.sigmask, &sigsaved);
-	if (signal_pending(current) && !ret)
+
+	interrupted = signal_pending(current);
+	restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+	if (interrupted && !ret)
 		ret = -ERESTARTNOHAND;
 
 	return ret;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c6f5131..4c74c76 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2325,7 +2325,7 @@  SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 
 	error = do_epoll_wait(epfd, events, maxevents, timeout);
 
-	restore_user_sigmask(sigmask, &sigsaved);
+	restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
 
 	return error;
 }
@@ -2350,7 +2350,7 @@  COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 
 	err = do_epoll_wait(epfd, events, maxevents, timeout);
 
-	restore_user_sigmask(sigmask, &sigsaved);
+	restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
 
 	return err;
 }
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0fbb486..1147c5d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2201,11 +2201,12 @@  static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	}
 
 	ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
-	if (ret == -ERESTARTSYS)
-		ret = -EINTR;
 
 	if (sig)
-		restore_user_sigmask(sig, &sigsaved);
+		restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
+
+	if (ret == -ERESTARTSYS)
+		ret = -EINTR;
 
 	return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
 }
diff --git a/fs/select.c b/fs/select.c
index 6cbc9ff..a4d8f6e 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -758,10 +758,9 @@  static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
 		return ret;
 
 	ret = core_sys_select(n, inp, outp, exp, to);
+	restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
 	ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
 	return ret;
 }
 
@@ -1106,8 +1105,7 @@  SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
+	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR)
 		ret = -ERESTARTNOHAND;
@@ -1142,8 +1140,7 @@  SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds, unsigned int, nfds,
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
+	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR)
 		ret = -ERESTARTNOHAND;
@@ -1350,10 +1347,9 @@  static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 		return ret;
 
 	ret = compat_core_sys_select(n, inp, outp, exp, to);
+	restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
 	ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
 	return ret;
 }
 
@@ -1425,8 +1421,7 @@  COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
+	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR)
 		ret = -ERESTARTNOHAND;
@@ -1461,8 +1456,7 @@  COMPAT_SYSCALL_DEFINE5(ppoll_time64, struct pollfd __user *, ufds,
 
 	ret = do_sys_poll(ufds, nfds, to);
 
-	restore_user_sigmask(sigmask, &sigsaved);
-
+	restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
 	/* We can restart this syscall, usually */
 	if (ret == -EINTR)
 		ret = -ERESTARTNOHAND;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 9702016..78c2bb3 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -276,7 +276,7 @@  extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
 	sigset_t *oldset, size_t sigsetsize);
 extern void restore_user_sigmask(const void __user *usigmask,
-				 sigset_t *sigsaved);
+				 sigset_t *sigsaved, bool interrupted);
 extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
diff --git a/kernel/signal.c b/kernel/signal.c
index 328a01e..aa6a6f1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2912,7 +2912,8 @@  EXPORT_SYMBOL(set_compat_user_sigmask);
  * This is useful for syscalls such as ppoll, pselect, io_pgetevents and
  * epoll_pwait where a new sigmask is passed in from userland for the syscalls.
  */
-void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
+void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
+				bool interrupted)
 {
 
 	if (!usigmask)
@@ -2922,7 +2923,7 @@  void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
 	 * Restoring sigmask here can lead to delivering signals that the above
 	 * syscalls are intended to block because of the sigmask passed in.
 	 */
-	if (signal_pending(current)) {
+	if (interrupted) {
 		current->saved_sigmask = *sigsaved;
 		set_restore_sigmask();
 		return;