Message ID | 20200122160231.11876-3-axboe@kernel.dk (mailing list archive)
---|---
State | New, archived
Series | Add io_uring support for epoll_ctl
On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <axboe@kernel.dk> wrote:
> Also make it available outside of epoll, along with the helper that
> decides if we need to copy the passed in epoll_event.
[...]
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index cd848e8d08e2..162af749ea50 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
[...]
> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
> +                                  bool nonblock)
> +{
> +       if (!nonblock) {
> +               mutex_lock_nested(mutex, depth);
> +               return 0;
> +       }
> +       if (!mutex_trylock(mutex))
> +               return 0;
> +       return -EAGAIN;

The documentation for mutex_trylock() says:

 * Try to acquire the mutex atomically. Returns 1 if the mutex
 * has been acquired successfully, and 0 on contention.

So in the success case, this evaluates to:

    if (!1)
            return 0;
    return -EAGAIN;

which is

    if (0)
            return 0;
    return -EAGAIN;

which is

    return -EAGAIN;

I think you'll have to get rid of the negation.

> +}
> +
> +int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
> +                bool nonblock)
> {
>        int error;
>        int full_check = 0;
> @@ -2145,13 +2152,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>         * deep wakeup paths from forming in parallel through multiple
>         * EPOLL_CTL_ADD operations.
>         */
> -       mutex_lock_nested(&ep->mtx, 0);
> +       error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> +       if (error)
> +               goto error_tgt_fput;
>        if (op == EPOLL_CTL_ADD) {
>                if (!list_empty(&f.file->f_ep_links) ||
>                                                is_file_epoll(tf.file)) {
>                        full_check = 1;
>                        mutex_unlock(&ep->mtx);
> -                       mutex_lock(&epmutex);
> +                       error = epoll_mutex_lock(&epmutex, 0, nonblock);
> +                       if (error)
> +                               goto error_tgt_fput;

When we reach the "goto", full_check==1 and epmutex is not held. But at
the jump target, this code runs:

    error_tgt_fput:
            if (full_check) // true
                    mutex_unlock(&epmutex);

So I think we're releasing a lock that we don't hold.

>                        if (is_file_epoll(tf.file)) {
>                                error = -ELOOP;
>                                if (ep_loop_check(ep, tf.file) != 0) {
> @@ -2161,10 +2172,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>                        } else
>                                list_add(&tf.file->f_tfile_llink,
>                                                &tfile_check_list);
> -                       mutex_lock_nested(&ep->mtx, 0);
> +                       error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
> +                       if (error) {
> +out_del:
> +                               list_del(&tf.file->f_tfile_llink);
> +                               goto error_tgt_fput;
> +                       }
>                        if (is_file_epoll(tf.file)) {
>                                tep = tf.file->private_data;
> -                               mutex_lock_nested(&tep->mtx, 1);
> +                               error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
> +                               if (error)
> +                                       goto out_del;

When we reach this "goto", ep->mtx is held and never dropped.

>                        }
>                }
>        }
> @@ -2233,7 +2251,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
>                copy_from_user(&epds, event, sizeof(struct epoll_event)))
>                        return -EFAULT;
>
> -       return do_epoll_ctl(epfd, op, fd, &epds);
> +       return do_epoll_ctl(epfd, op, fd, &epds, false);
> }
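For reference, a corrected version of the helper with the inversion removed would look like this. This is a minimal sketch following the mutex_trylock() semantics quoted above, not the reworked patch Jens later posted:

```c
static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
				   bool nonblock)
{
	if (!nonblock) {
		mutex_lock_nested(mutex, depth);
		return 0;
	}
	/* mutex_trylock() returns 1 on success, 0 on contention */
	if (mutex_trylock(mutex))
		return 0;
	return -EAGAIN;
}
```

The two lock-balance problems also admit small, local fixes. The hunks below are an illustrative sketch on top of the posted patch, not the actual follow-up: setting full_check only once epmutex is really held keeps the unlock at error_tgt_fput honest, and dropping the freshly re-taken ep->mtx before jumping to out_del keeps that unwind path balanced.

```
-			full_check = 1;
 			mutex_unlock(&ep->mtx);
 			error = epoll_mutex_lock(&epmutex, 0, nonblock);
 			if (error)
 				goto error_tgt_fput;
+			/* only set once epmutex is actually held */
+			full_check = 1;

 				error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
-				if (error)
+				if (error) {
+					/* drop the ep->mtx taken just above */
+					mutex_unlock(&ep->mtx);
 					goto out_del;
+				}
```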
On 1/22/20 9:20 AM, Jann Horn wrote:
> On Wed, Jan 22, 2020 at 5:02 PM Jens Axboe <axboe@kernel.dk> wrote:
>> Also make it available outside of epoll, along with the helper that
>> decides if we need to copy the passed in epoll_event.
> [...]
>> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
>> index cd848e8d08e2..162af749ea50 100644
>> --- a/fs/eventpoll.c
>> +++ b/fs/eventpoll.c
> [...]
>> -static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
>> +static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
>> +                                  bool nonblock)
>> +{
>> +       if (!nonblock) {
>> +               mutex_lock_nested(mutex, depth);
>> +               return 0;
>> +       }
>> +       if (!mutex_trylock(mutex))
>> +               return 0;
>> +       return -EAGAIN;
>
> The documentation for mutex_trylock() says:
>
>  * Try to acquire the mutex atomically. Returns 1 if the mutex
>  * has been acquired successfully, and 0 on contention.
>
> So in the success case, this evaluates to:
>
>     if (!1)
>             return 0;
>     return -EAGAIN;
>
> which is
>
>     if (0)
>             return 0;
>     return -EAGAIN;
>
> which is
>
>     return -EAGAIN;
>
> I think you'll have to get rid of the negation.

Doh indeed. I'll rework and run the test case, just rebased this and I
think I inadvertently used an older version. Ditto for the below.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cd848e8d08e2..162af749ea50 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -354,12 +354,6 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 	return container_of(p, struct ep_pqueue, pt)->epi;
 }
 
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-static inline int ep_op_has_event(int op)
-{
-	return op != EPOLL_CTL_DEL;
-}
-
 /* Initialize the poll safe wake up structure */
 static void ep_nested_calls_init(struct nested_calls *ncalls)
 {
@@ -2074,7 +2068,20 @@ SYSCALL_DEFINE1(epoll_create, int, size)
 	return do_epoll_create(0);
 }
 
-static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
+static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
+				   bool nonblock)
+{
+	if (!nonblock) {
+		mutex_lock_nested(mutex, depth);
+		return 0;
+	}
+	if (!mutex_trylock(mutex))
+		return 0;
+	return -EAGAIN;
+}
+
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+		 bool nonblock)
 {
 	int error;
 	int full_check = 0;
@@ -2145,13 +2152,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
 	 * deep wakeup paths from forming in parallel through multiple
 	 * EPOLL_CTL_ADD operations.
 	 */
-	mutex_lock_nested(&ep->mtx, 0);
+	error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+	if (error)
+		goto error_tgt_fput;
 	if (op == EPOLL_CTL_ADD) {
 		if (!list_empty(&f.file->f_ep_links) ||
						is_file_epoll(tf.file)) {
 			full_check = 1;
 			mutex_unlock(&ep->mtx);
-			mutex_lock(&epmutex);
+			error = epoll_mutex_lock(&epmutex, 0, nonblock);
+			if (error)
+				goto error_tgt_fput;
 			if (is_file_epoll(tf.file)) {
 				error = -ELOOP;
 				if (ep_loop_check(ep, tf.file) != 0) {
@@ -2161,10 +2172,17 @@ static int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds)
 			} else
 				list_add(&tf.file->f_tfile_llink,
							&tfile_check_list);
-			mutex_lock_nested(&ep->mtx, 0);
+			error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+			if (error) {
+out_del:
+				list_del(&tf.file->f_tfile_llink);
+				goto error_tgt_fput;
+			}
 			if (is_file_epoll(tf.file)) {
 				tep = tf.file->private_data;
-				mutex_lock_nested(&tep->mtx, 1);
+				error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
+				if (error)
+					goto out_del;
 			}
 		}
 	}
@@ -2233,7 +2251,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
 		return -EFAULT;
 
-	return do_epoll_ctl(epfd, op, fd, &epds);
+	return do_epoll_ctl(epfd, op, fd, &epds, false);
 }
 
 /*
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index bc6d79b00c4e..8f000fada5a4 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -61,6 +61,15 @@ static inline void eventpoll_release(struct file *file)
 	eventpoll_release_file(file);
 }
 
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+		 bool nonblock);
+
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_has_event(int op)
+{
+	return op != EPOLL_CTL_DEL;
+}
+
 #else
 
 static inline void eventpoll_init_file(struct file *file) {}
Also make it available outside of epoll, along with the helper that
decides if we need to copy the passed in epoll_event.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/eventpoll.c            | 42 ++++++++++++++++++++++++++++-----------
 include/linux/eventpoll.h |  9 +++++++++
 2 files changed, 39 insertions(+), 12 deletions(-)
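To show how the two exported pieces are meant to be used together, here is a hypothetical out-of-epoll caller. The function name and wiring are made up for illustration and are not part of this series; it mirrors the epoll_ctl(2) syscall body but passes nonblock=true:

```c
/*
 * Hypothetical caller, illustration only: same shape as the epoll_ctl(2)
 * syscall, but asks do_epoll_ctl() not to sleep on contended mutexes.
 */
static int try_epoll_ctl_nonblock(int epfd, int op, int fd,
				  struct epoll_event __user *event)
{
	struct epoll_event epds;

	/* ep_op_has_event(): EPOLL_CTL_DEL carries no event payload */
	if (ep_op_has_event(op) &&
	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
		return -EFAULT;

	/*
	 * With nonblock == true, a contended mutex makes do_epoll_ctl()
	 * return -EAGAIN instead of sleeping.
	 */
	return do_epoll_ctl(epfd, op, fd, &epds, true);
}
```

A caller that receives -EAGAIN here would typically queue the request for execution in a context that is allowed to block and call do_epoll_ctl() again with nonblock=false.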