From patchwork Tue Jun 11 14:54:52 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Roman Penyaev X-Patchwork-Id: 10987331 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 753E314BB for ; Tue, 11 Jun 2019 14:55:22 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 649662843C for ; Tue, 11 Jun 2019 14:55:22 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 5912128396; Tue, 11 Jun 2019 14:55:22 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C2101205A8 for ; Tue, 11 Jun 2019 14:55:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2404099AbfFKOzU (ORCPT ); Tue, 11 Jun 2019 10:55:20 -0400 Received: from mx2.suse.de ([195.135.220.15]:52598 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2404070AbfFKOzT (ORCPT ); Tue, 11 Jun 2019 10:55:19 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 76E02AF0C; Tue, 11 Jun 2019 14:55:17 +0000 (UTC) From: Roman Penyaev Cc: Roman Penyaev , Andrew Morton , Al Viro , Linus Torvalds , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org Subject: [PATCH v4 08/14] epoll: support polling from userspace for ep_insert() Date: Tue, 11 Jun 2019 16:54:52 +0200 Message-Id: <20190611145458.9540-9-rpenyaev@suse.de> X-Mailer: git-send-email 2.21.0 
In-Reply-To: <20190611145458.9540-1-rpenyaev@suse.de> References: <20190611145458.9540-1-rpenyaev@suse.de> MIME-Version: 1.0 To: unlisted-recipients:; (no To-header on input) Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP When epfd is polled by userspace and a new item is inserted, a new bit should be taken from the bitmap and the user item set accordingly. Signed-off-by: Roman Penyaev Cc: Andrew Morton Cc: Al Viro Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- fs/eventpoll.c | 118 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 18 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index bcd57ca47564..4f541f85c7e5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -885,6 +885,23 @@ static void epi_rcu_free(struct rcu_head *head) kmem_cache_free(epi_cache, epi); } +static inline int ep_get_bit(struct eventpoll *ep) +{ + bool was_set; + int bit; + + lockdep_assert_held(&ep->mtx); + + bit = find_first_zero_bit(ep->items_bm, ep->max_items_nr); + if (bit >= ep->max_items_nr) + return -ENOSPC; + + was_set = test_and_set_bit(bit, ep->items_bm); + WARN_ON(was_set); + + return bit; +} + #define set_unless_zero_atomically(ptr, flags) \ ({ \ typeof(ptr) _ptr = (ptr); \ @@ -1996,6 +2013,33 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi) wakeup_source_unregister(ws); } +static inline struct epitem *epi_alloc(struct eventpoll *ep) +{ + struct epitem *epi; + + if (ep_polled_by_user(ep)) { + struct uepitem *uepi; + + uepi = kmem_cache_alloc(uepi_cache, GFP_KERNEL); + if (likely(uepi)) + epi = &uepi->epi; + else + epi = NULL; + } else { + epi = kmem_cache_alloc(epi_cache, GFP_KERNEL); + } + + return epi; +} + +static inline void epi_free(struct eventpoll *ep, struct epitem *epi) +{ + if (ep_polled_by_user(ep)) + kmem_cache_free(uepi_cache, uep_item_from_epi(epi)); + else + 
kmem_cache_free(epi_cache, epi); +} + /* * Must be called with "mtx" held. */ @@ -2008,29 +2052,55 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, struct epitem *epi; struct ep_pqueue epq; + lockdep_assert_held(&ep->mtx); lockdep_assert_irqs_enabled(); user_watches = atomic_long_read(&ep->user->epoll_watches); if (unlikely(user_watches >= max_user_watches)) return -ENOSPC; - if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) + epi = epi_alloc(ep); + if (unlikely(!epi)) return -ENOMEM; /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->pwqlist); + RCU_INIT_POINTER(epi->ws, NULL); epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; - if (epi->event.events & EPOLLWAKEUP) { + + if (ep_polled_by_user(ep)) { + struct uepitem *uepi = uep_item_from_epi(epi); + struct epoll_uitem *uitem; + int bit; + + INIT_WORK(&uepi->work, ep_poll_callback_work); + + bit = ep_get_bit(ep); + if (unlikely(bit < 0)) { + error = bit; + goto error_get_bit; + } + uepi->bit = bit; + + /* + * Now fill-in user item. Do not touch ready_events, since + * it can be EPOLLREMOVED (has been set by previous user + * item), thus user index entry can be not yet consumed + * by userspace. See ep_remove_user_item() and + * ep_add_event_to_uring() for details. 
+ */ + uitem = &ep->user_header->items[uepi->bit]; + uitem->events = event->events; + uitem->data = event->data; + } else if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); if (error) goto error_create_wakeup_source; - } else { - RCU_INIT_POINTER(epi->ws, NULL); } /* Initialize the poll table using the queue callback */ @@ -2077,16 +2147,23 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, /* record NAPI ID of new item if present */ ep_set_busy_poll_napi_id(epi); - /* If the file is already "ready" we drop it inside the ready list */ - if (revents && !ep_is_linked(epi)) { - list_add_tail(&epi->rdllink, &ep->rdllist); - ep_pm_stay_awake(epi); + if (revents) { + bool added = false; - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - wake_up(&ep->wq); - if (waitqueue_active(&ep->poll_wait)) - pwake++; + if (ep_polled_by_user(ep)) { + added = ep_add_event_to_uring(epi, revents); + } else if (!ep_is_linked(epi)) { + list_add_tail(&epi->rdllink, &ep->rdllist); + ep_pm_stay_awake(epi); + added = true; + } + if (added) { + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + wake_up(&ep->wq); + if (waitqueue_active(&ep->poll_wait)) + pwake++; + } } write_unlock_irq(&ep->lock); @@ -2115,15 +2192,20 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, * list, since that is used/cleaned only inside a section bound by "mtx". * And ep_insert() is called with "mtx" held. 
*/ - write_lock_irq(&ep->lock); - if (ep_is_linked(epi)) - list_del_init(&epi->rdllink); - write_unlock_irq(&ep->lock); + if (ep_polled_by_user(ep)) { + ep_remove_user_item(epi); + } else { + write_lock_irq(&ep->lock); + if (ep_is_linked(epi)) + list_del_init(&epi->rdllink); + write_unlock_irq(&ep->lock); + } wakeup_source_unregister(ep_wakeup_source(epi)); +error_get_bit: error_create_wakeup_source: - kmem_cache_free(epi_cache, epi); + epi_free(ep, epi); return error; }