From patchwork Sun May 30 20:24:01 2010
X-Patchwork-Submitter: Tejun Heo
X-Patchwork-Id: 103161
Message-ID: <4C02C961.9050606@kernel.org>
Date: Sun, 30 May 2010 22:24:01 +0200
From: Tejun Heo
To: "Michael S. Tsirkin"
CC: Oleg Nesterov, Sridhar Samudrala, netdev, lkml, "kvm@vger.kernel.org",
 Andrew Morton, Dmitri Vorobiev, Jiri Kosina, Thomas Gleixner,
 Ingo Molnar, Andi Kleen
Subject: [PATCH 1/3] vhost: replace vhost_workqueue with per-vhost kthread
References: <20100527091426.GA6308@redhat.com>
 <20100527124448.GA4241@redhat.com> <20100527131254.GB7974@redhat.com>
 <4BFE9ABA.6030907@kernel.org> <20100527163954.GA21710@redhat.com>
 <4BFEA434.6080405@kernel.org> <20100527173207.GA21880@redhat.com>
 <4BFEE216.2070807@kernel.org> <20100528150830.GB21880@redhat.com>
 <4BFFE742.2060205@kernel.org> <20100530112925.GB27611@redhat.com>
In-Reply-To: <20100530112925.GB27611@redhat.com>
X-Mailing-List: kvm@vger.kernel.org

Index: work/drivers/vhost/net.c
===================================================================
--- work.orig/drivers/vhost/net.c
+++ work/drivers/vhost/net.c
@@ -294,54 +294,60 @@ static void handle_rx(struct vhost_net *
 	unuse_mm(net->dev.mm);
 }
 
-static void handle_tx_kick(struct work_struct *work)
+static void handle_tx_kick(struct vhost_poll *poll)
 {
-	struct vhost_virtqueue *vq;
-	struct vhost_net *net;
-	vq = container_of(work, struct vhost_virtqueue, poll.work);
-	net = container_of(vq->dev, struct vhost_net, dev);
+	struct vhost_virtqueue *vq =
+		container_of(poll, struct vhost_virtqueue, poll);
+	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
+
 	handle_tx(net);
 }
 
-static void handle_rx_kick(struct work_struct *work)
+static void handle_rx_kick(struct vhost_poll *poll)
 {
-	struct vhost_virtqueue *vq;
-	struct vhost_net *net;
-	vq = container_of(work, struct vhost_virtqueue, poll.work);
-	net = container_of(vq->dev, struct vhost_net, dev);
+	struct vhost_virtqueue *vq =
+		container_of(poll, struct vhost_virtqueue, poll);
+	struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
+
 	handle_rx(net);
 }
 
-static void handle_tx_net(struct work_struct *work)
+static void handle_tx_net(struct vhost_poll *poll)
 {
-	struct vhost_net *net;
-	net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work);
+	struct vhost_net *net =
+		container_of(poll, struct vhost_net, poll[VHOST_NET_VQ_TX]);
+
 	handle_tx(net);
 }
 
-static void handle_rx_net(struct work_struct *work)
+static void handle_rx_net(struct vhost_poll *poll)
 {
-	struct vhost_net *net;
-	net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work);
+	struct vhost_net *net =
+		container_of(poll, struct vhost_net, poll[VHOST_NET_VQ_RX]);
+
 	handle_rx(net);
 }
 
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
 	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct vhost_dev *dev;
 	int r;
+
 	if (!n)
 		return -ENOMEM;
+
+	dev = &n->dev;
 	n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
 	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
-	r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX);
+	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
 	if (r < 0) {
 		kfree(n);
 		return r;
 	}
 
-	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT);
-	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN);
+	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
+	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
 	n->tx_poll_state = VHOST_NET_POLL_DISABLED;
 
 	f->private_data = n;
@@ -644,25 +650,13 @@ static struct miscdevice vhost_net_misc
 
 static int vhost_net_init(void)
 {
-	int r = vhost_init();
-	if (r)
-		goto err_init;
-	r = misc_register(&vhost_net_misc);
-	if (r)
-		goto err_reg;
-	return 0;
-err_reg:
-	vhost_cleanup();
-err_init:
-	return r;
-
+	return misc_register(&vhost_net_misc);
 }
 module_init(vhost_net_init);
 
 static void vhost_net_exit(void)
 {
 	misc_deregister(&vhost_net_misc);
-	vhost_cleanup();
 }
 module_exit(vhost_net_exit);
Index: work/drivers/vhost/vhost.c
===================================================================
--- work.orig/drivers/vhost/vhost.c
+++ work/drivers/vhost/vhost.c
@@ -17,12 +17,12 @@
 #include
 #include
 #include
-#include <linux/workqueue.h>
 #include
 #include
 #include
 #include
 #include
 #include
+#include <linux/kthread.h>
 #include
 #include
@@ -37,8 +37,6 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
-static struct workqueue_struct *vhost_workqueue;
-
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -52,23 +50,27 @@ static void vhost_poll_func(struct file
 static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 			     void *key)
 {
-	struct vhost_poll *poll;
-	poll = container_of(wait, struct vhost_poll, wait);
+	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+
 	if (!((unsigned long)key & poll->mask))
 		return 0;
 
-	queue_work(vhost_workqueue, &poll->work);
+	vhost_poll_queue(poll);
 	return 0;
 }
 
 /* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask)
+void vhost_poll_init(struct vhost_poll *poll, vhost_poll_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev)
 {
-	INIT_WORK(&poll->work, func);
+	poll->fn = fn;
 	init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
 	init_poll_funcptr(&poll->table, vhost_poll_func);
+	INIT_LIST_HEAD(&poll->node);
+	init_waitqueue_head(&poll->done);
 	poll->mask = mask;
+	poll->dev = dev;
+	poll->queue_seq = poll->done_seq = 0;
 }
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -88,16 +90,28 @@ void vhost_poll_stop(struct vhost_poll *
 	remove_wait_queue(poll->wqh, &poll->wait);
 }
 
-/* Flush any work that has been scheduled. When calling this, don't hold any
+/* Flush any poll that has been scheduled. When calling this, don't hold any
  * locks that are also used by the callback. */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-	flush_work(&poll->work);
+	int seq = poll->queue_seq;
+
+	if (seq - poll->done_seq > 0)
+		wait_event(poll->done, seq - poll->done_seq <= 0);
+	smp_rmb();	/* paired with wmb in vhost_poller() */
 }
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-	queue_work(vhost_workqueue, &poll->work);
+	struct vhost_dev *dev = poll->dev;
+
+	spin_lock(&dev->poller_lock);
+	if (list_empty(&poll->node)) {
+		list_add_tail(&poll->node, &dev->poll_list);
+		poll->queue_seq++;
+		wake_up_process(dev->poller);
+	}
+	spin_unlock(&dev->poller_lock);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -125,10 +139,50 @@ static void vhost_vq_reset(struct vhost_
 	vq->log_ctx = NULL;
 }
 
+static int vhost_poller(void *data)
+{
+	struct vhost_dev *dev = data;
+	struct vhost_poll *poll;
+
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	poll = NULL;
+	spin_lock(&dev->poller_lock);
+	if (!list_empty(&dev->poll_list)) {
+		poll = list_first_entry(&dev->poll_list,
+					struct vhost_poll, node);
+		list_del_init(&poll->node);
+	}
+	spin_unlock(&dev->poller_lock);
+
+	if (poll) {
+		__set_current_state(TASK_RUNNING);
+		poll->fn(poll);
+		smp_wmb();	/* paired with rmb in vhost_poll_flush() */
+		poll->done_seq = poll->queue_seq;
+		wake_up_all(&poll->done);
+	} else
+		schedule();
+
+	goto repeat;
+}
+
 long vhost_dev_init(struct vhost_dev *dev,
		    struct vhost_virtqueue *vqs, int nvqs)
 {
+	struct task_struct *poller;
 	int i;
+
+	poller = kthread_create(vhost_poller, dev, "vhost-%d", current->pid);
+	if (IS_ERR(poller))
+		return PTR_ERR(poller);
+
 	dev->vqs = vqs;
 	dev->nvqs = nvqs;
 	mutex_init(&dev->mutex);
@@ -136,6 +190,9 @@ long vhost_dev_init(struct vhost_dev *de
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
+	spin_lock_init(&dev->poller_lock);
+	INIT_LIST_HEAD(&dev->poll_list);
+	dev->poller = poller;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].dev = dev;
@@ -143,8 +200,7 @@ long vhost_dev_init(struct vhost_dev *de
 		vhost_vq_reset(dev, dev->vqs + i);
 		if (dev->vqs[i].handle_kick)
 			vhost_poll_init(&dev->vqs[i].poll,
-					dev->vqs[i].handle_kick,
-					POLLIN);
+					dev->vqs[i].handle_kick, POLLIN, dev);
 	}
 	return 0;
 }
@@ -217,6 +273,8 @@ void vhost_dev_cleanup(struct vhost_dev
 	if (dev->mm)
 		mmput(dev->mm);
 	dev->mm = NULL;
+
+	kthread_stop(dev->poller);
 }
 
 static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -1113,16 +1171,3 @@ void vhost_disable_notify(struct vhost_v
 		vq_err(vq, "Failed to enable notification at %p: %d\n",
 		       &vq->used->flags, r);
 }
-
-int vhost_init(void)
-{
-	vhost_workqueue = create_singlethread_workqueue("vhost");
-	if (!vhost_workqueue)
-		return -ENOMEM;
-	return 0;
-}
-
-void vhost_cleanup(void)
-{
-	destroy_workqueue(vhost_workqueue);
-}
Index: work/drivers/vhost/vhost.h
===================================================================
--- work.orig/drivers/vhost/vhost.h
+++ work/drivers/vhost/vhost.h
@@ -5,7 +5,6 @@
 #include
 #include
 #include
-#include <linux/workqueue.h>
 #include
 #include
 #include
@@ -20,19 +19,26 @@ enum {
 	VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
 };
 
+struct vhost_poll;
+typedef void (*vhost_poll_fn_t)(struct vhost_poll *poll);
+
 /* Poll a file (eventfd or socket) */
 /* Note: there's nothing vhost specific about this structure. */
 struct vhost_poll {
+	vhost_poll_fn_t fn;
 	poll_table table;
 	wait_queue_head_t *wqh;
 	wait_queue_t wait;
-	/* struct which will handle all actual work. */
-	struct work_struct work;
+	struct list_head node;
+	wait_queue_head_t done;
 	unsigned long mask;
+	struct vhost_dev *dev;
+	int queue_seq;
+	int done_seq;
 };
 
-void vhost_poll_init(struct vhost_poll *poll, work_func_t func,
-		     unsigned long mask);
+void vhost_poll_init(struct vhost_poll *poll, vhost_poll_fn_t fn,
+		     unsigned long mask, struct vhost_dev *dev);
 void vhost_poll_start(struct vhost_poll *poll, struct file *file);
 void vhost_poll_stop(struct vhost_poll *poll);
 void vhost_poll_flush(struct vhost_poll *poll);
@@ -63,7 +69,7 @@ struct vhost_virtqueue {
 	struct vhost_poll poll;
 
 	/* The routine to call when the Guest pings us, or timeout. */
-	work_func_t handle_kick;
+	vhost_poll_fn_t handle_kick;
 
 	/* Last available index we saw. */
 	u16 last_avail_idx;
@@ -86,11 +92,11 @@ struct vhost_virtqueue {
 	struct iovec hdr[VHOST_NET_MAX_SG];
 	size_t hdr_size;
 	/* We use a kind of RCU to access private pointer.
-	 * All readers access it from workqueue, which makes it possible to
-	 * flush the workqueue instead of synchronize_rcu. Therefore readers do
+	 * All readers access it from poller, which makes it possible to
+	 * flush the vhost_poll instead of synchronize_rcu. Therefore readers do
 	 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
-	 * work item execution acts instead of rcu_read_lock() and the end of
-	 * work item execution acts instead of rcu_read_lock().
+	 * vhost_poll execution acts instead of rcu_read_lock() and the end of
+	 * vhost_poll execution acts instead of rcu_read_lock().
 	 * Writers use virtqueue mutex. */
 	void *private_data;
 	/* Log write descriptors */
@@ -110,6 +116,9 @@ struct vhost_dev {
 	int nvqs;
 	struct file *log_file;
 	struct eventfd_ctx *log_ctx;
+	spinlock_t poller_lock;
+	struct list_head poll_list;
+	struct task_struct *poller;
 };
 
 long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
@@ -136,9 +145,6 @@ bool vhost_enable_notify(struct vhost_vi
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 		    unsigned int log_num, u64 len);
 
-int vhost_init(void);
-void vhost_cleanup(void);
-
 #define vq_err(vq, fmt, ...) do {                                  \
 		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);              \
 		if ((vq)->error_ctx)                               \
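
To see the moving parts of the new flush protocol in one place:
vhost_poll_queue() bumps queue_seq when it puts a poll on the device's
list, vhost_poller() publishes done_seq once the handler has run, and
vhost_poll_flush() sleeps until done_seq catches up with the queue_seq
it sampled.  The following is a minimal userspace sketch of that
protocol (illustration only; the comments note what each piece stands
in for):

/*
 * Illustration only, not part of the patch: a userspace sketch of the
 * queue_seq/done_seq flush protocol, with pthreads standing in for the
 * kernel's kthread/spinlock/waitqueue primitives and a mutex standing in
 * for the smp_wmb()/smp_rmb() pairing.  All fake_* names are made up.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_poll {
	void (*fn)(struct fake_poll *poll);	/* like vhost_poll->fn */
	int queue_seq;				/* bumped when queued */
	int done_seq;				/* published after fn() ran */
};

struct fake_dev {
	pthread_mutex_t lock;		/* like dev->poller_lock */
	pthread_cond_t kick;		/* like wake_up_process(dev->poller) */
	pthread_cond_t done;		/* like the poll->done waitqueue */
	struct fake_poll *pending;	/* one-slot stand-in for dev->poll_list */
	bool stop;			/* like kthread_should_stop() */
};

/* like vhost_poll_queue(): queue at most once, bump queue_seq, kick poller */
static void fake_poll_queue(struct fake_dev *dev, struct fake_poll *poll)
{
	pthread_mutex_lock(&dev->lock);
	if (!dev->pending) {
		dev->pending = poll;
		poll->queue_seq++;
		pthread_cond_signal(&dev->kick);
	}
	pthread_mutex_unlock(&dev->lock);
}

/* like vhost_poll_flush(): wait until done_seq catches the sampled seq */
static void fake_poll_flush(struct fake_dev *dev, struct fake_poll *poll)
{
	pthread_mutex_lock(&dev->lock);
	int seq = poll->queue_seq;

	while (seq - poll->done_seq > 0)
		pthread_cond_wait(&dev->done, &dev->lock);
	pthread_mutex_unlock(&dev->lock);
}

/* like vhost_poller(): run one queued poll at a time, publish completion */
static void *fake_poller(void *data)
{
	struct fake_dev *dev = data;

	pthread_mutex_lock(&dev->lock);
	while (!dev->stop) {
		struct fake_poll *poll = dev->pending;

		if (!poll) {
			pthread_cond_wait(&dev->kick, &dev->lock); /* like schedule() */
			continue;
		}
		dev->pending = NULL;		/* like list_del_init() */
		int seq = poll->queue_seq;	/* a re-queue during fn() must not be reported done */
		pthread_mutex_unlock(&dev->lock);

		poll->fn(poll);			/* run the handler unlocked */

		pthread_mutex_lock(&dev->lock);
		poll->done_seq = seq;
		pthread_cond_broadcast(&dev->done);	/* like wake_up_all() */
	}
	pthread_mutex_unlock(&dev->lock);
	return NULL;
}

static void say_hello(struct fake_poll *poll)
{
	(void)poll;
	printf("handler ran\n");
}

int main(void)
{
	struct fake_dev dev = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.kick = PTHREAD_COND_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};
	struct fake_poll poll = { .fn = say_hello };
	pthread_t tid;

	pthread_create(&tid, NULL, fake_poller, &dev);
	fake_poll_queue(&dev, &poll);
	fake_poll_flush(&dev, &poll);	/* returns only after say_hello ran */

	pthread_mutex_lock(&dev.lock);	/* shut down, like kthread_stop() */
	dev.stop = true;
	pthread_cond_signal(&dev.kick);
	pthread_mutex_unlock(&dev.lock);
	pthread_join(tid, NULL);
	return 0;
}

As in the patch, a flush caller is only guaranteed that polls queued
before the flush have finished running; and because each device has
exactly one poller, all handlers for a device are serialized, which is
what the updated vhost.h comment relies on when it substitutes flushing
for synchronize_rcu().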