From patchwork Fri Sep 17 10:03:19 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Krishna Kumar X-Patchwork-Id: 187762 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o8HA43Nn006100 for ; Fri, 17 Sep 2010 10:04:03 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753353Ab0IQKDg (ORCPT ); Fri, 17 Sep 2010 06:03:36 -0400 Received: from e28smtp08.in.ibm.com ([122.248.162.8]:46265 "EHLO e28smtp08.in.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752875Ab0IQKDe (ORCPT ); Fri, 17 Sep 2010 06:03:34 -0400 Received: from d28relay05.in.ibm.com (d28relay05.in.ibm.com [9.184.220.62]) by e28smtp08.in.ibm.com (8.14.4/8.13.1) with ESMTP id o8H9humZ005550; Fri, 17 Sep 2010 15:13:56 +0530 Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay05.in.ibm.com (8.13.8/8.13.8/NCO v10.0) with ESMTP id o8HA3M922191442; Fri, 17 Sep 2010 15:33:22 +0530 Received: from d28av02.in.ibm.com (loopback [127.0.0.1]) by d28av02.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVout) with ESMTP id o8HA3LbM031604; Fri, 17 Sep 2010 20:03:22 +1000 Received: from krkumar2.in.ibm.com ([9.124.220.25]) by d28av02.in.ibm.com (8.14.4/8.13.1/NCO v10.0 AVin) with ESMTP id o8HA3JXY031592; Fri, 17 Sep 2010 20:03:20 +1000 From: Krishna Kumar To: rusty@rustcorp.com.au, davem@davemloft.net, mst@redhat.com Cc: kvm@vger.kernel.org, arnd@arndb.de, netdev@vger.kernel.org, avi@redhat.com, anthony@codemonkey.ws, Krishna Kumar Date: Fri, 17 Sep 2010 15:33:19 +0530 Message-Id: <20100917100319.21276.271.sendpatchset@krkumar2.in.ibm.com> In-Reply-To: <20100917100307.21276.79185.sendpatchset@krkumar2.in.ibm.com> References: <20100917100307.21276.79185.sendpatchset@krkumar2.in.ibm.com> Subject: [v2 RFC PATCH 2/4] Changes for virtio-net Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: 
kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Fri, 17 Sep 2010 10:04:03 +0000 (UTC) diff -ruNp org2/include/linux/virtio_net.h tx_only2/include/linux/virtio_net.h --- org2/include/linux/virtio_net.h 2010-02-10 13:20:27.000000000 +0530 +++ tx_only2/include/linux/virtio_net.h 2010-09-16 15:24:01.000000000 +0530 @@ -26,6 +26,7 @@ #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_NUMTXQS 21 /* Device supports multiple TX queues */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ @@ -34,6 +35,8 @@ struct virtio_net_config { __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ __u16 status; + /* number of transmit queues */ + __u16 numtxqs; } __attribute__((packed)); /* This is the first element of the scatter-gather list. If you don't diff -ruNp org2/drivers/net/virtio_net.c tx_only2/drivers/net/virtio_net.c --- org2/drivers/net/virtio_net.c 2010-07-08 12:54:32.000000000 +0530 +++ tx_only2/drivers/net/virtio_net.c 2010-09-16 15:24:01.000000000 +0530 @@ -40,9 +40,20 @@ module_param(gso, bool, 0444); #define VIRTNET_SEND_COMMAND_SG_MAX 2 +/* Our representation of a send virtqueue */ +struct send_queue { + struct virtqueue *svq; + + /* TX: fragments + linear part + virtio header */ + struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; +}; + struct virtnet_info { struct virtio_device *vdev; - struct virtqueue *rvq, *svq, *cvq; + int numtxqs; /* Number of tx queues */ + struct send_queue *sq; + struct virtqueue *rvq; + struct virtqueue *cvq; struct net_device *dev; struct napi_struct napi; unsigned int status; @@ -62,9 +73,8 @@ struct virtnet_info { /* Chain pages by the private ptr. 
*/ struct page *pages; - /* fragments + linear part + virtio header */ + /* RX: fragments + linear part + virtio header */ struct scatterlist rx_sg[MAX_SKB_FRAGS + 2]; - struct scatterlist tx_sg[MAX_SKB_FRAGS + 2]; }; struct skb_vnet_hdr { @@ -120,12 +130,13 @@ static struct page *get_a_page(struct vi static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; + int qnum = svq->queue_index - 1; /* 0 is RX vq */ /* Suppress further interrupts. */ virtqueue_disable_cb(svq); /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); + netif_wake_subqueue(vi->dev, qnum); } static void set_skb_frag(struct sk_buff *skb, struct page *page, @@ -495,12 +506,13 @@ again: return received; } -static unsigned int free_old_xmit_skbs(struct virtnet_info *vi) +static unsigned int free_old_xmit_skbs(struct virtnet_info *vi, + struct virtqueue *svq) { struct sk_buff *skb; unsigned int len, tot_sgs = 0; - while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) { + while ((skb = virtqueue_get_buf(svq, &len)) != NULL) { pr_debug("Sent skb %p\n", skb); vi->dev->stats.tx_bytes += skb->len; vi->dev->stats.tx_packets++; @@ -510,7 +522,8 @@ static unsigned int free_old_xmit_skbs(s return tot_sgs; } -static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) +static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb, + struct virtqueue *svq, struct scatterlist *tx_sg) { struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; @@ -548,12 +561,12 @@ static int xmit_skb(struct virtnet_info /* Encode metadata header at front. 
*/ if (vi->mergeable_rx_bufs) - sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr); + sg_set_buf(tx_sg, &hdr->mhdr, sizeof hdr->mhdr); else - sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr); + sg_set_buf(tx_sg, &hdr->hdr, sizeof hdr->hdr); - hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1; - return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg, + hdr->num_sg = skb_to_sgvec(skb, tx_sg + 1, 0, skb->len) + 1; + return virtqueue_add_buf(svq, tx_sg, hdr->num_sg, 0, skb); } @@ -561,31 +574,34 @@ static netdev_tx_t start_xmit(struct sk_ { struct virtnet_info *vi = netdev_priv(dev); int capacity; + int qnum = skb_get_queue_mapping(skb); + struct virtqueue *svq = vi->sq[qnum].svq; /* Free up any pending old buffers before queueing new ones. */ - free_old_xmit_skbs(vi); + free_old_xmit_skbs(vi, svq); /* Try to transmit */ - capacity = xmit_skb(vi, skb); + capacity = xmit_skb(vi, skb, svq, vi->sq[qnum].tx_sg); /* This can happen with OOM and indirect buffers. */ if (unlikely(capacity < 0)) { if (net_ratelimit()) { if (likely(capacity == -ENOMEM)) { dev_warn(&dev->dev, - "TX queue failure: out of memory\n"); + "TXQ (%d) failure: out of memory\n", + qnum); } else { dev->stats.tx_fifo_errors++; dev_warn(&dev->dev, - "Unexpected TX queue failure: %d\n", - capacity); + "Unexpected TXQ (%d) failure: %d\n", + qnum, capacity); } } dev->stats.tx_dropped++; kfree_skb(skb); return NETDEV_TX_OK; } - virtqueue_kick(vi->svq); + virtqueue_kick(svq); /* Don't wait up for transmitted skbs to be freed. */ skb_orphan(skb); @@ -594,13 +610,13 @@ static netdev_tx_t start_xmit(struct sk_ /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { + netif_stop_subqueue(dev, qnum); + if (unlikely(!virtqueue_enable_cb(svq))) { /* More just got used, free them then recheck. 
*/ - capacity += free_old_xmit_skbs(vi); + capacity += free_old_xmit_skbs(vi, svq); if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); - virtqueue_disable_cb(vi->svq); + netif_start_subqueue(dev, qnum); + virtqueue_disable_cb(svq); } } } @@ -871,10 +887,10 @@ static void virtnet_update_status(struct if (vi->status & VIRTIO_NET_S_LINK_UP) { netif_carrier_on(vi->dev); - netif_wake_queue(vi->dev); + netif_tx_wake_all_queues(vi->dev); } else { netif_carrier_off(vi->dev); - netif_stop_queue(vi->dev); + netif_tx_stop_all_queues(vi->dev); } } @@ -885,18 +901,112 @@ static void virtnet_config_changed(struc virtnet_update_status(vi); } +#define MAX_DEVICE_NAME 16 +static int initialize_vqs(struct virtnet_info *vi, int numtxqs) +{ + vq_callback_t **callbacks; + struct virtqueue **vqs; + int i, err = -ENOMEM; + int totalvqs; + char **names; + + /* Allocate send queues */ + vi->sq = kzalloc(numtxqs * sizeof(*vi->sq), GFP_KERNEL); + if (!vi->sq) + goto out; + + /* setup initial send queue parameters */ + for (i = 0; i < numtxqs; i++) + sg_init_table(vi->sq[i].tx_sg, ARRAY_SIZE(vi->sq[i].tx_sg)); + + /* + * We expect 1 RX virtqueue followed by 'numtxqs' TX virtqueues, and + * optionally one control virtqueue. 
+ */ + totalvqs = 1 + numtxqs + + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); + + /* Setup parameters for find_vqs */ + vqs = kmalloc(totalvqs * sizeof(*vqs), GFP_KERNEL); + callbacks = kmalloc(totalvqs * sizeof(*callbacks), GFP_KERNEL); + names = kzalloc(totalvqs * sizeof(*names), GFP_KERNEL); + if (!vqs || !callbacks || !names) + goto free_mem; + + /* Parameters for recv virtqueue */ + callbacks[0] = skb_recv_done; + names[0] = "input"; + + /* Parameters for send virtqueues */ + for (i = 1; i <= numtxqs; i++) { + callbacks[i] = skb_xmit_done; + names[i] = kmalloc(MAX_DEVICE_NAME * sizeof(*names[i]), + GFP_KERNEL); + if (!names[i]) + goto free_mem; + sprintf(names[i], "output.%d", i - 1); + } + + /* Parameters for control virtqueue, if any */ + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { + callbacks[i] = NULL; + names[i] = "control"; + } + + err = vi->vdev->config->find_vqs(vi->vdev, totalvqs, vqs, callbacks, + (const char **)names); + if (err) + goto free_mem; + + vi->rvq = vqs[0]; + for (i = 0; i < numtxqs; i++) + vi->sq[i].svq = vqs[i + 1]; + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { + vi->cvq = vqs[i + 1]; + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) + vi->dev->features |= NETIF_F_HW_VLAN_FILTER; + } + +free_mem: + if (names) { + for (i = 1; i <= numtxqs; i++) + kfree(names[i]); + kfree(names); + } + + kfree(callbacks); + kfree(vqs); + + if (err) + kfree(vi->sq); + +out: + return err; +} + static int virtnet_probe(struct virtio_device *vdev) { int err; + u16 numtxqs; struct net_device *dev; struct virtnet_info *vi; - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; - const char *names[] = { "input", "output", "control" }; - int nvqs; + + /* + * Find if host passed the number of transmit queues supported + * by the device + */ + err = virtio_config_val(vdev, VIRTIO_NET_F_NUMTXQS, + offsetof(struct virtio_net_config, numtxqs), + &numtxqs); + + /* We need 
at least one txq */ + if (err || !numtxqs) + numtxqs = 1; /* Allocate ourselves a network device with room for our info */ - dev = alloc_etherdev(sizeof(struct virtnet_info)); + dev = alloc_etherdev_mq(sizeof(struct virtnet_info), numtxqs); if (!dev) return -ENOMEM; @@ -940,9 +1050,9 @@ static int virtnet_probe(struct virtio_d vi->vdev = vdev; vdev->priv = vi; vi->pages = NULL; + vi->numtxqs = numtxqs; INIT_DELAYED_WORK(&vi->refill, refill_work); sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg)); - sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg)); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || @@ -953,23 +1063,10 @@ static int virtnet_probe(struct virtio_d if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; - /* We expect two virtqueues, receive then send, - * and optionally control. */ - nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; - - err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names); + /* Initialize our rx/tx queue parameters, and invoke find_vqs */ + err = initialize_vqs(vi, numtxqs); if (err) - goto free; - - vi->rvq = vqs[0]; - vi->svq = vqs[1]; - - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vqs[2]; - - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) - dev->features |= NETIF_F_HW_VLAN_FILTER; - } + goto free_netdev; err = register_netdev(dev); if (err) { @@ -986,6 +1083,9 @@ static int virtnet_probe(struct virtio_d goto unregister; } + dev_info(&dev->dev, "(virtio-net) Allocated 1 RX and %d TX vq's\n", + numtxqs); + vi->status = VIRTIO_NET_S_LINK_UP; virtnet_update_status(vi); netif_carrier_on(dev); @@ -998,7 +1098,8 @@ unregister: cancel_delayed_work_sync(&vi->refill); free_vqs: vdev->config->del_vqs(vdev); -free: + kfree(vi->sq); +free_netdev: free_netdev(dev); return err; } @@ -1006,11 +1107,17 @@ free: static void free_unused_bufs(struct virtnet_info *vi) { void *buf; - while 
(1) { - buf = virtqueue_detach_unused_buf(vi->svq); - if (!buf) - break; - dev_kfree_skb(buf); + int i; + + for (i = 0; i < vi->numtxqs; i++) { + struct virtqueue *svq = vi->sq[i].svq; + + while (1) { + buf = virtqueue_detach_unused_buf(svq); + if (!buf) + break; + dev_kfree_skb(buf); + } } while (1) { buf = virtqueue_detach_unused_buf(vi->rvq); @@ -1059,7 +1166,7 @@ static unsigned int features[] = { VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, - VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, + VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_NUMTXQS, }; static struct virtio_driver virtio_net_driver = {