From patchwork Tue Aug 28 11:54:14 2012
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Paolo Bonzini <pbonzini@redhat.com>
X-Patchwork-Id: 1381161
Return-Path: <kvm-owner@vger.kernel.org>
X-Original-To: patchwork-kvm@patchwork.kernel.org
Delivered-To: patchwork-process-083081@patchwork2.kernel.org
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by patchwork2.kernel.org (Postfix) with ESMTP id 71E3EDF283
	for <patchwork-kvm@patchwork.kernel.org>;
	Tue, 28 Aug 2012 11:56:29 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752845Ab2H1LzA (ORCPT
	<rfc822;patchwork-kvm@patchwork.kernel.org>);
	Tue, 28 Aug 2012 07:55:00 -0400
Received: from mail-gh0-f174.google.com ([209.85.160.174]:53486 "EHLO
	mail-gh0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1752625Ab2H1Ly4 (ORCPT <rfc822; kvm@vger.kernel.org>);
	Tue, 28 Aug 2012 07:54:56 -0400
Received: by ghrr11 with SMTP id r11so1062393ghr.19
	for <multiple recipients>; Tue, 28 Aug 2012 04:54:55 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=gmail.com; s=20120113;
	h=sender:from:to:cc:subject:date:message-id:x-mailer:in-reply-to
	:references; bh=M/zkaZ6MmcaAfbpLO0aJKROA7Ck7Y8W2CW+ljG0l62I=;
	b=zOP/vP6sM6WbF/8nl9M39OWrDZQkM3rFwx2aHCCQE9fuEZUy+VYfrRgtMTEB0kdVNd
	Hy2iRkC5E4wCPRVH/++7PskxBeqeQATfPVo9G8G9oKcaxxgnt2geYUo+25U8jGP+bKPO
	hfKtN1Gr0luyTcdkdPwphaKkP5ucfQa2JCyOQMaGml5mmql+eSD2ym+fZ0tJEO2xTIwr
	aUzc2qtiY6sIbavsvM5Udqs3i/VLOExax1HzxGonmMD77ACexZ1/ElIEjGHaDhSVkCRu
	Mdh7+EIIdr48ng6+JyeMkne8zOzBl5n7sQEIgXnc2H7d8DnwvqExyscEN36Et2vIAE2+
	Mz8w==
Received: by 10.100.243.8 with SMTP id q8mr1720389anh.24.1346154895093;
	Tue, 28 Aug 2012 04:54:55 -0700 (PDT)
Received: from yakj.usersys.redhat.com (93-34-169-1.ip50.fastwebnet.it.
	[93.34.169.1]) by mx.google.com with ESMTPS id
	n5sm19461435ang.18.2012.08.28.04.54.49
	(version=TLSv1/SSLv3 cipher=OTHER);
	Tue, 28 Aug 2012 04:54:54 -0700 (PDT)
From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: linux-scsi@vger.kernel.org, kvm@vger.kernel.org,
	rusty@rustcorp.com.au, jasowang@redhat.com, mst@redhat.com,
	virtualization@lists.linux-foundation.org
Subject: [PATCH 2/5] virtio: introduce an API to set affinity for a virtqueue
Date: Tue, 28 Aug 2012 13:54:14 +0200
Message-Id: <1346154857-12487-3-git-send-email-pbonzini@redhat.com>
X-Mailer: git-send-email 1.7.11.2
In-Reply-To: <1346154857-12487-1-git-send-email-pbonzini@redhat.com>
References: <1346154857-12487-1-git-send-email-pbonzini@redhat.com>
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org

From: Jason Wang <jasowang@redhat.com>

Sometimes, virtio device need to configure irq affinity hint to maximize the
performance. Instead of just exposing the irq of a virtqueue, this patch
introduce an API to set the affinity for a virtqueue.

The api is best-effort, the affinity hint may not be set as expected due to
platform support, irq sharing or irq type. Currently, only pci method were
implemented and we set the affinity according to:

- if device uses INTX, we just ignore the request
- if device has per vq vector, we force the affinity hint
- if the virtqueues share MSI, make the affinity OR over all affinities
  requested

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_pci.c   |   46 +++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio_config.h |   21 ++++++++++++++++++
 2 files changed, 67 insertions(+), 0 deletions(-)
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index adb24f2..2ff0451 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -48,6 +48,7 @@ struct virtio_pci_device
 	int msix_enabled;
 	int intx_enabled;
 	struct msix_entry *msix_entries;
+	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
 	char (*msix_names)[256];
@@ -276,6 +277,10 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 		free_irq(vp_dev->msix_entries[i].vector, vp_dev);
 
+	for (i = 0; i < vp_dev->msix_vectors; i++)
+		if (vp_dev->msix_affinity_masks[i])
+			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
+
 	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
@@ -293,6 +298,8 @@ static void vp_free_vectors(struct virtio_device *vdev)
 	vp_dev->msix_names = NULL;
 	kfree(vp_dev->msix_entries);
 	vp_dev->msix_entries = NULL;
+	kfree(vp_dev->msix_affinity_masks);
+	vp_dev->msix_affinity_masks = NULL;
 }
 
 static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
@@ -311,6 +318,15 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 				     GFP_KERNEL);
 	if (!vp_dev->msix_names)
 		goto error;
+	vp_dev->msix_affinity_masks
+		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+			  GFP_KERNEL);
+	if (!vp_dev->msix_affinity_masks)
+		goto error;
+	for (i = 0; i < nvectors; ++i)
+		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+					GFP_KERNEL))
+			goto error;
 
 	for (i = 0; i < nvectors; ++i)
 		vp_dev->msix_entries[i].entry = i;
@@ -607,6 +623,35 @@ static const char *vp_bus_name(struct virtio_device *vdev)
 	return pci_name(vp_dev->pci_dev);
 }
 
+/* Setup the affinity for a virtqueue:
+ * - force the affinity for per vq vector
+ * - OR over all affinities for shared MSI
+ * - ignore the affinity request if we're using INTX
+ */
+static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vq->priv;
+	struct cpumask *mask;
+	unsigned int irq;
+
+	if (!vq->callback)
+		return -EINVAL;
+
+	if (vp_dev->msix_enabled) {
+		mask = vp_dev->msix_affinity_masks[info->msix_vector];
+		irq = vp_dev->msix_entries[info->msix_vector].vector;
+		if (cpu == -1)
+			irq_set_affinity_hint(irq, NULL);
+		else {
+			cpumask_set_cpu(cpu, mask);
+			irq_set_affinity_hint(irq, mask);
+		}
+	}
+	return 0;
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
@@ -618,6 +663,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
 	.bus_name	= vp_bus_name,
+	.set_vq_affinity = vp_set_vq_affinity,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index fc457f4..2c4a989 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -98,6 +98,7 @@
  *	vdev: the virtio_device
  *      This returns a pointer to the bus name a la pci_name from which
  *      the caller can then copy.
+ * @set_vq_affinity: set the affinity for a virtqueue.
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -116,6 +117,7 @@ struct virtio_config_ops {
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
+	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -190,5 +192,24 @@ const char *virtio_bus_name(struct virtio_device *vdev)
 	return vdev->config->bus_name(vdev);
 }
 
+/**
+ * virtqueue_set_affinity - setting affinity for a virtqueue
+ * @vq: the virtqueue
+ * @cpu: the cpu no.
+ *
+ * Pay attention the function are best-effort: the affinity hint may not be set
+ * due to config support, irq type and sharing.
+ *
+ */
+static inline
+int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
+{
+	struct virtio_device *vdev = vq->vdev;
+	if (vdev->config->set_vq_affinity)
+		return vdev->config->set_vq_affinity(vq, cpu);
+	return 0;
+}
+
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */