From patchwork Mon Nov  9 02:46:48 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Wu, Feng" <feng.wu@intel.com>
X-Patchwork-Id: 7580311
Return-Path: <kvm-owner@kernel.org>
X-Original-To: patchwork-kvm@patchwork.kernel.org
Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org
Received: from mail.kernel.org (mail.kernel.org [198.145.29.136])
	by patchwork1.web.kernel.org (Postfix) with ESMTP id 9F9969F392
	for <patchwork-kvm@patchwork.kernel.org>;
	Mon,  9 Nov 2015 03:04:29 +0000 (UTC)
Received: from mail.kernel.org (localhost [127.0.0.1])
	by mail.kernel.org (Postfix) with ESMTP id 890E920636
	for <patchwork-kvm@patchwork.kernel.org>;
	Mon,  9 Nov 2015 03:04:28 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
	by mail.kernel.org (Postfix) with ESMTP id 66B1420650
	for <patchwork-kvm@patchwork.kernel.org>;
	Mon,  9 Nov 2015 03:04:27 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1752598AbbKIDDn (ORCPT
	<rfc822;patchwork-kvm@patchwork.kernel.org>);
	Sun, 8 Nov 2015 22:03:43 -0500
Received: from mga09.intel.com ([134.134.136.24]:52897 "EHLO mga09.intel.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1752478AbbKIDDm (ORCPT <rfc822;kvm@vger.kernel.org>);
	Sun, 8 Nov 2015 22:03:42 -0500
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
	by orsmga102.jf.intel.com with ESMTP; 08 Nov 2015 19:03:32 -0800
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.20,264,1444719600"; d="scan'208";a="831217035"
Received: from feng-bdw-de-pi.bj.intel.com ([10.238.154.76])
	by fmsmga001.fm.intel.com with ESMTP; 08 Nov 2015 19:03:31 -0800
From: Feng Wu <feng.wu@intel.com>
To: pbonzini@redhat.com
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Feng Wu <feng.wu@intel.com>
Subject: [PATCH] KVM: x86: Add lowest-priority support for vt-d
	posted-interrupts
Date: Mon,  9 Nov 2015 10:46:48 +0800
Message-Id: <1447037208-75615-1-git-send-email-feng.wu@intel.com>
X-Mailer: git-send-email 2.1.0
Sender: kvm-owner@vger.kernel.org
Precedence: bulk
List-ID: <kvm.vger.kernel.org>
X-Mailing-List: kvm@vger.kernel.org
X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI,
	T_RP_MATCHES_RCVD,
	UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP

Use vector hashing to choose the destination vCPU when a lowest-priority
interrupt is delivered through VT-d posted interrupts. Modern Intel CPUs,
for example, use this method to handle lowest-priority interrupts.

Signed-off-by: Feng Wu <feng.wu@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c         | 52 +++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.c            | 57 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.h            |  2 ++
 arch/x86/kvm/vmx.c              | 14 ++++++++--
 5 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9265196..e225106 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1258,6 +1258,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			     struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+					      struct kvm_lapic_irq *irq);
 
 void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 		     struct kvm_lapic_irq *irq);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..8156e45 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -266,6 +266,58 @@ out:
 	return r;
 }
 
+/*
+ * Choose the destination vCPU of a lowest-priority interrupt by vector
+ * hashing, the method modern Intel CPUs use for such interrupts. The fast
+ * path is tried first; if it yields nothing:
+ * 1. Every present vCPU matching the destination is recorded in a bitmap,
+ *    indexed by its position in the VM's vCPU array (not by vcpu_id, since
+ *    kvm_get_vcpu() expects that position).
+ * 2. The vCPU at the ("guest vector % number of destination vCPUs")-th set
+ *    bit is selected.
+ * Returns NULL when no vCPU can receive the interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+					      struct kvm_lapic_irq *irq)
+{
+	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	unsigned int dest_vcpus = 0;
+	struct kvm_vcpu *vcpu;
+	unsigned int i, mod, idx = 0;
+
+	vcpu = kvm_intr_vector_hashing_dest_fast(kvm, irq);
+	if (vcpu)
+		return vcpu;
+
+	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		__set_bit(i, dest_vcpu_bitmap);
+		dest_vcpus++;
+	}
+
+	if (dest_vcpus == 0)
+		return NULL;
+
+	mod = irq->vector % dest_vcpus;
+
+	/* idx is one past the bit found, so the next search starts after it. */
+	for (i = 0; i <= mod; i++) {
+		idx = find_next_bit(dest_vcpu_bitmap, KVM_MAX_VCPUS, idx) + 1;
+		BUG_ON(idx > KVM_MAX_VCPUS);
+	}
+
+	return kvm_get_vcpu(kvm, idx - 1);
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			     struct kvm_vcpu **dest_vcpu)
 {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ecd4ea1..4937aa4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -816,6 +816,63 @@ out:
 	return ret;
 }
 
+/*
+ * Fast path of vector hashing: choose the destination of a lowest-priority,
+ * logical-mode interrupt from the APIC map. Returns NULL if it finds none.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+						   struct kvm_lapic_irq *irq)
+{
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu = NULL;
+
+	if (irq->shorthand)
+		return NULL;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+	if (!map)
+		goto out;
+
+	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+			kvm_lowest_prio_delivery(irq)) {
+		u16 cid;
+		int i, idx = 0;
+		unsigned long bitmap = 1;
+		unsigned int mod, dest_vcpus = 0;
+		struct kvm_lapic **dst = NULL;
+
+		if (!kvm_apic_logical_map_valid(map))
+			goto out;
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
+		dst = map->logical_map[cid];
+
+		/* Count only bits backed by a LAPIC; zero means no target. */
+		for_each_set_bit(i, &bitmap, 16) {
+			if (dst[i])
+				dest_vcpus++;
+			else
+				__clear_bit(i, &bitmap);
+		}
+		if (dest_vcpus == 0)
+			goto out;
+
+		mod = irq->vector % dest_vcpus;
+		for (i = 0; i <= mod; i++)
+			idx = find_next_bit(&bitmap, 16, idx) + 1;
+
+		if (kvm_apic_present(dst[idx-1]->vcpu))
+			vcpu = dst[idx-1]->vcpu;
+	}
+
+out:
+	rcu_read_unlock();
+	return vcpu;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index fde8e35d..a6a775d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -170,4 +170,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest_fast(struct kvm *kvm,
+						   struct kvm_lapic_irq *irq);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..57f71ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -159,6 +159,9 @@ static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 module_param(ple_window_max, int, S_IRUGO);
 
+static bool __read_mostly enable_pi_vector_hashing = 1;
+module_param(enable_pi_vector_hashing, bool, S_IRUGO);
+
 extern const ulong vmx_return;
 
 #define NR_AUTOLOAD_MSRS 8
@@ -10702,8 +10705,15 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 		 */
 
 		kvm_set_msi_irq(e, &irq);
-		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
-			continue;
+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+			if ((!enable_pi_vector_hashing ||
+				irq.delivery_mode != APIC_DM_LOWEST))
+				continue;
+
+			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
+			if (!vcpu)
+				continue;
+		}
 
 		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
 		vcpu_info.vector = irq.vector;