diff mbox series

[v2,1/5] KVM: X86: Add MSR_KVM_PREEMPT_COUNT support

Message ID 1648800605-18074-2-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: X86: Scaling Guest OS Critical Sections with boosting | expand

Commit Message

Wanpeng Li April 1, 2022, 8:10 a.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

x86 preempt_count is per-cpu, any non-zero value for preempt_count
indicates that either preemption has been disabled explicitly or the
CPU is currently servicing some sort of interrupt. The guest will
pass this value to the hypervisor, so the hypervisor knows whether
the guest is running in the critical section.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/include/asm/kvm_host.h      |  6 +++++
 arch/x86/include/uapi/asm/kvm_para.h |  2 ++
 arch/x86/kvm/x86.c                   | 35 ++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4138939532c6..c13c9ed50903 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -852,6 +852,12 @@  struct kvm_vcpu_arch {
 
 	u64 msr_kvm_poll_control;
 
+	struct {
+		u64 msr_val;
+		bool preempt_count_enabled;
+		struct gfn_to_hva_cache preempt_count_cache;
+	} pv_pc;
+
 	/*
 	 * Indicates the guest is trying to write a gfn that contains one or
 	 * more of the PTEs used to translate the write itself, i.e. the access
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6e64b27b2c1e..f99fa4407604 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -36,6 +36,7 @@ 
 #define KVM_FEATURE_MSI_EXT_DEST_ID	15
 #define KVM_FEATURE_HC_MAP_GPA_RANGE	16
 #define KVM_FEATURE_MIGRATION_CONTROL	17
+#define KVM_FEATURE_PREEMPT_COUNT	18
 
 #define KVM_HINTS_REALTIME      0
 
@@ -58,6 +59,7 @@ 
 #define MSR_KVM_ASYNC_PF_INT	0x4b564d06
 #define MSR_KVM_ASYNC_PF_ACK	0x4b564d07
 #define MSR_KVM_MIGRATION_CONTROL	0x4b564d08
+#define MSR_KVM_PREEMPT_COUNT	0x4b564d09
 
 struct kvm_steal_time {
 	__u64 steal;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02cf0a7e1d14..f2d2e3d25230 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1456,6 +1456,7 @@  static const u32 emulated_msrs_all[] = {
 
 	MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK,
+	MSR_KVM_PREEMPT_COUNT,
 
 	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSC_DEADLINE,
@@ -3442,6 +3443,25 @@  static void record_steal_time(struct kvm_vcpu *vcpu)
 	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
+static int kvm_pv_enable_preempt_count(struct kvm_vcpu *vcpu, u64 data)
+{
+	u64 addr = data & ~KVM_MSR_ENABLED;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_pc.preempt_count_cache;
+
+	vcpu->arch.pv_pc.preempt_count_enabled = false;
+	vcpu->arch.pv_pc.msr_val = data;
+
+	if (!(data & KVM_MSR_ENABLED))
+		return 0;
+
+	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, sizeof(int)))
+		return 1;
+
+	vcpu->arch.pv_pc.preempt_count_enabled = true;
+
+	return 0;
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	bool pr = false;
@@ -3661,6 +3681,14 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.msr_kvm_poll_control = data;
 		break;
 
+	case MSR_KVM_PREEMPT_COUNT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PREEMPT_COUNT))
+			return 1;
+
+		if (kvm_pv_enable_preempt_count(vcpu, data))
+			return 1;
+		break;
+
 	case MSR_IA32_MCG_CTL:
 	case MSR_IA32_MCG_STATUS:
 	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
@@ -4001,6 +4029,12 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		msr_info->data = vcpu->arch.msr_kvm_poll_control;
 		break;
+	case MSR_KVM_PREEMPT_COUNT:
+		if (!guest_pv_has(vcpu, KVM_FEATURE_PREEMPT_COUNT))
+			return 1;
+
+		msr_info->data = vcpu->arch.pv_pc.msr_val;
+		break;
 	case MSR_IA32_P5_MC_ADDR:
 	case MSR_IA32_P5_MC_TYPE:
 	case MSR_IA32_MCG_CAP:
@@ -11192,6 +11226,7 @@  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.pending_external_vector = -1;
 	vcpu->arch.preempted_in_kernel = false;
+	vcpu->arch.pv_pc.preempt_count_enabled = false;
 
 #if IS_ENABLED(CONFIG_HYPERV)
 	vcpu->arch.hv_root_tdp = INVALID_PAGE;