
[12/15] KVM: x86: Track possible passthrough MSRs in kvm_x86_ops

Message ID: 20241127201929.4005605-13-aaronlewis@google.com
State: New
Series: Unify MSR intercepts in x86

Commit Message

Aaron Lewis Nov. 27, 2024, 8:19 p.m. UTC
Move the possible passthrough MSRs to kvm_x86_ops.  Doing this allows
them to be accessed from common x86 code.

In order to set the passthrough MSRs in kvm_x86_ops for VMX,
"vmx_possible_passthrough_msrs" has to be relocated to main.c, and
vmx_msr_filter_changed() has to move along with it since it uses the
array.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Aaron Lewis <aaronlewis@google.com>
Signed-off-by: Aaron Lewis <aaronlewis@google.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/svm/svm.c          | 18 ++-------
 arch/x86/kvm/vmx/main.c         | 58 ++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c          | 67 ++-------------------------------
 arch/x86/kvm/x86.c              | 13 +++++++
 arch/x86/kvm/x86.h              |  1 +
 6 files changed, 83 insertions(+), 77 deletions(-)

Comments

Sean Christopherson Nov. 27, 2024, 9:57 p.m. UTC | #1
+Xin, Boris, and Dapeng

On Wed, Nov 27, 2024, Aaron Lewis wrote:
> Move the possible passthrough MSRs to kvm_x86_ops.  Doing this allows
> them to be accessed from common x86 code.

...

> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3e8afc82ae2fb..7e9fee4d36cc2 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1817,6 +1817,9 @@ struct kvm_x86_ops {
>  	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
>  
>  	void (*migrate_timers)(struct kvm_vcpu *vcpu);
> +
> +	const u32 * const possible_passthrough_msrs;
> +	const u32 nr_possible_passthrough_msrs;
>  	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
>  	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);

...

> +/*
> + * List of MSRs that can be directly passed to the guest.
> + * In addition to these, x2apic, PT and LBR MSRs are handled specially.
> + */
> +static const u32 vmx_possible_passthrough_msrs[] = {
> +	MSR_IA32_SPEC_CTRL,
> +	MSR_IA32_PRED_CMD,
> +	MSR_IA32_FLUSH_CMD,
> +	MSR_IA32_TSC,
> +#ifdef CONFIG_X86_64
> +	MSR_FS_BASE,
> +	MSR_GS_BASE,
> +	MSR_KERNEL_GS_BASE,
> +	MSR_IA32_XFD,
> +	MSR_IA32_XFD_ERR,
> +#endif
> +	MSR_IA32_SYSENTER_CS,
> +	MSR_IA32_SYSENTER_ESP,
> +	MSR_IA32_SYSENTER_EIP,
> +	MSR_CORE_C1_RES,
> +	MSR_CORE_C3_RESIDENCY,
> +	MSR_CORE_C6_RESIDENCY,
> +	MSR_CORE_C7_RESIDENCY,
> +};

Looking at this with fresh eyes, the "possible" passthrough MSR list and SVM's
direct_access_msrs are confusing and flat out stupid.  VMX's list isn't the set
of "possible" passthrough MSRs, it's the set of MSRs for which KVM may disable
interception without dedicated handling in .msr_filter_changed().  Ditto for
SVM's list, but at least SVM's array uses a slightly less awful name.
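For context, this is roughly how the existing VMX disable path reads (a
trimmed, read-side-only sketch, not part of this patch): disabling
interception re-checks the userspace MSR filter, so the only thing
.msr_filter_changed() needs the list for is knowing which MSRs to re-run
it on.

void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
	int idx;

	if (!cpu_has_vmx_msr_bitmap())
		return;

	/* Record the desired state for resync on a filter change. */
	idx = vmx_get_passthrough_msr_slot(msr);
	if (idx >= 0) {
		if (type & MSR_TYPE_R)
			clear_bit(idx, vmx->shadow_msr_intercept.read);
		if (type & MSR_TYPE_W)
			clear_bit(idx, vmx->shadow_msr_intercept.write);
	}

	/* Keep intercepting reads if userspace's filter denies the MSR. */
	if ((type & MSR_TYPE_R) &&
	    !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
		vmx_set_msr_bitmap_read(msr_bitmap, msr);
	else if (type & MSR_TYPE_R)
		vmx_clear_msr_bitmap_read(msr_bitmap, msr);

	/* ... write side is symmetric, elided ... */
}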

Xin Li and Boris have been bikeshedding over the VMX array[*], and it's all a giant
waste of time.

In all cases, KVM *already knows* which MSRs it wants to pass through to the
guest.  In a few cases the logic isn't super intuitive, e.g. for SPEC_CTRL, but
it's always fairly easy to understand what KVM wants to do.
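SPEC_CTRL is the canonical "not super intuitive" case; a trimmed sketch of
what the existing vmx_set_msr() does with it (feature/permission checks
elided, illustration only):

	case MSR_IA32_SPEC_CTRL:
		vmx->spec_ctrl = data;
		if (!data)
			break;

		/*
		 * Disable interception on the first non-zero write: a guest
		 * that actually uses SPEC_CTRL will almost certainly keep
		 * writing it, and taking a VM-Exit on every WRMSR is too
		 * expensive.
		 */
		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL,
					      MSR_TYPE_RW);
		break;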

Rather than expose the lists to common code, I think we should pivot after
"KVM: SVM: Drop "always" flag from list of possible passthrough MSRs" and rip
them out entirely.

The attached patch is compile-tested only (the nested interactions in particular
need a bit of scrutiny) and needs to be chunked into multiple patches, but I don't
see any obvious blockers, and the diffstats speak volumes:

 arch/x86/include/asm/kvm-x86-ops.h |   2 +-
 arch/x86/include/asm/kvm_host.h    |   2 +-
 arch/x86/kvm/lapic.h               |   2 +
 arch/x86/kvm/svm/svm.c             | 310 ++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------
 arch/x86/kvm/svm/svm.h             |   6 ---
 arch/x86/kvm/vmx/main.c            |   2 +-
 arch/x86/kvm/vmx/vmx.c             | 179 ++++++++++++++++++---------------------------------------------------------
 arch/x86/kvm/vmx/vmx.h             |   9 ----
 arch/x86/kvm/vmx/x86_ops.h         |   2 +-
 arch/x86/kvm/x86.c                 |  10 ++++-
 10 files changed, 147 insertions(+), 377 deletions(-)
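Concretely, with the lists gone, .msr_filter_changed() collapses to a vendor
helper that simply re-applies the interception KVM already wants.  A
condensed, hand-wavy sketch of the VMX side (the helper name here is made
up; the full version is in the attached patch):

static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	/* Unconditionally pass through the base-register MSRs. */
	vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);

	/* KVM already tracks whether SPEC_CTRL needs to be intercepted. */
	if (to_vmx(vcpu)->spec_ctrl)
		vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL,
					      MSR_TYPE_RW);

	/* PT MSRs can be passed through iff PT is exposed to the guest. */
	if (vmx_pt_mode_is_host_guest())
		pt_update_intercept_for_msr(vcpu);
}

Since the disable path already consults kvm_msr_allowed(), re-running the
helper when the filter changes does the right thing without any shadow
bitmaps or slot lookups.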

[*] https://lore.kernel.org/all/20241001050110.3643764-10-xin@zytor.com

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3e8afc82ae2fb..7e9fee4d36cc2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1817,6 +1817,9 @@ struct kvm_x86_ops {
 	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
+
+	const u32 * const possible_passthrough_msrs;
+	const u32 nr_possible_passthrough_msrs;
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4e30efe90c541..23e6515bb7904 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -755,18 +755,6 @@ static void clr_dr_intercepts(struct vcpu_svm *svm)
 	recalc_intercepts(svm);
 }
 
-static int direct_access_msr_slot(u32 msr)
-{
-	u32 i;
-
-	for (i = 0; i < ARRAY_SIZE(direct_access_msrs); i++) {
-		if (direct_access_msrs[i] == msr)
-			return i;
-	}
-
-	return -ENOENT;
-}
-
 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 {
 	u8 bit_write;
@@ -832,7 +820,7 @@ void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int slot;
 
-	slot = direct_access_msr_slot(msr);
+	slot = kvm_passthrough_msr_slot(msr);
 	WARN_ON(slot == -ENOENT);
 	if (slot >= 0) {
 		/* Set the shadow bitmaps to the desired intercept states */
@@ -871,7 +859,7 @@ void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int slot;
 
-	slot = direct_access_msr_slot(msr);
+	slot = kvm_passthrough_msr_slot(msr);
 	WARN_ON(slot == -ENOENT);
 	if (slot >= 0) {
 		/* Set the shadow bitmaps to the desired intercept states */
@@ -5165,6 +5153,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
+	.possible_passthrough_msrs = direct_access_msrs,
+	.nr_possible_passthrough_msrs = ARRAY_SIZE(direct_access_msrs),
 	.msr_filter_changed = svm_msr_filter_changed,
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 92d35cc6cd15d..6d52693b0fd6c 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -7,6 +7,62 @@
 #include "pmu.h"
 #include "posted_intr.h"
 
+/*
+ * List of MSRs that can be directly passed to the guest.
+ * In addition to these, x2apic, PT and LBR MSRs are handled specially.
+ */
+static const u32 vmx_possible_passthrough_msrs[] = {
+	MSR_IA32_SPEC_CTRL,
+	MSR_IA32_PRED_CMD,
+	MSR_IA32_FLUSH_CMD,
+	MSR_IA32_TSC,
+#ifdef CONFIG_X86_64
+	MSR_FS_BASE,
+	MSR_GS_BASE,
+	MSR_KERNEL_GS_BASE,
+	MSR_IA32_XFD,
+	MSR_IA32_XFD_ERR,
+#endif
+	MSR_IA32_SYSENTER_CS,
+	MSR_IA32_SYSENTER_ESP,
+	MSR_IA32_SYSENTER_EIP,
+	MSR_CORE_C1_RES,
+	MSR_CORE_C3_RESIDENCY,
+	MSR_CORE_C6_RESIDENCY,
+	MSR_CORE_C7_RESIDENCY,
+};
+
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 i;
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * Redo intercept permissions for MSRs that KVM is passing through to
+	 * the guest.  Disabling interception will check the new MSR filter and
+	 * ensure that KVM enables interception if usersepace wants to filter
+	 * the MSR.  MSRs that KVM is already intercepting don't need to be
+	 * refreshed since KVM is going to intercept them regardless of what
+	 * userspace wants.
+	 */
+	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+		u32 msr = vmx_possible_passthrough_msrs[i];
+
+		if (!test_bit(i, vmx->shadow_msr_intercept.read))
+			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
+
+		if (!test_bit(i, vmx->shadow_msr_intercept.write))
+			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+	}
+
+	/* PT MSRs can be passed through iff PT is exposed to the guest. */
+	if (vmx_pt_mode_is_host_guest())
+		pt_update_intercept_for_msr(vcpu);
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS				\
 	(BIT(APICV_INHIBIT_REASON_DISABLED) |			\
 	 BIT(APICV_INHIBIT_REASON_ABSENT) |			\
@@ -152,6 +208,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
 	.migrate_timers = vmx_migrate_timers,
 
+	.possible_passthrough_msrs = vmx_possible_passthrough_msrs,
+	.nr_possible_passthrough_msrs = ARRAY_SIZE(vmx_possible_passthrough_msrs),
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bc64e7cc02704..1c2c0c06f3d35 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -163,31 +163,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 	RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
 	RTIT_STATUS_BYTECNT))
 
-/*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static const u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
-	MSR_IA32_SPEC_CTRL,
-	MSR_IA32_PRED_CMD,
-	MSR_IA32_FLUSH_CMD,
-	MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
-	MSR_FS_BASE,
-	MSR_GS_BASE,
-	MSR_KERNEL_GS_BASE,
-	MSR_IA32_XFD,
-	MSR_IA32_XFD_ERR,
-#endif
-	MSR_IA32_SYSENTER_CS,
-	MSR_IA32_SYSENTER_ESP,
-	MSR_IA32_SYSENTER_EIP,
-	MSR_CORE_C1_RES,
-	MSR_CORE_C3_RESIDENCY,
-	MSR_CORE_C6_RESIDENCY,
-	MSR_CORE_C7_RESIDENCY,
-};
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * ple_gap:    upper bound on the amount of time between two successive
@@ -669,7 +644,7 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 
 static int vmx_get_passthrough_msr_slot(u32 msr)
 {
-	int i;
+	int r;
 
 	switch (msr) {
 	case 0x800 ... 0x8ff:
@@ -692,13 +667,10 @@ static int vmx_get_passthrough_msr_slot(u32 msr)
 		return -ENOENT;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-	}
+	r = kvm_passthrough_msr_slot(msr);
 
-	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
-	return -ENOENT;
+	WARN(r == -ENOENT, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	return r;
 }
 
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -4145,37 +4117,6 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
 	}
 }
 
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 i;
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return;
-
-	/*
-	 * Redo intercept permissions for MSRs that KVM is passing through to
-	 * the guest.  Disabling interception will check the new MSR filter and
-	 * ensure that KVM enables interception if usersepace wants to filter
-	 * the MSR.  MSRs that KVM is already intercepting don't need to be
-	 * refreshed since KVM is going to intercept them regardless of what
-	 * userspace wants.
-	 */
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
-		u32 msr = vmx_possible_passthrough_msrs[i];
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.read))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
-		if (!test_bit(i, vmx->shadow_msr_intercept.write))
-			vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
-	}
-
-	/* PT MSRs can be passed through iff PT is exposed to the guest. */
-	if (vmx_pt_mode_is_host_guest())
-		pt_update_intercept_for_msr(vcpu);
-}
-
 static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
 						     int pi_vec)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8637bc0010965..20b6cce793af5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1806,6 +1806,19 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 
+int kvm_passthrough_msr_slot(u32 msr)
+{
+	u32 i;
+
+	for (i = 0; i < kvm_x86_ops.nr_possible_passthrough_msrs; i++) {
+		if (kvm_x86_ops.possible_passthrough_msrs[i] == msr)
+			return i;
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(kvm_passthrough_msr_slot);
+
 /*
  * Write @data into the MSR specified by @index.  Select MSR specific fault
  * checks are bypassed if @host_initiated is %true.
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ec623d23d13d2..208f0698c64e2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -555,6 +555,7 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
 			      struct x86_exception *e);
 int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
+int kvm_passthrough_msr_slot(u32 msr);
 
 enum kvm_msr_access {
 	MSR_TYPE_R	= BIT(0),