
[v2,49/49] *** DO NOT APPLY *** KVM: x86: Verify KVM initializes all consumed guest caps

Message ID 20240517173926.965351-50-seanjc@google.com (mailing list archive)
State New, archived
Series: KVM: x86: CPUID overhaul, fixes, and caching

Commit Message

Sean Christopherson May 17, 2024, 5:39 p.m. UTC
Assert that all features queried via guest_cpu_cap_has() are known to KVM,
i.e. that KVM doesn't check for a feature that can never actually be set.

This is for demonstration purposes only, as the proper way to enforce this
is to do post-processing at build time (and there are other shortcomings
of this PoC, e.g. it requires all KVM modules to be built-in).

Not-signed-off-by: Sean Christopherson <seanjc@google.com>
---
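A quick standalone sketch of the record-and-check pattern used here, for
anyone unfamiliar with the trick: every queried feature number is dropped
into a dedicated ELF section at build time, and that section is walked once
at init.  The sketch below is user-space C and every name in it (e.g.
demo_features, CAP_QUERY) is invented for illustration; the patch proper
uses inline asm and the __kvm_features section collected in vmlinux.lds.h.
It assumes a GNU toolchain, where the linker provides __start_<section> and
__stop_<section> symbols for any section whose name is a valid C identifier.

#include <stdio.h>

extern const unsigned int __start_demo_features[];
extern const unsigned int __stop_demo_features[];

/* Record the queried feature number in the "demo_features" section. */
#define CAP_QUERY(feat)							\
({									\
	static const unsigned int __rec					\
		__attribute__((used, section("demo_features"))) = (feat); \
	(feat);								\
})

int main(void)
{
	const unsigned int *p;

	/* Each call site leaves one record behind in the final binary. */
	CAP_QUERY(3);
	CAP_QUERY(17);

	/* Walk the section at runtime, as kvm_validate_cpu_caps() does. */
	for (p = __start_demo_features; p < __stop_demo_features; p++)
		printf("feature %u is queried somewhere in this binary\n", *p);

	return 0;
}

In the patch itself, each guest_cpu_cap_has() call site emits a .long
holding the X86_FEATURE_* constant (a word*32+bit encoding);
kvm_validate_cpu_caps() splits that back into a word index and bit mask and
checks it against kvm_known_cpu_caps[], which KVM_VALIDATE_CPU_CAP_USAGE()
fills in as kvm_cpu_cap_init() runs.
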
 arch/x86/kvm/cpuid.c              | 81 +++++++++++++++++++++++--------
 arch/x86/kvm/cpuid.h              | 16 +++++-
 arch/x86/kvm/x86.c                |  2 +
 include/asm-generic/vmlinux.lds.h |  4 ++
 4 files changed, 81 insertions(+), 22 deletions(-)

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0e64a6332052..18ded0e682f2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -37,6 +37,7 @@  u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_cpu_caps);
 
 static u32 kvm_vmm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
+static u32 kvm_known_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 
 u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
@@ -143,6 +144,26 @@  u32 xstate_required_size(u64 xstate_bv, bool compacted)
 	0;									\
 })
 
+/*
+ * Vendor Features - For features that KVM supports, but are added in later
+ * because they require additional vendor enabling.
+ */
+#define VEND_F(name)						\
+({								\
+	KVM_VALIDATE_CPU_CAP_USAGE(name);			\
+	0;							\
+})
+
+/*
+ * Operating System Features - For features that KVM dynamically sets/clears at
+ * runtime, e.g. when CR4 changes, but are never advertised to userspace.
+ */
+#define OS_F(name)						\
+({								\
+	KVM_VALIDATE_CPU_CAP_USAGE(name);			\
+	0;							\
+})
+
 /*
  * Magic value used by KVM when querying userspace-provided CPUID entries and
  * doesn't care about the CPUID index because the index of the function in
@@ -727,6 +748,7 @@  do {									\
 	u32 __leaf = __feature_leaf(X86_FEATURE_##name);		\
 									\
 	BUILD_BUG_ON(__leaf != kvm_cpu_cap_init_in_progress);		\
+	kvm_known_cpu_caps[__leaf] |= feature_bit(name);		\
 } while (0)
 
 /*
@@ -771,14 +793,14 @@  void kvm_set_cpu_caps(void)
 		 * NOTE: MONITOR (and MWAIT) are emulated as NOP, but *not*
 		 * advertised to guests via CPUID!
 		 */
-		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64 */ | VMM_F(MWAIT) |
-		0 /* DS-CPL, VMX, SMX, EST */ |
+		F(XMM3) | F(PCLMULQDQ) | VEND_F(DTES64) | VMM_F(MWAIT) |
+		VEND_F(VMX) | 0 /* DS-CPL, SMX, EST */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
 		F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
 		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | EMUL_F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		EMUL_F(TSC_DEADLINE_TIMER) | F(AES) | F(XSAVE) |
-		0 /* OSXSAVE */ | F(AVX) | F(F16C) | F(RDRAND) |
+		OS_F(OSXSAVE) | F(AVX) | F(F16C) | F(RDRAND) |
 		EMUL_F(HYPERVISOR)
 	);
 
@@ -788,7 +810,7 @@  void kvm_set_cpu_caps(void)
 		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
 		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
 		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
-		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		0 /* Reserved */ | F(DS) | 0 /* ACPI */ | F(MMX) |
 		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
 		0 /* HTT, TM, Reserved, PBE */
 	);
@@ -796,17 +818,17 @@  void kvm_set_cpu_caps(void)
 	kvm_cpu_cap_init(CPUID_7_0_EBX,
 		F(FSGSBASE) | EMUL_F(TSC_ADJUST) | F(SGX) | F(BMI1) | F(HLE) |
 		F(AVX2) | F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) |
-		F(INVPCID) | F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ |
+		F(INVPCID) | F(RTM) | F(ZERO_FCS_FDS) | VEND_F(MPX) |
 		F(AVX512F) | F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) |
-		F(AVX512IFMA) | F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ |
+		F(AVX512IFMA) | F(CLFLUSHOPT) | F(CLWB) | VEND_F(INTEL_PT) |
 		F(AVX512PF) | F(AVX512ER) | F(AVX512CD) | F(SHA_NI) |
 		F(AVX512BW) | F(AVX512VL));
 
 	kvm_cpu_cap_init(CPUID_7_ECX,
-		F(AVX512VBMI) | RAW_F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+		F(AVX512VBMI) | RAW_F(LA57) | F(PKU) | OS_F(OSPKE) | F(RDPID) |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | VEND_F(WAITPKG) |
 		F(SGX_LC) | F(BUS_LOCK_DETECT)
 	);
 
@@ -858,11 +880,11 @@  void kvm_set_cpu_caps(void)
 	);
 
 	kvm_cpu_cap_init(CPUID_8000_0001_ECX,
-		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+		F(LAHF_LM) | F(CMP_LEGACY) | VEND_F(SVM) | 0 /* ExtApicSpace */ |
 		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
-		F(TOPOEXT) | 0 /* PERFCTR_CORE */
+		F(TOPOEXT) | VEND_F(PERFCTR_CORE)
 	);
 
 	kvm_cpu_cap_init(CPUID_8000_0001_EDX,
@@ -905,23 +927,22 @@  void kvm_set_cpu_caps(void)
 		kvm_cpu_cap_set(X86_FEATURE_AMD_SSBD);
 	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
 		kvm_cpu_cap_set(X86_FEATURE_AMD_SSB_NO);
-	/*
-	 * The preference is to use SPEC CTRL MSR instead of the
-	 * VIRT_SPEC MSR.
-	 */
-	if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
-	    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
-		kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
 
 	/*
 	 * Hide all SVM features by default, SVM will set the cap bits for
 	 * features it emulates and/or exposes for L1.
 	 */
-	kvm_cpu_cap_init(CPUID_8000_000A_EDX, 0);
+	kvm_cpu_cap_init(CPUID_8000_000A_EDX,
+		VEND_F(VMCBCLEAN) | VEND_F(FLUSHBYASID) | VEND_F(NRIPS) |
+		VEND_F(TSCRATEMSR) | VEND_F(V_VMSAVE_VMLOAD) | VEND_F(LBRV) |
+		VEND_F(PAUSEFILTER) | VEND_F(PFTHRESHOLD) | VEND_F(VGIF) |
+		VEND_F(VNMI) | VEND_F(SVME_ADDR_CHK)
+	);
 
 	kvm_cpu_cap_init(CPUID_8000_001F_EAX,
-		0 /* SME */ | 0 /* SEV */ | 0 /* VM_PAGE_FLUSH */ | 0 /* SEV_ES */ |
-		F(SME_COHERENT));
+		VEND_F(SME) | VEND_F(SEV) | 0 /* VM_PAGE_FLUSH */ | VEND_F(SEV_ES) |
+		F(SME_COHERENT)
+	);
 
 	kvm_cpu_cap_init(CPUID_8000_0021_EAX,
 		F(NO_NESTED_DATA_BP) | F(LFENCE_RDTSC) | 0 /* SmmPgCfgLock */ |
@@ -977,6 +998,26 @@  EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 #undef KVM_VALIDATE_CPU_CAP_USAGE
 #define KVM_VALIDATE_CPU_CAP_USAGE(name)
 
+
+extern unsigned int __start___kvm_features[];
+extern unsigned int __stop___kvm_features[];
+
+void kvm_validate_cpu_caps(void)
+{
+	int i;
+
+	for (i = 0; i < __stop___kvm_features - __start___kvm_features; i++) {
+		u32 feature = __feature_translate(__start___kvm_features[i]);
+		u32 leaf = feature / 32;
+
+		if (kvm_known_cpu_caps[leaf] & BIT(feature & 31))
+			continue;
+
+		pr_warn("Word %u, bit %u (%lx) checked but not supported\n",
+			leaf, feature & 31, BIT(feature & 31));
+	}
+
+}
 struct kvm_cpuid_array {
 	struct kvm_cpuid_entry2 *entries;
 	int maxnent;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 0bf3bddd0e29..32a86de980c7 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -10,6 +10,7 @@ 
 
 extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
 void kvm_set_cpu_caps(void);
+void kvm_validate_cpu_caps(void);
 
 void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
@@ -245,8 +246,8 @@  static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
 		guest_cpu_cap_clear(vcpu, x86_feature);
 }
 
-static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
-					      unsigned int x86_feature)
+static __always_inline bool __guest_cpu_cap_has(struct kvm_vcpu *vcpu,
+					        unsigned int x86_feature)
 {
 	unsigned int x86_leaf = __feature_leaf(x86_feature);
 
@@ -254,6 +255,17 @@  static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
 	return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
 }
 
+#define guest_cpu_cap_has(vcpu, x86_feature)			\
+({								\
+	asm volatile(						\
+		" .pushsection \"__kvm_features\",\"a\"\n"	\
+		" .balign 4\n"					\
+		" .long " __stringify(x86_feature) " \n"	\
+		" .popsection\n"				\
+	);							\
+	__guest_cpu_cap_has(vcpu, x86_feature);			\
+})
+
 static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5aa7581802f7..f6b7c5c862fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9790,6 +9790,8 @@  int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	if (r != 0)
 		goto out_mmu_exit;
 
+	kvm_validate_cpu_caps();
+
 	kvm_ops_update(ops);
 
 	for_each_online_cpu(cpu) {
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f7749d0f2562..102fc2a39083 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -533,6 +533,10 @@ 
 		BOUNDED_SECTION_BY(__modver, ___modver)			\
 	}								\
 									\
+	__kvm_features : AT(ADDR(__kvm_features) - LOAD_OFFSET) {	\
+		BOUNDED_SECTION_BY(__kvm_features, ___kvm_features)	\
+	}								\
+									\
 	KCFI_TRAPS							\
 									\
 	RO_EXCEPTION_TABLE						\