Message ID | 20191128014016.4389-11-sean.j.christopherson@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | x86/cpu: Clean up handling of VMX features | expand |
On Wed, Nov 27, 2019 at 05:40:07PM -0800, Sean Christopherson wrote: > diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c > index a46c9e46f937..93268bde662a 100644 > --- a/arch/x86/kernel/cpu/feat_ctl.c > +++ b/arch/x86/kernel/cpu/feat_ctl.c > @@ -4,6 +4,72 @@ > #include <asm/cpufeature.h> > #include <asm/msr-index.h> > #include <asm/processor.h> > +#include <asm/vmx.h> > + > +#ifdef CONFIG_X86_VMX_FEATURE_NAMES > +enum vmx_feature_leafs { > + MISC_FEATURES = 0, > + PRIMARY_PROC_CTLS, > + SECONDARY_PROC_CTLS, > + NR_VMX_FEATURE_WORDS, > +}; > + > +#define F(x) BIT(VMX_FEATURE_##x & 0x1f) Eww, this F-thing has been always bugging me, especially if it means something a little different each time: arch/x86/crypto/blowfish-x86_64-asm_64.S:59:#define F() \ arch/x86/kernel/cpu/feat_ctl.c:17:#define F(x) BIT(VMX_FEATURE_##x & 0x1f) arch/x86/kvm/cpuid.c:65:#define F(x) bit(X86_FEATURE_##x) arch/x86/kvm/emulate.c:4393:#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } arch/x86/kvm/svm.c:5927:#define F(x) bit(X86_FEATURE_##x) I guess you can call yours VMX_F() or so, just so that it's name is something different. > +static void init_vmx_capabilities(struct cpuinfo_x86 *c) > +{ > + u32 supported, funcs, ept, vpid, ign; > + > + BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS); > + > + /* > + * The high bits contain the allowed-1 settings, i.e. features that can > + * be turned on. The low bits contain the allowed-0 settings, i.e. > + * features that can be turned off. Ignore the allowed-0 settings, > + * if a feature can be turned on then it's supported. > + */ > + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported); > + c->vmx_capability[PRIMARY_PROC_CTLS] = supported; > + > + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); > + c->vmx_capability[SECONDARY_PROC_CTLS] = supported; > + > + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); > + rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); > + > + /* > + * Except for EPT+VPID, which enumerates support for both in a single > + * MSR, low for EPT, high for VPID. > + */ > + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid); Right, so this is a garden variety of rdmsr() and rdmsr_safe() and the safe variant's retval needs to be checked, strictly speaking. It probably doesn't matter here since you'll get 0s if it fails, which means feature not supported, so all good. But I guess you can still use rdmsr_safe() everywhere just so it doesn't cause head scratching in the future, when one looks at that code. > +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */ > > #undef pr_fmt > #define pr_fmt(fmt) "x86/cpu: " fmt > @@ -50,5 +116,9 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) > pr_err_once("VMX (%s TXT) disabled by BIOS\n", > tboot ? "inside" : "outside"); > clear_cpu_cap(c, X86_FEATURE_VMX); > + } else { > +#ifdef CONFIG_X86_VMX_FEATURE_NAMES > + init_vmx_capabilities(c); > +#endif Can't say that I'm happy about all that ifdeffery but I guess we need to perpetuate this since X86_FEATURE_NAMES is there for embedded. In practice, probably no one disables it...
On Thu, Dec 12, 2019 at 12:38:38PM +0100, Borislav Petkov wrote: > On Wed, Nov 27, 2019 at 05:40:07PM -0800, Sean Christopherson wrote: > > +static void init_vmx_capabilities(struct cpuinfo_x86 *c) > > +{ > > + u32 supported, funcs, ept, vpid, ign; > > + > > + BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS); > > + > > + /* > > + * The high bits contain the allowed-1 settings, i.e. features that can > > + * be turned on. The low bits contain the allowed-0 settings, i.e. > > + * features that can be turned off. Ignore the allowed-0 settings, > > + * if a feature can be turned on then it's supported. > > + */ > > + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported); > > + c->vmx_capability[PRIMARY_PROC_CTLS] = supported; > > + > > + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); > > + c->vmx_capability[SECONDARY_PROC_CTLS] = supported; > > + > > + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); > > + rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); > > + > > + /* > > + * Except for EPT+VPID, which enumerates support for both in a single > > + * MSR, low for EPT, high for VPID. > > + */ > > + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid); > > Right, so this is a garden variety of rdmsr() and rdmsr_safe() and > the safe variant's retval needs to be checked, strictly speaking. It > probably doesn't matter here since you'll get 0s if it fails, which > means feature not supported, so all good. > > But I guess you can still use rdmsr_safe() everywhere just so it doesn't > cause head scratching in the future, when one looks at that code. The reasoning behind using vanilla rdmsr() on PROC and PIN controls is that those MSRs should exist on any CPU that supports VMX, i.e. we want the WARN. The alternative would be to use rdmsr_safe() for everything and then explicitly disable VMX if a fault on PROC or PIN occurs, but that circles us back to the handling a fault on rdmsr(MSR_IA32_FEAT_CTL), i.e. is it really worth gracefully handling a fault that should never occur? > > > +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */ > > > > #undef pr_fmt > > #define pr_fmt(fmt) "x86/cpu: " fmt > > @@ -50,5 +116,9 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) > > pr_err_once("VMX (%s TXT) disabled by BIOS\n", > > tboot ? "inside" : "outside"); > > clear_cpu_cap(c, X86_FEATURE_VMX); > > + } else { > > +#ifdef CONFIG_X86_VMX_FEATURE_NAMES > > + init_vmx_capabilities(c); > > +#endif > > Can't say that I'm happy about all that ifdeffery but I guess we need > to perpetuate this since X86_FEATURE_NAMES is there for embedded. In > practice, probably no one disables it... Ya, systemd wasn't happy when I tried booting without X86_FEATURE_NAMES.
On Thu, Dec 12, 2019 at 09:55:11AM -0800, Sean Christopherson wrote: > The reasoning behind using vanilla rdmsr() on PROC and PIN controls is that > those MSRs should exist on any CPU that supports VMX, i.e. we want the WARN. > > The alternative would be to use rdmsr_safe() for everything and then > explicitly disable VMX if a fault on PROC or PIN occurs, but that circles > us back to the handling a fault on rdmsr(MSR_IA32_FEAT_CTL), i.e. is it > really worth gracefully handling a fault that should never occur? No but pls put a comment above it explaining why we're doing rdmsr() only with those two MSRs. Thx.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 526425fcaedc..bc3a497c029c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -391,6 +391,10 @@ config IA32_FEAT_CTL def_bool y depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN +config X86_VMX_FEATURE_NAMES + def_bool y + depends on IA32_FEAT_CTL && X86_FEATURE_NAMES + menuconfig PROCESSOR_SELECT bool "Supported processor vendors" if EXPERT ---help--- diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bab513404606..3324cf036dfb 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -85,6 +85,9 @@ struct cpuinfo_x86 { #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; +#endif +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + __u32 vmx_capability[NVMXINTS]; #endif __u8 x86_virt_bits; __u8 x86_phys_bits; diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h index 1b96b03c1147..ab42d94e2359 100644 --- a/arch/x86/include/asm/vmxfeatures.h +++ b/arch/x86/include/asm/vmxfeatures.h @@ -2,6 +2,11 @@ #ifndef _ASM_X86_VMXFEATURES_H #define _ASM_X86_VMXFEATURES_H +/* + * Defines VMX CPU feature bits + */ +#define NVMXINTS 3 /* N 32-bit words worth of info */ + /* * Note: If the comment begins with a quoted string, that string is used * in /proc/cpuinfo instead of the macro name. If the string is "", diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9d6a35a4586e..df1eacd26443 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1449,6 +1449,9 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif c->x86_cache_alignment = c->x86_clflush_size; memset(&c->x86_capability, 0, sizeof(c->x86_capability)); +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + memset(&c->vmx_capability, 0, sizeof(c->vmx_capability)); +#endif generic_identify(c); diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index a46c9e46f937..93268bde662a 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -4,6 +4,72 @@ #include <asm/cpufeature.h> #include <asm/msr-index.h> #include <asm/processor.h> +#include <asm/vmx.h> + +#ifdef CONFIG_X86_VMX_FEATURE_NAMES +enum vmx_feature_leafs { + MISC_FEATURES = 0, + PRIMARY_PROC_CTLS, + SECONDARY_PROC_CTLS, + NR_VMX_FEATURE_WORDS, +}; + +#define F(x) BIT(VMX_FEATURE_##x & 0x1f) + +static void init_vmx_capabilities(struct cpuinfo_x86 *c) +{ + u32 supported, funcs, ept, vpid, ign; + + BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS); + + /* + * The high bits contain the allowed-1 settings, i.e. features that can + * be turned on. The low bits contain the allowed-0 settings, i.e. + * features that can be turned off. Ignore the allowed-0 settings, + * if a feature can be turned on then it's supported. + */ + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported); + c->vmx_capability[PRIMARY_PROC_CTLS] = supported; + + rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported); + c->vmx_capability[SECONDARY_PROC_CTLS] = supported; + + rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported); + rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs); + + /* + * Except for EPT+VPID, which enumerates support for both in a single + * MSR, low for EPT, high for VPID. + */ + rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid); + + /* Pin, EPT, VPID and VM-Func are merged into a single word. */ + WARN_ON_ONCE(supported >> 16); + WARN_ON_ONCE(funcs >> 4); + c->vmx_capability[MISC_FEATURES] = (supported & 0xffff) | + ((vpid & 0x1) << 16) | + ((funcs & 0xf) << 28); + + /* EPT bits are full on scattered and must be manually handled. */ + if (ept & VMX_EPT_EXECUTE_ONLY_BIT) + c->vmx_capability[MISC_FEATURES] |= F(EPT_EXECUTE_ONLY); + if (ept & VMX_EPT_AD_BIT) + c->vmx_capability[MISC_FEATURES] |= F(EPT_AD); + if (ept & VMX_EPT_1GB_PAGE_BIT) + c->vmx_capability[MISC_FEATURES] |= F(EPT_1GB); + + /* Synthetic APIC features that are aggregates of multiple features. */ + if ((c->vmx_capability[PRIMARY_PROC_CTLS] & F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_PROC_CTLS] & F(VIRT_APIC_ACCESSES))) + c->vmx_capability[MISC_FEATURES] |= F(FLEXPRIORITY); + + if ((c->vmx_capability[PRIMARY_PROC_CTLS] & F(VIRTUAL_TPR)) && + (c->vmx_capability[SECONDARY_PROC_CTLS] & F(APIC_REGISTER_VIRT)) && + (c->vmx_capability[SECONDARY_PROC_CTLS] & F(VIRT_INTR_DELIVERY)) && + (c->vmx_capability[MISC_FEATURES] & F(POSTED_INTR))) + c->vmx_capability[MISC_FEATURES] |= F(APICV); +} +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */ #undef pr_fmt #define pr_fmt(fmt) "x86/cpu: " fmt @@ -50,5 +116,9 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) pr_err_once("VMX (%s TXT) disabled by BIOS\n", tboot ? "inside" : "outside"); clear_cpu_cap(c, X86_FEATURE_VMX); + } else { +#ifdef CONFIG_X86_VMX_FEATURE_NAMES + init_vmx_capabilities(c); +#endif } }
Add an entry in struct cpuinfo_x86 to track VMX capabilities and fill the capabilities during IA32_FEAT_CTL MSR initialization. Make the VMX capabilities dependent on IA32_FEAT_CTL and X86_FEATURE_NAMES so as to avoid unnecessary overhead on CPUs that can't possibly support VMX, or when /proc/cpuinfo is not available. Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com> --- arch/x86/Kconfig.cpu | 4 ++ arch/x86/include/asm/processor.h | 3 ++ arch/x86/include/asm/vmxfeatures.h | 5 +++ arch/x86/kernel/cpu/common.c | 3 ++ arch/x86/kernel/cpu/feat_ctl.c | 70 ++++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+)