Message ID | 1517938181-15317-8-git-send-email-dwmw@amazon.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/02/2018 18:29, David Woodhouse wrote: > From: KarimAllah Ahmed <karahmed@amazon.de> > > Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO > (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the > contents will come directly from the hardware, but user-space can still > override it. Uhm, taking contents from the hardware is wrong (guess why---live migration). I'll send a revert of those two lines. Paolo > [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional] > > Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> > Reviewed-by: Darren Kenny <darren.kenny@oracle.com> > Reviewed-by: Jim Mattson <jmattson@google.com> > Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > Cc: Andrea Arcangeli <aarcange@redhat.com> > Cc: Andi Kleen <ak@linux.intel.com> > Cc: Jun Nakajima <jun.nakajima@intel.com> > Cc: kvm@vger.kernel.org > Cc: Dave Hansen <dave.hansen@intel.com> > Cc: Linus Torvalds <torvalds@linux-foundation.org> > Cc: Andy Lutomirski <luto@kernel.org> > Cc: Asit Mallick <asit.k.mallick@intel.com> > Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com> > Cc: Greg KH <gregkh@linuxfoundation.org> > Cc: Dan Williams <dan.j.williams@intel.com> > Cc: Tim Chen <tim.c.chen@linux.intel.com> > Cc: Ashok Raj <ashok.raj@intel.com> > Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de > > (cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd) > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> > --- > arch/x86/kvm/cpuid.c | 8 +++++++- > arch/x86/kvm/cpuid.h | 8 ++++++++ > arch/x86/kvm/vmx.c | 15 +++++++++++++++ > arch/x86/kvm/x86.c | 1 + > 4 files changed, 31 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 6f24483..9c6493f 100644 > --- a/arch/x86/kvm/cpuid.c > +++ 
b/arch/x86/kvm/cpuid.c > @@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > /* cpuid 7.0.ecx*/ > const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; > > + /* cpuid 7.0.edx*/ > + const u32 kvm_cpuid_7_0_edx_x86_features = > + F(ARCH_CAPABILITIES); > + > /* all calls to cpuid_count() should be made on the same cpu */ > get_cpu(); > > @@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > /* PKU is not yet implemented for shadow paging. */ > if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) > entry->ecx &= ~F(PKU); > + entry->edx &= kvm_cpuid_7_0_edx_x86_features; > + cpuid_mask(&entry->edx, CPUID_7_EDX); > } else { > entry->ebx = 0; > entry->ecx = 0; > + entry->edx = 0; > } > entry->eax = 0; > - entry->edx = 0; > break; > } > case 9: > diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h > index ec4f9dc..8719997 100644 > --- a/arch/x86/kvm/cpuid.h > +++ b/arch/x86/kvm/cpuid.h > @@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) > return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); > } > > +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) > +{ > + struct kvm_cpuid_entry2 *best; > + > + best = kvm_find_cpuid_entry(vcpu, 7, 0); > + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); > +} > + > > /* > * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index dd6c831..92bf61f 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -551,6 +551,8 @@ struct vcpu_vmx { > u64 msr_guest_kernel_gs_base; > #endif > > + u64 arch_capabilities; > + > u32 vm_entry_controls_shadow; > u32 vm_exit_controls_shadow; > /* > @@ -2979,6 +2981,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > case MSR_IA32_TSC: > msr_info->data = guest_read_tsc(vcpu); > break; > + case MSR_IA32_ARCH_CAPABILITIES: 
> + if (!msr_info->host_initiated && > + !guest_cpuid_has_arch_capabilities(vcpu)) > + return 1; > + msr_info->data = to_vmx(vcpu)->arch_capabilities; > + break; > case MSR_IA32_SYSENTER_CS: > msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); > break; > @@ -3110,6 +3118,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, > MSR_TYPE_W); > break; > + case MSR_IA32_ARCH_CAPABILITIES: > + if (!msr_info->host_initiated) > + return 1; > + vmx->arch_capabilities = data; > + break; > case MSR_IA32_CR_PAT: > if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { > if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) > @@ -5200,6 +5213,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) > ++vmx->nmsrs; > } > > + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) > + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); > > vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index e023ef9..94d1573 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { > #endif > MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, > MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, > + MSR_IA32_ARCH_CAPABILITIES > }; > > static unsigned num_msrs_to_save; >
On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > Uhm, taking contents from the hardware is wrong (guess why---live > migration). I'll send a revert of those two lines. Hardware seems like a reasonable place to get the default value (cf. the VMX capability MSRs). Should these two lines just be moved to vmx_create_cpu?
On Fri, 2018-02-16 at 08:29 -0800, Jim Mattson wrote: > On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > Uhm, taking contents from the hardware is wrong (guess why---live > > migration). I'll send a revert of those two lines. > > Hardware seems like a reasonable place to get the default value (cf. > the VMX capability MSRs). Should these two lines just be moved to > vmx_create_cpu? They're already in vmx_create_vcpu(). (Well, in vmx_vcpu_setup() which is a static function called only once, from vmx_create_vcpu().)
On 16/02/2018 17:29, Jim Mattson wrote: > On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote: > >> Uhm, taking contents from the hardware is wrong (guess why---live >> migration). I'll send a revert of those two lines. > > Hardware seems like a reasonable place to get the default value (cf. > the VMX capability MSRs). There are some differences: - a zero value for ARCH_CAPABILITIES should be safe, while a zero value for VMX capabilities doesn't really make sense. On the contrary, a nonzero value for ARCH_CAPABILITIES is not safe across live migration. - VMX doesn't support live migration; before adding that support we will probably have Tom's patches to retrieve MSR capabilities. Thanks, Paolo > Should these two lines just be moved to > vmx_create_cpu? >
On Mon, 2018-02-19 at 14:10 +0100, Paolo Bonzini wrote: > > Hardware seems like a reasonable place to get the default value (cf. > > the VMX capability MSRs). > > There are some differences: > > - a zero value for ARCH_CAPABILITIES should be safe, while a zero value > for VMX capabilities doesn't really make sense. On the contrary, a > nonzero value for ARCH_CAPABILITIES is not safe across live migration. Any VMM which is going to support live migration surely needs to pay at least a small amount of attention to the features it exposes? Exposing the ARCH_CAPABILITIES CPUID bit without actually looking at the contents of the associated MSR which that bit advertises would be... a little strange, would it not? I don't see why we care so much about the *default* value, in that context.
On 19/02/2018 14:35, David Woodhouse wrote: > On Mon, 2018-02-19 at 14:10 +0100, Paolo Bonzini wrote: >>> Hardware seems like a reasonable place to get the default value (cf. >>> the VMX capability MSRs). >> >> There are some differences: >> >> - a zero value for ARCH_CAPABILITIES should be safe, while a zero value >> for VMX capabilities doesn't really make sense. On the contrary, a >> nonzero value for ARCH_CAPABILITIES is not safe across live migration. > > Any VMM which is going to support live migration surely needs to pay at > least a small amount of attention to the features it exposes? Exposing > the ARCH_CAPABILITIES CPUID bit without actually looking at the > contents of the associated MSR which that bit advertises would be... a > little strange, would it not? I think what we should do is simply backport Tom Lendacky's series to 4.14 and 4.9 ASAP, and add ARCH_CAPABILITIES support there. Then the question of the default becomes moot, more or less. Paolo > I don't see why we care so much about the *default* value, in that > context.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6f24483..9c6493f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/; + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(ARCH_CAPABILITIES); + /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); } else { entry->ebx = 0; entry->ecx = 0; + entry->edx = 0; } entry->eax = 0; - entry->edx = 0; break; } case 9: diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index ec4f9dc..8719997 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu) return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL)); } +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 7, 0); + return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES)); +} + /* * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dd6c831..92bf61f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -551,6 +551,8 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif + u64 arch_capabilities; + u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; /* @@ -2979,6 +2981,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC: msr_info->data = guest_read_tsc(vcpu); break; + case MSR_IA32_ARCH_CAPABILITIES: + if 
(!msr_info->host_initiated && + !guest_cpuid_has_arch_capabilities(vcpu)) + return 1; + msr_info->data = to_vmx(vcpu)->arch_capabilities; + break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -3110,6 +3118,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vmx->arch_capabilities = data; + break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -5200,6 +5213,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities); vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e023ef9..94d1573 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_ARCH_CAPABILITIES }; static unsigned num_msrs_to_save;