Message ID | 20190712082907.29137-3-tao3.xu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: Enable user wait instructions | expand |
On Fri, Jul 12, 2019 at 04:29:06PM +0800, Tao Xu wrote: > diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c > index 6a204e7336c1..631152a67c6e 100644 > --- a/arch/x86/kernel/cpu/umwait.c > +++ b/arch/x86/kernel/cpu/umwait.c > @@ -15,7 +15,8 @@ > * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, > * umwait max time is 100000 in TSC-quanta and C0.2 is enabled > */ > -static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); > +u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); > +EXPORT_SYMBOL_GPL(umwait_control_cached); It'd probably be better to add an accessor to expose umwait_control_cached given that umwait.c is using {READ,WRITE}_ONCE() and there shouldn't be a need to write it outside of umwait.c. > /* > * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index f411c9ae5589..0787f140d155 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -1676,6 +1676,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > #endif > case MSR_EFER: > return kvm_get_msr_common(vcpu, msr_info); > + case MSR_IA32_UMWAIT_CONTROL: > + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) > + return 1; > + > + msr_info->data = vmx->msr_ia32_umwait_control; > + break; > case MSR_IA32_SPEC_CTRL: > if (!msr_info->host_initiated && > !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) > @@ -1838,6 +1844,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) > return 1; > vmcs_write64(GUEST_BNDCFGS, data); > break; > + case MSR_IA32_UMWAIT_CONTROL: > + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) > + return 1; > + > + /* The reserved bit IA32_UMWAIT_CONTROL[1] should be zero */ > + if (data & BIT_ULL(1)) > + return 1; > + > + vmx->msr_ia32_umwait_control = data; The SDM only defines bits 31:0, and the kernel uses a u32 to cache its 
value. I assume bits 63:32 are reserved? I'm guessing we also need an SDM update... > + break; > case MSR_IA32_SPEC_CTRL: > if (!msr_info->host_initiated && > !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) > @@ -4139,6 +4155,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > vmx->rmode.vm86_active = 0; > vmx->spec_ctrl = 0; > > + vmx->msr_ia32_umwait_control = 0; > + > vcpu->arch.microcode_version = 0x100000000ULL; > vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); > kvm_set_cr8(vcpu, 0); > @@ -6352,6 +6370,19 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) > msrs[i].host, false); > } >
On 7/12/2019 11:52 PM, Sean Christopherson wrote: > On Fri, Jul 12, 2019 at 04:29:06PM +0800, Tao Xu wrote: >> diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c >> index 6a204e7336c1..631152a67c6e 100644 >> --- a/arch/x86/kernel/cpu/umwait.c >> +++ b/arch/x86/kernel/cpu/umwait.c >> @@ -15,7 +15,8 @@ >> * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, >> * umwait max time is 100000 in TSC-quanta and C0.2 is enabled >> */ >> -static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); >> +u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); >> +EXPORT_SYMBOL_GPL(umwait_control_cached); > > It'd probably be better to add an accessor to expose umwait_control_cached > given that umwait.c is using {READ,WRITE}_ONCE() and there shouldn't be a > need to write it outside of umwait.c. > OKay >> /* >> * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index f411c9ae5589..0787f140d155 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -1676,6 +1676,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> #endif >> case MSR_EFER: >> return kvm_get_msr_common(vcpu, msr_info); >> + case MSR_IA32_UMWAIT_CONTROL: >> + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) >> + return 1; >> + >> + msr_info->data = vmx->msr_ia32_umwait_control; >> + break; >> case MSR_IA32_SPEC_CTRL: >> if (!msr_info->host_initiated && >> !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) >> @@ -1838,6 +1844,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) >> return 1; >> vmcs_write64(GUEST_BNDCFGS, data); >> break; >> + case MSR_IA32_UMWAIT_CONTROL: >> + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) >> + return 1; >> + >> + /* The reserved bit IA32_UMWAIT_CONTROL[1] should be zero */ >> + if (data & BIT_ULL(1)) >> + return 1; >> + >> + 
vmx->msr_ia32_umwait_control = data; > > The SDM only defines bits 31:0, and the kernel uses a u32 to cache its > value. I assume bits 63:32 are reserved? I'm guessing we also need an > SDM update... > The SDM defines IA32_UMWAIT_CONTROL as a 32-bit MSR. So do I need to treat bits 63:32 as reserved? >> + break; >> case MSR_IA32_SPEC_CTRL: >> if (!msr_info->host_initiated && >> !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) >> @@ -4139,6 +4155,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) >> vmx->rmode.vm86_active = 0; >> vmx->spec_ctrl = 0; >> >> + vmx->msr_ia32_umwait_control = 0; >> + >> vcpu->arch.microcode_version = 0x100000000ULL; >> vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); >> kvm_set_cr8(vcpu, 0); >> @@ -6352,6 +6370,19 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) >> msrs[i].host, false); >> } >>
On Mon, Jul 15, 2019 at 09:22:14AM +0800, Tao Xu wrote: > On 7/12/2019 11:52 PM, Sean Christopherson wrote: > >The SDM only defines bits 31:0, and the kernel uses a u32 to cache its > >value. I assume bits 63:32 are reserved? I'm guessing we also need an > >SDM update... > > > > The SDM define IA32_UMWAIT_CONTROL is a 32bit MSR. So need me to set 63:32 > reserved? Huh, I didn't realize the SDM allows 32 bit MSRs, I assumed all bits needed to be explicitly defined even if the underlying implementation only tracked 32 bits. RDMSR: If fewer than 64 bits are implemented in the MSR being read, the values return in EDX:EAX in unimplemented bit locations are undefined. WRMSR: Undefined or reserved bits in an MSR should be set to values previously read. From KVM's perspective, bits 63:32 should be treated as reserved-to-zero. This also means that struct vcpu_vmx can track a u32 instead of a u64 for msr_ia32_umwait_control.
On Fri, Jul 12, 2019 at 04:29:06PM +0800, Tao Xu wrote: > UMWAIT and TPAUSE instructions use IA32_UMWAIT_CONTROL at MSR index E1H > to determine the maximum time in TSC-quanta that the processor can reside > in either C0.1 or C0.2. > > This patch emulates MSR IA32_UMWAIT_CONTROL in guest and differentiates > IA32_UMWAIT_CONTROL between host and guest. The variable > umwait_control_cached in arch/x86/kernel/cpu/umwait.c caches the MSR value, so > this patch uses it to avoid frequent rdmsr of IA32_UMWAIT_CONTROL. > > Co-developed-by: Jingqi Liu <jingqi.liu@intel.com> > Signed-off-by: Jingqi Liu <jingqi.liu@intel.com> > Signed-off-by: Tao Xu <tao3.xu@intel.com> > --- [...] > +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) > +{ > + if (!vmx_has_waitpkg(vmx)) > + return; > + > + if (vmx->msr_ia32_umwait_control != umwait_control_cached) > + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, > + vmx->msr_ia32_umwait_control, > + umwait_control_cached, false); How exactly do we ensure NR_AUTOLOAD_MSRS (8) is still large enough? I see 3 existing add_atomic_switch_msr() calls, but the one at atomic_switch_perf_msrs() is in a loop. Are we absolutely sure that perf_guest_get_msrs() will never return more than 5 MSRs? > + else > + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); > +} > + > static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) > { > vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); [...]
On 7/17/2019 12:03 AM, Eduardo Habkost wrote: > On Fri, Jul 12, 2019 at 04:29:06PM +0800, Tao Xu wrote: >> UMWAIT and TPAUSE instructions use IA32_UMWAIT_CONTROL at MSR index E1H >> to determines the maximum time in TSC-quanta that the processor can reside >> in either C0.1 or C0.2. >> >> This patch emulates MSR IA32_UMWAIT_CONTROL in guest and differentiate >> IA32_UMWAIT_CONTROL between host and guest. The variable >> mwait_control_cached in arch/x86/power/umwait.c caches the MSR value, so >> this patch uses it to avoid frequently rdmsr of IA32_UMWAIT_CONTROL. >> >> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com> >> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com> >> Signed-off-by: Tao Xu <tao3.xu@intel.com> >> --- > [...] >> +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) >> +{ >> + if (!vmx_has_waitpkg(vmx)) >> + return; >> + >> + if (vmx->msr_ia32_umwait_control != umwait_control_cached) >> + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, >> + vmx->msr_ia32_umwait_control, >> + umwait_control_cached, false); > > How exactly do we ensure NR_AUTOLOAD_MSRS (8) is still large enough? > > I see 3 existing add_atomic_switch_msr() calls, but the one at > atomic_switch_perf_msrs() is in a loop. Are we absolutely sure > that perf_guest_get_msrs() will never return more than 5 MSRs? > Quote the code of intel_guest_get_msrs: static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) { [...] arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; if (x86_pmu.flags & PMU_FL_PEBS_ALL) arr[0].guest &= ~cpuc->pebs_enabled; else arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); *nr = 1; if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { [...] arr[1].msr = MSR_IA32_PEBS_ENABLE; arr[1].host = cpuc->pebs_enabled; arr[1].guest = 0; *nr = 2; [...] There are most 2 msrs now. By default umwait is disabled in KVM. 
So by default there is no MSR_IA32_UMWAIT_CONTROL added into add_atomic_switch_msr(). Thanks. > >> + else >> + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); >> +} >> + >> static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) >> { >> vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); > [...] > >
On 7/17/2019 9:17 AM, Tao Xu wrote: > On 7/17/2019 12:03 AM, Eduardo Habkost wrote: >> On Fri, Jul 12, 2019 at 04:29:06PM +0800, Tao Xu wrote: >>> UMWAIT and TPAUSE instructions use IA32_UMWAIT_CONTROL at MSR index E1H >>> to determines the maximum time in TSC-quanta that the processor can >>> reside >>> in either C0.1 or C0.2. >>> >>> This patch emulates MSR IA32_UMWAIT_CONTROL in guest and differentiate >>> IA32_UMWAIT_CONTROL between host and guest. The variable >>> mwait_control_cached in arch/x86/power/umwait.c caches the MSR value, so >>> this patch uses it to avoid frequently rdmsr of IA32_UMWAIT_CONTROL. >>> >>> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com> >>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com> >>> Signed-off-by: Tao Xu <tao3.xu@intel.com> >>> --- >> [...] >>> +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) >>> +{ >>> + if (!vmx_has_waitpkg(vmx)) >>> + return; >>> + >>> + if (vmx->msr_ia32_umwait_control != umwait_control_cached) >>> + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, >>> + vmx->msr_ia32_umwait_control, >>> + umwait_control_cached, false); >> >> How exactly do we ensure NR_AUTOLOAD_MSRS (8) is still large enough? >> >> I see 3 existing add_atomic_switch_msr() calls, but the one at >> atomic_switch_perf_msrs() is in a loop. Are we absolutely sure >> that perf_guest_get_msrs() will never return more than 5 MSRs? >> > > Quote the code of intel_guest_get_msrs: > > static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) > { > [...] > arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; > arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; > arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; > if (x86_pmu.flags & PMU_FL_PEBS_ALL) > arr[0].guest &= ~cpuc->pebs_enabled; > else > arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK); > *nr = 1; > > if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) { > [...] 
> arr[1].msr = MSR_IA32_PEBS_ENABLE; > arr[1].host = cpuc->pebs_enabled; > arr[1].guest = 0; > *nr = 2; > [...] > > There are most 2 msrs now. By default umwait is disabled in KVM. So by > default there is no MSR_IA32_UMWAIT_CONTROL added into > add_atomic_switch_msr(). > > Thanks. And for old hardware, KVM uses core_guest_get_msrs, but umwait is for new hardware, and if the host hardware doesn't have the CPUID feature, there is no MSR_IA32_UMWAIT_CONTROL in KVM as well. >> >>> + else >>> + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); >>> +} >>> + >>> static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) >>> { >>> vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); >> [...] >> >> >
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 6a204e7336c1..631152a67c6e 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -15,7 +15,8 @@ * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, * umwait max time is 100000 in TSC-quanta and C0.2 is enabled */ -static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); +EXPORT_SYMBOL_GPL(umwait_control_cached); /* * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f411c9ae5589..0787f140d155 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1676,6 +1676,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1838,6 +1844,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit IA32_UMWAIT_CONTROL[1] should be zero */ + if (data & BIT_ULL(1)) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -4139,6 +4155,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); 
kvm_set_cr8(vcpu, 0); @@ -6352,6 +6370,19 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + if (!vmx_has_waitpkg(vmx)) + return; + + if (vmx->msr_ia32_umwait_control != umwait_control_cached) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + umwait_control_cached, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) { vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); @@ -6460,6 +6491,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); + vmx_update_hv_timer(vcpu); /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 61128b48c503..b4ca34f7a2da 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 umwait_control_cached; + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -194,6 +196,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u64 msr_ia32_umwait_control; u32 vm_entry_controls_shadow; u32 vm_exit_controls_shadow; @@ -523,6 +526,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63bb1ee8258e..e914e4d03cce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1136,6 +1136,7 @@ static u32 msrs_to_save[] = { MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B, MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B, MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B, + MSR_IA32_UMWAIT_CONTROL, }; static unsigned num_msrs_to_save;