Message ID | 20231108183003.5981-9-xin3.li@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Enable FRED with KVM VMX | expand |
On Wed, Nov 08, 2023 at 10:29:48AM -0800, Xin Li wrote: >Initialize host VMCS FRED fields with host FRED MSRs' value and >guest VMCS FRED fields to 0. > >FRED CPU states are managed in 9 new FRED MSRs, as well as a few >existing CPU registers and MSRs, e.g., CR4.FRED. To support FRED >context management, new VMCS fields corresponding to most of FRED >CPU state MSRs are added to both the host-state and guest-state >areas of VMCS. > >Specifically no VMCS fields are added for FRED RSP0 and SSP0 MSRs, >because the 2 FRED MSRs are used during ring 3 event delivery only, >thus KVM, running on ring 0, can run safely even with guest FRED >RSP0 and SSP0. It can be deferred to load host FRED RSP0 and SSP0 >until before returning to user level. > >Tested-by: Shan Kang <shan.kang@intel.com> >Signed-off-by: Xin Li <xin3.li@intel.com> >--- > arch/x86/include/asm/vmx.h | 16 ++++++++++++++++ > arch/x86/kvm/vmx/vmx.c | 32 ++++++++++++++++++++++++++++++++ > 2 files changed, 48 insertions(+) > >diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h >index 41796a733bc9..d54a1a1057b0 100644 >--- a/arch/x86/include/asm/vmx.h >+++ b/arch/x86/include/asm/vmx.h >@@ -277,12 +277,28 @@ enum vmcs_field { > GUEST_BNDCFGS_HIGH = 0x00002813, > GUEST_IA32_RTIT_CTL = 0x00002814, > GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, >+ GUEST_IA32_FRED_CONFIG = 0x0000281a, >+ GUEST_IA32_FRED_RSP1 = 0x0000281c, >+ GUEST_IA32_FRED_RSP2 = 0x0000281e, >+ GUEST_IA32_FRED_RSP3 = 0x00002820, >+ GUEST_IA32_FRED_STKLVLS = 0x00002822, >+ GUEST_IA32_FRED_SSP1 = 0x00002824, >+ GUEST_IA32_FRED_SSP2 = 0x00002826, >+ GUEST_IA32_FRED_SSP3 = 0x00002828, > HOST_IA32_PAT = 0x00002c00, > HOST_IA32_PAT_HIGH = 0x00002c01, > HOST_IA32_EFER = 0x00002c02, > HOST_IA32_EFER_HIGH = 0x00002c03, > HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, > HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, >+ HOST_IA32_FRED_CONFIG = 0x00002c08, >+ HOST_IA32_FRED_RSP1 = 0x00002c0a, >+ HOST_IA32_FRED_RSP2 = 0x00002c0c, >+ HOST_IA32_FRED_RSP3 = 0x00002c0e, >+ HOST_IA32_FRED_STKLVLS = 0x00002c10, >+ HOST_IA32_FRED_SSP1 = 0x00002c12, >+ HOST_IA32_FRED_SSP2 = 0x00002c14, >+ HOST_IA32_FRED_SSP3 = 0x00002c16, > PIN_BASED_VM_EXEC_CONTROL = 0x00004000, > CPU_BASED_VM_EXEC_CONTROL = 0x00004002, > EXCEPTION_BITMAP = 0x00004004, >diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >index 327e052d90c1..41772ecdd368 100644 >--- a/arch/x86/kvm/vmx/vmx.c >+++ b/arch/x86/kvm/vmx/vmx.c >@@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, > (unsigned long)(cpu_entry_stack(cpu) + 1)); > } > >+#ifdef CONFIG_X86_64 >+ /* Per-CPU FRED MSRs */ >+ if (cpu_feature_enabled(X86_FEATURE_FRED)) { how about kvm_cpu_cap_has()? to decouple KVM's capability to virtualize a feature and host's enabling a feature. >+ vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1)); >+ vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2)); >+ vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3)); >+ vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1)); >+ vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2)); >+ vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3)); >+ } >+#endif why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't? >+ > vmx->loaded_vmcs->cpu = cpu; > } > } >@@ -4375,6 +4387,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) > > if (cpu_has_load_ia32_efer()) > vmcs_write64(HOST_IA32_EFER, host_efer); >+ >+ /* >+ * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs >+ * are the same on all CPUs, thus they are initialized here. >+ */ >+ if (cpu_feature_enabled(X86_FEATURE_FRED)) { >+ vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG)); >+ vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS)); >+ } > } > > void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) >@@ -4936,6 +4957,17 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) > vmcs_writel(GUEST_IDTR_BASE, 0); > vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); > >+ if (cpu_feature_enabled(X86_FEATURE_FRED)) { >+ vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP1, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP2, 0); >+ vmcs_write64(GUEST_IA32_FRED_RSP3, 0); >+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP1, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP2, 0); >+ vmcs_write64(GUEST_IA32_FRED_SSP3, 0); >+ } >+ move this hunk to __vmx_vcpu_reset() because FRED spec says "INIT does not change the value of the new MSRs." > vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); > vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); > vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); >-- >2.42.0 > >
> >@@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int > cpu, > > (unsigned long)(cpu_entry_stack(cpu) + 1)); > > } > > > >+#ifdef CONFIG_X86_64 > >+ /* Per-CPU FRED MSRs */ > >+ if (cpu_feature_enabled(X86_FEATURE_FRED)) { > > how about kvm_cpu_cap_has()? to decouple KVM's capability to virtualize a feature > and host's enabling a feature. Very likely I guess. > >+ vmcs_write64(HOST_IA32_FRED_RSP1, > read_msr(MSR_IA32_FRED_RSP1)); > >+ vmcs_write64(HOST_IA32_FRED_RSP2, > read_msr(MSR_IA32_FRED_RSP2)); > >+ vmcs_write64(HOST_IA32_FRED_RSP3, > read_msr(MSR_IA32_FRED_RSP3)); > >+ vmcs_write64(HOST_IA32_FRED_SSP1, > read_msr(MSR_IA32_FRED_SSP1)); > >+ vmcs_write64(HOST_IA32_FRED_SSP2, > read_msr(MSR_IA32_FRED_SSP2)); > >+ vmcs_write64(HOST_IA32_FRED_SSP3, > read_msr(MSR_IA32_FRED_SSP3)); > >+ } > >+#endif > > why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't? As if the compiler doesn't complain, I should NOT add it. > > >+ if (cpu_feature_enabled(X86_FEATURE_FRED)) { > >+ vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); > >+ vmcs_write64(GUEST_IA32_FRED_RSP1, 0); > >+ vmcs_write64(GUEST_IA32_FRED_RSP2, 0); > >+ vmcs_write64(GUEST_IA32_FRED_RSP3, 0); > >+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); > >+ vmcs_write64(GUEST_IA32_FRED_SSP1, 0); > >+ vmcs_write64(GUEST_IA32_FRED_SSP2, 0); > >+ vmcs_write64(GUEST_IA32_FRED_SSP3, 0); > >+ } > >+ > > move this hunk to __vmx_vcpu_reset() because FRED spec says > > "INIT does not change the value of the new MSRs." > Yeah, will do.
> > > >+ vmcs_write64(HOST_IA32_FRED_RSP1, > > read_msr(MSR_IA32_FRED_RSP1)); > > >+ vmcs_write64(HOST_IA32_FRED_RSP2, > > read_msr(MSR_IA32_FRED_RSP2)); > > >+ vmcs_write64(HOST_IA32_FRED_RSP3, > > read_msr(MSR_IA32_FRED_RSP3)); > > >+ vmcs_write64(HOST_IA32_FRED_SSP1, > > read_msr(MSR_IA32_FRED_SSP1)); > > >+ vmcs_write64(HOST_IA32_FRED_SSP2, > > read_msr(MSR_IA32_FRED_SSP2)); > > >+ vmcs_write64(HOST_IA32_FRED_SSP3, > > read_msr(MSR_IA32_FRED_SSP3)); > > >+ } > > >+#endif > > > > why is this hunk enclosed in #ifdef CONFIG_X86_64 while the one below isn't? > > As if the compiler doesn't complain, I should NOT add it. I think I don't need to add CONFIG_X86_64 for the above, but somehow this was left over. Let me double check.
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 41796a733bc9..d54a1a1057b0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -277,12 +277,28 @@ enum vmcs_field { GUEST_BNDCFGS_HIGH = 0x00002813, GUEST_IA32_RTIT_CTL = 0x00002814, GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, + GUEST_IA32_FRED_CONFIG = 0x0000281a, + GUEST_IA32_FRED_RSP1 = 0x0000281c, + GUEST_IA32_FRED_RSP2 = 0x0000281e, + GUEST_IA32_FRED_RSP3 = 0x00002820, + GUEST_IA32_FRED_STKLVLS = 0x00002822, + GUEST_IA32_FRED_SSP1 = 0x00002824, + GUEST_IA32_FRED_SSP2 = 0x00002826, + GUEST_IA32_FRED_SSP3 = 0x00002828, HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_EFER = 0x00002c02, HOST_IA32_EFER_HIGH = 0x00002c03, HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + HOST_IA32_FRED_CONFIG = 0x00002c08, + HOST_IA32_FRED_RSP1 = 0x00002c0a, + HOST_IA32_FRED_RSP2 = 0x00002c0c, + HOST_IA32_FRED_RSP3 = 0x00002c0e, + HOST_IA32_FRED_STKLVLS = 0x00002c10, + HOST_IA32_FRED_SSP1 = 0x00002c12, + HOST_IA32_FRED_SSP2 = 0x00002c14, + HOST_IA32_FRED_SSP3 = 0x00002c16, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 327e052d90c1..41772ecdd368 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1477,6 +1477,18 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, (unsigned long)(cpu_entry_stack(cpu) + 1)); } +#ifdef CONFIG_X86_64 + /* Per-CPU FRED MSRs */ + if (cpu_feature_enabled(X86_FEATURE_FRED)) { + vmcs_write64(HOST_IA32_FRED_RSP1, read_msr(MSR_IA32_FRED_RSP1)); + vmcs_write64(HOST_IA32_FRED_RSP2, read_msr(MSR_IA32_FRED_RSP2)); + vmcs_write64(HOST_IA32_FRED_RSP3, read_msr(MSR_IA32_FRED_RSP3)); + vmcs_write64(HOST_IA32_FRED_SSP1, read_msr(MSR_IA32_FRED_SSP1)); + vmcs_write64(HOST_IA32_FRED_SSP2, read_msr(MSR_IA32_FRED_SSP2)); + vmcs_write64(HOST_IA32_FRED_SSP3, read_msr(MSR_IA32_FRED_SSP3)); + } +#endif + vmx->loaded_vmcs->cpu = cpu; } } @@ -4375,6 +4387,15 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) if (cpu_has_load_ia32_efer()) vmcs_write64(HOST_IA32_EFER, host_efer); + + /* + * FRED MSRs are per-cpu, however FRED CONFIG and STKLVLS MSRs + * are the same on all CPUs, thus they are initialized here. + */ + if (cpu_feature_enabled(X86_FEATURE_FRED)) { + vmcs_write64(HOST_IA32_FRED_CONFIG, read_msr(MSR_IA32_FRED_CONFIG)); + vmcs_write64(HOST_IA32_FRED_STKLVLS, read_msr(MSR_IA32_FRED_STKLVLS)); + } } void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) @@ -4936,6 +4957,17 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_writel(GUEST_IDTR_BASE, 0); vmcs_write32(GUEST_IDTR_LIMIT, 0xffff); + if (cpu_feature_enabled(X86_FEATURE_FRED)) { + vmcs_write64(GUEST_IA32_FRED_CONFIG, 0); + vmcs_write64(GUEST_IA32_FRED_RSP1, 0); + vmcs_write64(GUEST_IA32_FRED_RSP2, 0); + vmcs_write64(GUEST_IA32_FRED_RSP3, 0); + vmcs_write64(GUEST_IA32_FRED_STKLVLS, 0); + vmcs_write64(GUEST_IA32_FRED_SSP1, 0); + vmcs_write64(GUEST_IA32_FRED_SSP2, 0); + vmcs_write64(GUEST_IA32_FRED_SSP3, 0); + } + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);