Message ID | 20240124024200.102792-27-weijiang.yang@intel.com (mailing list archive)
---|---
State | New, archived
Series | Enable CET Virtualization
On Tue, Jan 23, 2024 at 06:41:59PM -0800, Yang Weijiang wrote:
>Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting
>to enable CET for nested VM.
>
>vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
>to resume L2, so that the correct CET states can be observed by one another.
>
>Suggested-by: Chao Gao <chao.gao@intel.com>
>Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
>---
> arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++--
> arch/x86/kvm/vmx/vmcs12.c |  6 +++++
> arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++-
> arch/x86/kvm/vmx/vmx.c    |  2 ++
> 4 files changed, 76 insertions(+), 3 deletions(-)
>
>diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>index 468a7cf75035..e330897a7e5e 100644
>--- a/arch/x86/kvm/vmx/nested.c
>+++ b/arch/x86/kvm/vmx/nested.c
>@@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
> 	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
> 					 MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
> 
>+	/* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_U_CET, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_S_CET, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_PL0_SSP, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_PL1_SSP, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_PL2_SSP, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_PL3_SSP, MSR_TYPE_RW);
>+
>+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>+					 MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
>+
> 	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
> 
> 	vmx->nested.force_msr_bitmap_recalc = false;
>@@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> 		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
> 		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
> 			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>+
>+		if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
>+			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>+				vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
>+				vmcs_writel(GUEST_INTR_SSP_TABLE,
>+					    vmcs12->guest_ssp_tbl);
>+			}
>+			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>+			    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
>+				vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
>+		}

I think you need to move this hunk outside the outermost if-statement, i.e.,

	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {

otherwise, the whole block may be skipped (e.g., when evmcs is enabled and
GUEST_GRP1 is clean), leaving CET state not context-switched.

And if VM_ENTRY_LOAD_CET_STATE of vmcs12 is cleared, L1's values should be
propagated to vmcs02 on nested VMenter; see pre_vmenter_debugctl in struct
nested_vmx. I believe we need similar handling for the three CET fields.

> 	}
> 
> 	if (nested_cpu_has_xsaves(vmcs12))
>@@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
> 	vmcs12->guest_pending_dbg_exceptions =
> 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
> 
>+	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>+		vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
>+		vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
>+	}
>+	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>+	    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
>+		vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
>+	}

unnecessary braces.
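The pre_vmenter_debugctl reference above points at the pattern KVM already uses for GUEST_IA32_DEBUGCTL and GUEST_BNDCFGS: when vmcs12 does not load a guest field on VM-entry, L1's current value is snapshotted before the entry and carried into vmcs02 so L2 inherits it. A minimal sketch of equivalent handling for the three CET fields follows; the pre_vmenter_s_cet, pre_vmenter_ssp and pre_vmenter_ssp_tbl names are hypothetical placeholders mirroring pre_vmenter_debugctl, not fields this patch adds:

	/*
	 * Hypothetical snapshot fields in struct nested_vmx, modeled on
	 * pre_vmenter_debugctl/pre_vmenter_bndcfgs (names are placeholders):
	 */
	u64 pre_vmenter_s_cet;
	u64 pre_vmenter_ssp;
	u64 pre_vmenter_ssp_tbl;

	/*
	 * In nested_vmx_enter_non_root_mode(), while vmcs01 is still current:
	 * capture L1's CET state if vmcs12 won't supply it on this entry.
	 */
	if (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) {
		vmx->nested.pre_vmenter_s_cet = vmcs_readl(GUEST_S_CET);
		vmx->nested.pre_vmenter_ssp = vmcs_readl(GUEST_SSP);
		vmx->nested.pre_vmenter_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
	}

	/*
	 * In prepare_vmcs02(): when VM_ENTRY_LOAD_CET_STATE is clear,
	 * propagate the snapshot into vmcs02 so L2 runs with L1's CET state.
	 */
	if (!vmx->nested.nested_run_pending ||
	    !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE)) {
		vmcs_writel(GUEST_S_CET, vmx->nested.pre_vmenter_s_cet);
		vmcs_writel(GUEST_SSP, vmx->nested.pre_vmenter_ssp);
		vmcs_writel(GUEST_INTR_SSP_TABLE, vmx->nested.pre_vmenter_ssp_tbl);
	}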
On 1/29/2024 3:04 PM, Chao Gao wrote:
> On Tue, Jan 23, 2024 at 06:41:59PM -0800, Yang Weijiang wrote:
>> Set up CET MSRs, related VM_ENTRY/EXIT control bits and fixed CR4 setting
>> to enable CET for nested VM.
>>
>> vmcs12 and vmcs02 need to be synced when L2 exits to L1 or when L1 wants
>> to resume L2, so that the correct CET states can be observed by one another.
>>
>> Suggested-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
>> ---
>>  arch/x86/kvm/vmx/nested.c | 57 +++++++++++++++++++++++++++++++++++++--
>>  arch/x86/kvm/vmx/vmcs12.c |  6 +++++
>>  arch/x86/kvm/vmx/vmcs12.h | 14 +++++++++-
>>  arch/x86/kvm/vmx/vmx.c    |  2 ++
>>  4 files changed, 76 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>> index 468a7cf75035..e330897a7e5e 100644
>> --- a/arch/x86/kvm/vmx/nested.c
>> +++ b/arch/x86/kvm/vmx/nested.c
>> @@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
>>  	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>>  					 MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
>>  
>> +	/* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_U_CET, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_S_CET, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_PL0_SSP, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_PL1_SSP, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_PL2_SSP, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_PL3_SSP, MSR_TYPE_RW);
>> +
>> +	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
>> +					 MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
>> +
>>  	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
>>  
>>  	vmx->nested.force_msr_bitmap_recalc = false;
>> @@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
>>  		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
>>  		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
>>  			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>> +
>> +		if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
>> +			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>> +				vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
>> +				vmcs_writel(GUEST_INTR_SSP_TABLE,
>> +					    vmcs12->guest_ssp_tbl);
>> +			}
>> +			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>> +			    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
>> +				vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
>> +		}
> I think you need to move this hunk outside the outermost if-statement, i.e.,
>
> 	if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
> 			   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {

Yes, I should move them to the end of the function, will do it, thanks!

> otherwise, the whole block may be skipped (e.g., when evmcs is enabled and
> GUEST_GRP1 is clean), leaving CET state not context-switched.
>
> And if VM_ENTRY_LOAD_CET_STATE of vmcs12 is cleared, L1's values should be
> propagated to vmcs02 on nested VMenter; see pre_vmenter_debugctl in struct
> nested_vmx. I believe we need similar handling for the three CET fields.

The code used to be there, but I thought it wasn't a valid usage model, so
I removed it. On second thought, I shouldn't assume L1's CET usage; I'll
just check the flags and sync the CET state between L2 <--> L1. Will add
the handling, thanks!

>> 	}
>>
>> 	if (nested_cpu_has_xsaves(vmcs12))
>> @@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
>> 	vmcs12->guest_pending_dbg_exceptions =
>> 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>>
>> +	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
>> +		vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
>> +		vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
>> +	}
>> +	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
>> +	    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
>> +		vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
>> +	}
> unnecessary braces.

Will remove them.
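For reference, the move agreed above would land the CET loads at the tail of prepare_vmcs02_rare(), past everything guarded by hv_clean_fields, so they execute even when eVMCS marks GUEST_GRP1 clean. A sketch of the intended shape, not the final patch:

	static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
	{
		/* hv_evmcs: the function's existing local eVMCS pointer */

		if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
				   HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
			/* ... existing GRP1-guarded guest-state writes ... */
		}

		/* ... remaining clean-field groups ... */

		/*
		 * Tail of the function: load L2's CET state whenever vmcs12
		 * asks for it, independent of the eVMCS clean-field
		 * optimization above.
		 */
		if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
				vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
				vmcs_writel(GUEST_INTR_SSP_TABLE, vmcs12->guest_ssp_tbl);
			}
			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
			    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
				vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
		}
	}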
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 468a7cf75035..e330897a7e5e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -691,6 +691,28 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
 					 MSR_IA32_FLUSH_CMD, MSR_TYPE_W);
 
+	/* Pass CET MSRs to nested VM if L0 and L1 are set to pass-through. */
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_U_CET, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_S_CET, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PL0_SSP, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PL1_SSP, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PL2_SSP, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PL3_SSP, MSR_TYPE_RW);
+
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_INT_SSP_TAB, MSR_TYPE_RW);
+
 	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
 	vmx->nested.force_msr_bitmap_recalc = false;
@@ -2506,6 +2528,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 		if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
 		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
 			vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
+		if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_CET_STATE) {
+			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
+				vmcs_writel(GUEST_SSP, vmcs12->guest_ssp);
+				vmcs_writel(GUEST_INTR_SSP_TABLE,
+					    vmcs12->guest_ssp_tbl);
+			}
+			if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
+			    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT))
+				vmcs_writel(GUEST_S_CET, vmcs12->guest_s_cet);
+		}
 	}
 
 	if (nested_cpu_has_xsaves(vmcs12))
@@ -4344,6 +4377,15 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK)) {
+		vmcs12->guest_ssp = vmcs_readl(GUEST_SSP);
+		vmcs12->guest_ssp_tbl = vmcs_readl(GUEST_INTR_SSP_TABLE);
+	}
+	if (guest_can_use(&vmx->vcpu, X86_FEATURE_SHSTK) ||
+	    guest_can_use(&vmx->vcpu, X86_FEATURE_IBT)) {
+		vmcs12->guest_s_cet = vmcs_readl(GUEST_S_CET);
+	}
+
 	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
 }
 
@@ -4569,6 +4611,16 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
 		vmcs_write64(GUEST_BNDCFGS, 0);
 
+	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_CET_STATE) {
+		if (guest_can_use(vcpu, X86_FEATURE_SHSTK)) {
+			vmcs_writel(GUEST_SSP, vmcs12->host_ssp);
+			vmcs_writel(GUEST_INTR_SSP_TABLE, vmcs12->host_ssp_tbl);
+		}
+		if (guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
+		    guest_can_use(vcpu, X86_FEATURE_IBT))
+			vmcs_writel(GUEST_S_CET, vmcs12->host_s_cet);
+	}
+
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
 		vcpu->arch.pat = vmcs12->host_ia32_pat;
@@ -6840,7 +6892,7 @@ static void nested_vmx_setup_exit_ctls(struct vmcs_config *vmcs_conf,
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
 		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
-		VM_EXIT_CLEAR_BNDCFGS;
+		VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_CET_STATE;
 	msrs->exit_ctls_high |=
 		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
@@ -6862,7 +6914,8 @@ static void nested_vmx_setup_entry_ctls(struct vmcs_config *vmcs_conf,
 #ifdef CONFIG_X86_64
 		VM_ENTRY_IA32E_MODE |
 #endif
-		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
+		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
+		VM_ENTRY_LOAD_CET_STATE;
 	msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
 				  VM_ENTRY_LOAD_IA32_EFER |
 				  VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 106a72c923ca..4233b5ca9461 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -139,6 +139,9 @@ const unsigned short vmcs12_field_offsets[] = {
 	FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
 	FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
 	FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
+	FIELD(GUEST_S_CET, guest_s_cet),
+	FIELD(GUEST_SSP, guest_ssp),
+	FIELD(GUEST_INTR_SSP_TABLE, guest_ssp_tbl),
 	FIELD(HOST_CR0, host_cr0),
 	FIELD(HOST_CR3, host_cr3),
 	FIELD(HOST_CR4, host_cr4),
@@ -151,5 +154,8 @@ const unsigned short vmcs12_field_offsets[] = {
 	FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
+	FIELD(HOST_S_CET, host_s_cet),
+	FIELD(HOST_SSP, host_ssp),
+	FIELD(HOST_INTR_SSP_TABLE, host_ssp_tbl),
 };
 const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 01936013428b..3884489e7f7e 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -117,7 +117,13 @@ struct __packed vmcs12 {
 	natural_width host_ia32_sysenter_eip;
 	natural_width host_rsp;
 	natural_width host_rip;
-	natural_width paddingl[8]; /* room for future expansion */
+	natural_width host_s_cet;
+	natural_width host_ssp;
+	natural_width host_ssp_tbl;
+	natural_width guest_s_cet;
+	natural_width guest_ssp;
+	natural_width guest_ssp_tbl;
+	natural_width paddingl[2]; /* room for future expansion */
 	u32 pin_based_vm_exec_control;
 	u32 cpu_based_vm_exec_control;
 	u32 exception_bitmap;
@@ -292,6 +298,12 @@ static inline void vmx_check_vmcs12_offsets(void)
 	CHECK_OFFSET(host_ia32_sysenter_eip, 656);
 	CHECK_OFFSET(host_rsp, 664);
 	CHECK_OFFSET(host_rip, 672);
+	CHECK_OFFSET(host_s_cet, 680);
+	CHECK_OFFSET(host_ssp, 688);
+	CHECK_OFFSET(host_ssp_tbl, 696);
+	CHECK_OFFSET(guest_s_cet, 704);
+	CHECK_OFFSET(guest_ssp, 712);
+	CHECK_OFFSET(guest_ssp_tbl, 720);
 	CHECK_OFFSET(pin_based_vm_exec_control, 744);
 	CHECK_OFFSET(cpu_based_vm_exec_control, 748);
 	CHECK_OFFSET(exception_bitmap, 752);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ef7aca954228..5e5ec8b0223b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7731,6 +7731,8 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	cr4_fixed1_update(X86_CR4_PKE,        ecx, feature_bit(PKU));
 	cr4_fixed1_update(X86_CR4_UMIP,       ecx, feature_bit(UMIP));
 	cr4_fixed1_update(X86_CR4_LA57,       ecx, feature_bit(LA57));
+	cr4_fixed1_update(X86_CR4_CET,        ecx, feature_bit(SHSTK));
+	cr4_fixed1_update(X86_CR4_CET,        edx, feature_bit(IBT));
 
 	entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1);
 	cr4_fixed1_update(X86_CR4_LAM_SUP,    eax, feature_bit(LAM));