diff mbox series

[v3,10/27] KVM: VMX: Set FRED MSR interception

Message ID 20241001050110.3643764-11-xin@zytor.com (mailing list archive)
State New, archived
Headers show
Series Enable FRED with KVM VMX | expand

Commit Message

Xin Li Oct. 1, 2024, 5 a.m. UTC
From: Xin Li <xin3.li@intel.com>

Add the FRED MSRs to the VMX passthrough MSR list and set up
interception for them.

8 FRED MSRs, i.e., MSR_IA32_FRED_RSP[123], MSR_IA32_FRED_STKLVLS,
MSR_IA32_FRED_SSP[123] and MSR_IA32_FRED_CONFIG, are all safe to
pass through, because each of them has a pair of corresponding host
and guest VMCS fields.

Both MSR_IA32_FRED_RSP0 and MSR_IA32_FRED_SSP0 are dedicated to user
level event delivery only, IOW they are NOT used in any kernel event
delivery or in the execution of ERETS.  Thus KVM can run safely with
guest values in the 2 MSRs.  As a result, save and restore of their
guest values is postponed until vCPU context switching and their host
values are restored on returning to userspace.

Save/restore of MSR_IA32_FRED_RSP0 is done in the next patch.

Note, as MSR_IA32_FRED_SSP0 is an alias of MSR_IA32_PL0_SSP, its save
and restore is done through the CET supervisor context management.

Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Tested-by: Shan Kang <shan.kang@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

Comments

Chao Gao Nov. 13, 2024, 11:31 a.m. UTC | #1
On Mon, Sep 30, 2024 at 10:00:53PM -0700, Xin Li (Intel) wrote:
>From: Xin Li <xin3.li@intel.com>
>
>Add FRED MSRs to the VMX passthrough MSR list and set FRED MSRs
>interception.
>
>8 FRED MSRs, i.e., MSR_IA32_FRED_RSP[123], MSR_IA32_FRED_STKLVLS,
>MSR_IA32_FRED_SSP[123] and MSR_IA32_FRED_CONFIG, are all safe to be
>passthrough, because they all have a pair of corresponding host and
>guest VMCS fields.
>
>Both MSR_IA32_FRED_RSP0 and MSR_IA32_FRED_SSP0 are dedicated for user
>level event delivery only, IOW they are NOT used in any kernel event
>delivery and the execution of ERETS.  Thus KVM can run safely with
>guest values in the 2 MSRs.  As a result, save and restore of their
>guest values are postponed until vCPU context switching and their host
>values are restored on returning to userspace.
>
>Save/restore of MSR_IA32_FRED_RSP0 is done in the next patch.
>
>Note, as MSR_IA32_FRED_SSP0 is an alias of MSR_IA32_PL0_SSP, its save
>and restore is done through the CET supervisor context management.

But CET may not be supported by either the host or the guest. How will
MSR_IA32_FRED_SSP0 be switched in this case? I think that's part of the reason
why Sean suggested [*] intercepting the MSR when CET isn't exposed to the
guest.

[*]: https://lore.kernel.org/kvm/ZvQaNRhrsSJTYji3@google.com/#t

>
>Signed-off-by: Xin Li <xin3.li@intel.com>
>Signed-off-by: Xin Li (Intel) <xin@zytor.com>
>Tested-by: Shan Kang <shan.kang@intel.com>
>---
> arch/x86/kvm/vmx/vmx.c | 34 ++++++++++++++++++++++++++++++++++
> 1 file changed, 34 insertions(+)
>
>diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>index 28cf89c97bda..c10c955722a3 100644
>--- a/arch/x86/kvm/vmx/vmx.c
>+++ b/arch/x86/kvm/vmx/vmx.c
>@@ -176,6 +176,16 @@ static u32 vmx_possible_passthrough_msrs[] = {
> 	MSR_FS_BASE,
> 	MSR_GS_BASE,
> 	MSR_KERNEL_GS_BASE,
>+	MSR_IA32_FRED_RSP0,
>+	MSR_IA32_FRED_RSP1,
>+	MSR_IA32_FRED_RSP2,
>+	MSR_IA32_FRED_RSP3,
>+	MSR_IA32_FRED_STKLVLS,
>+	MSR_IA32_FRED_SSP1,
>+	MSR_IA32_FRED_SSP2,
>+	MSR_IA32_FRED_SSP3,
>+	MSR_IA32_FRED_CONFIG,
>+	MSR_IA32_FRED_SSP0,		/* Should be added through CET */
> 	MSR_IA32_XFD,
> 	MSR_IA32_XFD_ERR,
> #endif
>@@ -7880,6 +7890,28 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
> 		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
> }
> 
>+static void vmx_set_intercept_for_fred_msr(struct kvm_vcpu *vcpu)
>+{
>+	bool flag = !guest_can_use(vcpu, X86_FEATURE_FRED);
>+
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP0, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP1, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP2, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP3, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_STKLVLS, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP1, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP2, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP3, MSR_TYPE_RW, flag);
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_CONFIG, MSR_TYPE_RW, flag);
>+
>+	/*
>+	 * flag = !(CET.SUPERVISOR_SHADOW_STACK || FRED)
>+	 *
>+	 * A possible optimization is to intercept SSPs when FRED && !CET.SUPERVISOR_SHADOW_STACK.
>+	 */
>+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP0, MSR_TYPE_RW, flag);

To implement the "optimization", you can simply remove this line. Then the CET
series will take care of the interception of this MSR. And please leave a
comment here to explain why this MSR is treated differently from other FRED
MSRs.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 28cf89c97bda..c10c955722a3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -176,6 +176,16 @@  static u32 vmx_possible_passthrough_msrs[] = {
 	MSR_FS_BASE,
 	MSR_GS_BASE,
 	MSR_KERNEL_GS_BASE,
+	MSR_IA32_FRED_RSP0,
+	MSR_IA32_FRED_RSP1,
+	MSR_IA32_FRED_RSP2,
+	MSR_IA32_FRED_RSP3,
+	MSR_IA32_FRED_STKLVLS,
+	MSR_IA32_FRED_SSP1,
+	MSR_IA32_FRED_SSP2,
+	MSR_IA32_FRED_SSP3,
+	MSR_IA32_FRED_CONFIG,
+	MSR_IA32_FRED_SSP0,		/* Should be added through CET */
 	MSR_IA32_XFD,
 	MSR_IA32_XFD_ERR,
 #endif
@@ -7880,6 +7890,28 @@  static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
 		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
 }
 
+static void vmx_set_intercept_for_fred_msr(struct kvm_vcpu *vcpu)
+{
+	bool flag = !guest_can_use(vcpu, X86_FEATURE_FRED);
+
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP0, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP1, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP2, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_RSP3, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_STKLVLS, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP1, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP2, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP3, MSR_TYPE_RW, flag);
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_CONFIG, MSR_TYPE_RW, flag);
+
+	/*
+	 * FRED_SSP0 aliases MSR_IA32_PL0_SSP (managed via CET), so ideally
+	 * flag = !(CET.SUPERVISOR_SHADOW_STACK || FRED); only FRED is checked here.
+	 * TODO: intercept SSP0 when FRED && !CET.SUPERVISOR_SHADOW_STACK.
+	 */
+	vmx_set_intercept_for_msr(vcpu, MSR_IA32_FRED_SSP0, MSR_TYPE_RW, flag);
+}
+
 void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7957,6 +7989,8 @@  void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
+
+	vmx_set_intercept_for_fred_msr(vcpu);
 }
 
 static __init u64 vmx_get_perf_capabilities(void)