[v2,10/25] KVM: VMX: Add support for FRED context save/restore

Message ID 20240207172646.3981-11-xin3.li@intel.com (mailing list archive)
State New
Series Enable FRED with KVM VMX

Commit Message

Li, Xin3 Feb. 7, 2024, 5:26 p.m. UTC
Handle host-initiated FRED MSR access requests to allow FRED context
to be read and written from user level.

During VM save/restore and live migration, FRED context needs to be
saved/restored, which requires FRED MSRs to be accessed from a
user-level application, e.g., QEMU.
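
For illustration, FRED MSRs would then be reachable through the usual
KVM_GET_MSRS/KVM_SET_MSRS vCPU ioctls; a minimal sketch of the read side
(error handling omitted, "vcpu_fd" is assumed to be an open vCPU fd):

	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entries[1];
	} msrs = {
		.hdr.nmsrs = 1,
		.entries[0].index = MSR_IA32_FRED_CONFIG,
	};

	/* On success, msrs.entries[0].data holds the guest value. */
	ioctl(vcpu_fd, KVM_GET_MSRS, &msrs);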

Note, handling of MSR_IA32_FRED_SSP0, i.e., MSR_IA32_PL0_SSP, is not
added yet; it is done in the KVM CET patch set.

Signed-off-by: Xin Li <xin3.li@intel.com>
Tested-by: Shan Kang <shan.kang@intel.com>
---

Changes since v1:
* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() (Chao Gao).
* Fail host-requested FRED MSR accesses if KVM cannot virtualize FRED
  (Chao Gao).
* Handle the case where FRED MSRs are valid but KVM cannot virtualize
  FRED (Chao Gao).
* Add sanity checks when writing to FRED MSRs.
---
 arch/x86/kvm/vmx/vmx.c | 72 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c     | 47 +++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)

Comments

Chao Gao April 29, 2024, 6:31 a.m. UTC | #1
On Thu, Feb 08, 2024 at 01:26:30AM +0800, Xin Li wrote:
>Handle host-initiated FRED MSR access requests to allow FRED context
>to be read and written from user level.
>

The changelog isn't accurate because guest accesses are also handled
by this patch, specifically in the "else" branch.

>+		if (host_initiated) {
>+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
>+				return 1;
>+		} else {



> void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
>@@ -2019,6 +2037,33 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 	case MSR_KERNEL_GS_BASE:
> 		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
> 		break;
>+	case MSR_IA32_FRED_RSP0:
>+		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
>+		break;
>+	case MSR_IA32_FRED_RSP1:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP1);
>+		break;
>+	case MSR_IA32_FRED_RSP2:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP2);
>+		break;
>+	case MSR_IA32_FRED_RSP3:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP3);
>+		break;
>+	case MSR_IA32_FRED_STKLVLS:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
>+		break;
>+	case MSR_IA32_FRED_SSP1:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP1);
>+		break;
>+	case MSR_IA32_FRED_SSP2:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP2);
>+		break;
>+	case MSR_IA32_FRED_SSP3:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP3);
>+		break;
>+	case MSR_IA32_FRED_CONFIG:
>+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_CONFIG);
>+		break;

How about adding a helper function to convert an MSR index to the
corresponding VMCS field ID? Then do:

	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
		msr_info->data = vmcs_read64(msr_to_vmcs(index));
		break;

and ...

> #endif
> 	case MSR_EFER:
> 		return kvm_get_msr_common(vcpu, msr_info);
>@@ -2226,6 +2271,33 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 			vmx_update_exception_bitmap(vcpu);
> 		}
> 		break;
>+	case MSR_IA32_FRED_RSP0:
>+		vmx_write_guest_fred_rsp0(vmx, data);
>+		break;
>+	case MSR_IA32_FRED_RSP1:
>+		vmcs_write64(GUEST_IA32_FRED_RSP1, data);
>+		break;
>+	case MSR_IA32_FRED_RSP2:
>+		vmcs_write64(GUEST_IA32_FRED_RSP2, data);
>+		break;
>+	case MSR_IA32_FRED_RSP3:
>+		vmcs_write64(GUEST_IA32_FRED_RSP3, data);
>+		break;
>+	case MSR_IA32_FRED_STKLVLS:
>+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, data);
>+		break;
>+	case MSR_IA32_FRED_SSP1:
>+		vmcs_write64(GUEST_IA32_FRED_SSP1, data);
>+		break;
>+	case MSR_IA32_FRED_SSP2:
>+		vmcs_write64(GUEST_IA32_FRED_SSP2, data);
>+		break;
>+	case MSR_IA32_FRED_SSP3:
>+		vmcs_write64(GUEST_IA32_FRED_SSP3, data);
>+		break;
>+	case MSR_IA32_FRED_CONFIG:
>+		vmcs_write64(GUEST_IA32_FRED_CONFIG, data);
>+		break;

	case MSR_IA32_FRED_RSP1 ... MSR_IA32_FRED_STKLVLS:
	case MSR_IA32_FRED_SSP1 ... MSR_IA32_FRED_CONFIG:
		vmcs_write64(msr_to_vmcs(index), data);
		break;

The code will be more compact and generate fewer instructions.  I believe the
CET series can make the same change [*].  Performance here isn't critical; I
just think it looks cumbersome to repeat the same pattern for 8 (and more with
CET considered) MSRs.

[*]: https://lore.kernel.org/kvm/20240219074733.122080-21-weijiang.yang@intel.com/
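
One possible shape for that helper (a sketch; "fred_msr_to_vmcs" is a name
made up here, and a switch is used so nothing need be assumed about how the
MSR indices and VMCS field encodings line up):

	/* MSR_IA32_FRED_RSP0 is deliberately absent: it is not a VMCS field. */
	static u32 fred_msr_to_vmcs(u32 msr)
	{
		switch (msr) {
		case MSR_IA32_FRED_RSP1:	return GUEST_IA32_FRED_RSP1;
		case MSR_IA32_FRED_RSP2:	return GUEST_IA32_FRED_RSP2;
		case MSR_IA32_FRED_RSP3:	return GUEST_IA32_FRED_RSP3;
		case MSR_IA32_FRED_STKLVLS:	return GUEST_IA32_FRED_STKLVLS;
		case MSR_IA32_FRED_SSP1:	return GUEST_IA32_FRED_SSP1;
		case MSR_IA32_FRED_SSP2:	return GUEST_IA32_FRED_SSP2;
		case MSR_IA32_FRED_SSP3:	return GUEST_IA32_FRED_SSP3;
		case MSR_IA32_FRED_CONFIG:	return GUEST_IA32_FRED_CONFIG;
		default:
			WARN_ON_ONCE(1);
			return 0;
		}
	}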

> #endif
> 	case MSR_IA32_SYSENTER_CS:
> 		if (is_guest_mode(vcpu))
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index 363b1c080205..4e8d60f248e3 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -1451,6 +1451,9 @@ static const u32 msrs_to_save_base[] = {
> 	MSR_STAR,
> #ifdef CONFIG_X86_64
> 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
>+	MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
>+	MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
>+	MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
> #endif
> 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
> 	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
>@@ -1892,6 +1895,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
> 			return 1;
> 
> 		data = (u32)data;
>+		break;
>+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
>+		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
>+			return 1;
>+		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
>+		    (data & GENMASK_ULL(5, 0)))
>+			return 1;
>+		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
>+		    (data & GENMASK_ULL(2, 0)))
>+			return 1;
>+
>+		if (host_initiated) {
>+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
>+				return 1;

Should be:
			if (!kvm_cpu_cap_has(X86_FEATURE_FRED) && data)

The KVM ABI allows userspace to write only 0 if the guest cannot enumerate the
feature.  And even better, your next version can be on top of Sean's series:

https://lore.kernel.org/kvm/20240425181422.3250947-1-seanjc@google.com/T/#md00be687770e1e658fc9fe0eac20b5f0bd230e4c

That way, you can get rid of the "host_initiated" check.
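
Concretely, the suggested write-path check would look something like this (a
sketch of the pre-rebase fix only; with Sean's series the host_initiated
special case disappears entirely):

	if (host_initiated) {
		/* A write of '0' is always allowed, even without FRED. */
		if (!kvm_cpu_cap_has(X86_FEATURE_FRED) && data)
			return 1;
	}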
Sean Christopherson June 12, 2024, 10:09 p.m. UTC | #2
On Wed, Feb 07, 2024, Xin Li wrote:
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 264378c3b784..ee61d2c25cb0 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1420,6 +1420,24 @@ static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
>  	preempt_enable();
>  	vmx->msr_guest_kernel_gs_base = data;
>  }
> +
> +static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
> +{
> +	preempt_disable();
> +	if (vmx->guest_state_loaded)
> +		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
> +	preempt_enable();
> +	return vmx->msr_guest_fred_rsp0;
> +}
> +
> +static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
> +{
> +	preempt_disable();
> +	if (vmx->guest_state_loaded)
> +		wrmsrl(MSR_IA32_FRED_RSP0, data);
> +	preempt_enable();
> +	vmx->msr_guest_fred_rsp0 = data;
> +}

This should be unnecessary when you switch to the user return framework.

KERNEL_GS_BASE is a bit special because it needs to be reloaded if the kernel
switches to a different task, i.e. before an exit to userspace.
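
For reference, a sketch of the user-return flavor built on KVM's existing
kvm_add_user_return_msr()/kvm_set_user_return_msr() machinery (the
"fred_rsp0_uret_slot" variable is illustrative, not from this series):

	/* Once, at hardware-setup time: */
	kvm_add_user_return_msr(MSR_IA32_FRED_RSP0);

	/*
	 * When loading guest state: write the guest value now; the framework
	 * restores the host value automatically on return to userspace.
	 */
	kvm_set_user_return_msr(fred_rsp0_uret_slot, vmx->msr_guest_fred_rsp0,
				-1ull);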

> @@ -1892,6 +1895,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  			return 1;
>  
>  		data = (u32)data;
> +		break;
> +	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
> +		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
> +			return 1;
> +		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
> +		    (data & GENMASK_ULL(5, 0)))
> +			return 1;
> +		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
> +		    (data & GENMASK_ULL(2, 0)))
> +			return 1;
> +
> +		if (host_initiated) {
> +			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
> +				return 1;
> +		} else {
> +			/*
> +			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
> +			 * which also ensures no malicious guest can write to FRED
> +			 * MSRs to corrupt host FRED MSRs.
> +			 */

Drop the comment; if someone reading KVM code doesn't grok that attempting to
access MSRs that shouldn't exist results in #GP, then a comment probably isn't
going to save them.

This should also be bumped to the top, i.e. do the "does this exist" check first.

Lastly, the direction we are taking is to NOT exempt host-initiated writes, i.e.
userspace has to set CPUID before MSRs.  If you base the next version on top of
this series, it should just work (and if it doesn't, I definitely want to know):

https://lore.kernel.org/all/20240425181422.3250947-1-seanjc@google.com
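
Put together, the case could then read roughly as follows (a sketch assuming
guest_can_use() also governs host-initiated accesses once the series above
lands):

	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
		/* Existence check first: no FRED, no FRED MSRs. */
		if (!guest_can_use(vcpu, X86_FEATURE_FRED))
			return 1;
		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
			return 1;
		/* RSPx must be 64-byte aligned, SSPx 8-byte aligned. */
		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
		    (data & GENMASK_ULL(5, 0)))
			return 1;
		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
		    (data & GENMASK_ULL(2, 0)))
			return 1;
		break;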


> +			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
> +				return 1;
> +		}
> +

Uh, where does the value go?  Oh, this is common code.  Ah, and it's in common
code so that VMX can avoid having to make an extra function call for every MSR.
Neat.

>  		break;
>  	}
>  
> @@ -1936,6 +1963,22 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
>  			return 1;
>  		break;
> +	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
> +		if (host_initiated) {
> +			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
> +				return 1;
> +		} else {
> +			/*
> +			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
> +			 * which also ensures no malicious guest can write to FRED
> +			 * MSRs to corrupt host FRED MSRs.
> +			 */
> +			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
> +				return 1;
> +		}

Same comments here.

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 264378c3b784..ee61d2c25cb0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1420,6 +1420,24 @@  static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
 	preempt_enable();
 	vmx->msr_guest_kernel_gs_base = data;
 }
+
+static u64 vmx_read_guest_fred_rsp0(struct vcpu_vmx *vmx)
+{
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		vmx->msr_guest_fred_rsp0 = read_msr(MSR_IA32_FRED_RSP0);
+	preempt_enable();
+	return vmx->msr_guest_fred_rsp0;
+}
+
+static void vmx_write_guest_fred_rsp0(struct vcpu_vmx *vmx, u64 data)
+{
+	preempt_disable();
+	if (vmx->guest_state_loaded)
+		wrmsrl(MSR_IA32_FRED_RSP0, data);
+	preempt_enable();
+	vmx->msr_guest_fred_rsp0 = data;
+}
 #endif
 
 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
@@ -2019,6 +2037,33 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_KERNEL_GS_BASE:
 		msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
 		break;
+	case MSR_IA32_FRED_RSP0:
+		msr_info->data = vmx_read_guest_fred_rsp0(vmx);
+		break;
+	case MSR_IA32_FRED_RSP1:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP1);
+		break;
+	case MSR_IA32_FRED_RSP2:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP2);
+		break;
+	case MSR_IA32_FRED_RSP3:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_RSP3);
+		break;
+	case MSR_IA32_FRED_STKLVLS:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
+		break;
+	case MSR_IA32_FRED_SSP1:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP1);
+		break;
+	case MSR_IA32_FRED_SSP2:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP2);
+		break;
+	case MSR_IA32_FRED_SSP3:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_SSP3);
+		break;
+	case MSR_IA32_FRED_CONFIG:
+		msr_info->data = vmcs_read64(GUEST_IA32_FRED_CONFIG);
+		break;
 #endif
 	case MSR_EFER:
 		return kvm_get_msr_common(vcpu, msr_info);
@@ -2226,6 +2271,33 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			vmx_update_exception_bitmap(vcpu);
 		}
 		break;
+	case MSR_IA32_FRED_RSP0:
+		vmx_write_guest_fred_rsp0(vmx, data);
+		break;
+	case MSR_IA32_FRED_RSP1:
+		vmcs_write64(GUEST_IA32_FRED_RSP1, data);
+		break;
+	case MSR_IA32_FRED_RSP2:
+		vmcs_write64(GUEST_IA32_FRED_RSP2, data);
+		break;
+	case MSR_IA32_FRED_RSP3:
+		vmcs_write64(GUEST_IA32_FRED_RSP3, data);
+		break;
+	case MSR_IA32_FRED_STKLVLS:
+		vmcs_write64(GUEST_IA32_FRED_STKLVLS, data);
+		break;
+	case MSR_IA32_FRED_SSP1:
+		vmcs_write64(GUEST_IA32_FRED_SSP1, data);
+		break;
+	case MSR_IA32_FRED_SSP2:
+		vmcs_write64(GUEST_IA32_FRED_SSP2, data);
+		break;
+	case MSR_IA32_FRED_SSP3:
+		vmcs_write64(GUEST_IA32_FRED_SSP3, data);
+		break;
+	case MSR_IA32_FRED_CONFIG:
+		vmcs_write64(GUEST_IA32_FRED_CONFIG, data);
+		break;
 #endif
 	case MSR_IA32_SYSENTER_CS:
 		if (is_guest_mode(vcpu))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 363b1c080205..4e8d60f248e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1451,6 +1451,9 @@  static const u32 msrs_to_save_base[] = {
 	MSR_STAR,
 #ifdef CONFIG_X86_64
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
+	MSR_IA32_FRED_RSP0, MSR_IA32_FRED_RSP1, MSR_IA32_FRED_RSP2,
+	MSR_IA32_FRED_RSP3, MSR_IA32_FRED_STKLVLS, MSR_IA32_FRED_SSP1,
+	MSR_IA32_FRED_SSP2, MSR_IA32_FRED_SSP3, MSR_IA32_FRED_CONFIG,
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
@@ -1892,6 +1895,30 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 			return 1;
 
 		data = (u32)data;
+		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (index != MSR_IA32_FRED_STKLVLS && is_noncanonical_address(data, vcpu))
+			return 1;
+		if ((index >= MSR_IA32_FRED_RSP0 && index <= MSR_IA32_FRED_RSP3) &&
+		    (data & GENMASK_ULL(5, 0)))
+			return 1;
+		if ((index >= MSR_IA32_FRED_SSP1 && index <= MSR_IA32_FRED_SSP3) &&
+		    (data & GENMASK_ULL(2, 0)))
+			return 1;
+
+		if (host_initiated) {
+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+				return 1;
+		} else {
+			/*
+			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
+			 * which also ensures no malicious guest can write to FRED
+			 * MSRs to corrupt host FRED MSRs.
+			 */
+			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
+				return 1;
+		}
+
 		break;
 	}
 
@@ -1936,6 +1963,22 @@  int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (host_initiated) {
+			if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+				return 1;
+		} else {
+			/*
+			 * Inject #GP upon FRED MSRs accesses from a non-FRED guest,
+			 * which also ensures no malicious guest can write to FRED
+			 * MSRs to corrupt host FRED MSRs.
+			 */
+			if (!guest_can_use(vcpu, X86_FEATURE_FRED))
+				return 1;
+		}
+
+		break;
+
 	}
 
 	msr.index = index;
@@ -7364,6 +7407,10 @@  static void kvm_probe_msr_to_save(u32 msr_index)
 		if (!(kvm_get_arch_capabilities() & ARCH_CAP_TSX_CTRL_MSR))
 			return;
 		break;
+	case MSR_IA32_FRED_RSP0 ... MSR_IA32_FRED_CONFIG:
+		if (!kvm_cpu_cap_has(X86_FEATURE_FRED))
+			return;
+		break;
 	default:
 		break;
 	}