diff mbox

[7/9] KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES

Message ID 1517938181-15317-8-git-send-email-dwmw@amazon.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Woodhouse, David Feb. 6, 2018, 5:29 p.m. UTC
From: KarimAllah Ahmed <karahmed@amazon.de>

Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO
(bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the
contents will come directly from the hardware, but user-space can still
override it.

[dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional]

Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jun Nakajima <jun.nakajima@intel.com>
Cc: kvm@vger.kernel.org
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de

(cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd)
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/cpuid.c |  8 +++++++-
 arch/x86/kvm/cpuid.h |  8 ++++++++
 arch/x86/kvm/vmx.c   | 15 +++++++++++++++
 arch/x86/kvm/x86.c   |  1 +
 4 files changed, 31 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini Feb. 16, 2018, 2:18 p.m. UTC | #1
On 06/02/2018 18:29, David Woodhouse wrote:
> From: KarimAllah Ahmed <karahmed@amazon.de>
> 
> Intel processors use MSR_IA32_ARCH_CAPABILITIES MSR to indicate RDCL_NO
> (bit 0) and IBRS_ALL (bit 1). This is a read-only MSR. By default the
> contents will come directly from the hardware, but user-space can still
> override it.

Uhm, taking contents from the hardware is wrong (guess why---live
migration).  I'll send a revert of those two lines.

Paolo

> [dwmw2: The bit in kvm_cpuid_7_0_edx_x86_features can be unconditional]
> 
> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> Cc: Andrea Arcangeli <aarcange@redhat.com>
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: kvm@vger.kernel.org
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Asit Mallick <asit.k.mallick@intel.com>
> Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> Cc: Ashok Raj <ashok.raj@intel.com>
> Link: https://lkml.kernel.org/r/1517522386-18410-4-git-send-email-karahmed@amazon.de
> 
> (cherry picked from commit 28c1c9fabf48d6ad596273a11c46e0d0da3e14cd)
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>  arch/x86/kvm/cpuid.c |  8 +++++++-
>  arch/x86/kvm/cpuid.h |  8 ++++++++
>  arch/x86/kvm/vmx.c   | 15 +++++++++++++++
>  arch/x86/kvm/x86.c   |  1 +
>  4 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 6f24483..9c6493f 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -380,6 +380,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  	/* cpuid 7.0.ecx*/
>  	const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
>  
> +	/* cpuid 7.0.edx*/
> +	const u32 kvm_cpuid_7_0_edx_x86_features =
> +		F(ARCH_CAPABILITIES);
> +
>  	/* all calls to cpuid_count() should be made on the same cpu */
>  	get_cpu();
>  
> @@ -462,12 +466,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  			/* PKU is not yet implemented for shadow paging. */
>  			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
>  				entry->ecx &= ~F(PKU);
> +			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
> +			cpuid_mask(&entry->edx, CPUID_7_EDX);
>  		} else {
>  			entry->ebx = 0;
>  			entry->ecx = 0;
> +			entry->edx = 0;
>  		}
>  		entry->eax = 0;
> -		entry->edx = 0;
>  		break;
>  	}
>  	case 9:
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index ec4f9dc..8719997 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -171,6 +171,14 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
>  	return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
>  }
>  
> +static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_cpuid_entry2 *best;
> +
> +	best = kvm_find_cpuid_entry(vcpu, 7, 0);
> +	return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
> +}
> +
>  
>  /*
>   * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index dd6c831..92bf61f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -551,6 +551,8 @@ struct vcpu_vmx {
>  	u64 		      msr_guest_kernel_gs_base;
>  #endif
>  
> +	u64 		      arch_capabilities;
> +
>  	u32 vm_entry_controls_shadow;
>  	u32 vm_exit_controls_shadow;
>  	/*
> @@ -2979,6 +2981,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	case MSR_IA32_TSC:
>  		msr_info->data = guest_read_tsc(vcpu);
>  		break;
> +	case MSR_IA32_ARCH_CAPABILITIES:
> +		if (!msr_info->host_initiated &&
> +		    !guest_cpuid_has_arch_capabilities(vcpu))
> +			return 1;
> +		msr_info->data = to_vmx(vcpu)->arch_capabilities;
> +		break;
>  	case MSR_IA32_SYSENTER_CS:
>  		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
>  		break;
> @@ -3110,6 +3118,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
>  					      MSR_TYPE_W);
>  		break;
> +	case MSR_IA32_ARCH_CAPABILITIES:
> +		if (!msr_info->host_initiated)
> +			return 1;
> +		vmx->arch_capabilities = data;
> +		break;
>  	case MSR_IA32_CR_PAT:
>  		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
>  			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -5200,6 +5213,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  		++vmx->nmsrs;
>  	}
>  
> +	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
> +		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
>  
>  	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index e023ef9..94d1573 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -975,6 +975,7 @@ static u32 msrs_to_save[] = {
>  #endif
>  	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
>  	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
> +	MSR_IA32_ARCH_CAPABILITIES
>  };
>  
>  static unsigned num_msrs_to_save;
>
Jim Mattson Feb. 16, 2018, 4:29 p.m. UTC | #2
On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:

> Uhm, taking contents from the hardware is wrong (guess why---live
> migration).  I'll send a revert of those two lines.

Hardware seems like a reasonable place to get the default value (cf.
the VMX capability MSRs). Should these two lines just be moved to
vmx_create_vcpu()?
David Woodhouse Feb. 16, 2018, 4:33 p.m. UTC | #3
On Fri, 2018-02-16 at 08:29 -0800, Jim Mattson wrote:
> On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> > Uhm, taking contents from the hardware is wrong (guess why---live
> > migration).  I'll send a revert of those two lines.
> 
> Hardware seems like a reasonable place to get the default value (cf.
> the VMX capability MSRs). Should these two lines just be moved to
> vmx_create_cpu?

They're already in vmx_create_vcpu(). (Well, in vmx_vcpu_setup(), which is
a static function called only once, from vmx_create_vcpu().)
Paolo Bonzini Feb. 19, 2018, 1:10 p.m. UTC | #4
On 16/02/2018 17:29, Jim Mattson wrote:
> On Fri, Feb 16, 2018 at 6:18 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
>> Uhm, taking contents from the hardware is wrong (guess why---live
>> migration).  I'll send a revert of those two lines.
> 
> Hardware seems like a reasonable place to get the default value (cf.
> the VMX capability MSRs).

There are some differences:

- a zero value for ARCH_CAPABILITIES should be safe, while a zero value
for VMX capabilities doesn't really make sense.  On the contrary, a
nonzero value for ARCH_CAPABILITIES is not safe across live migration.

- VMX doesn't support live migration; before adding that support we will
probably have Tom's patches to retrieve MSR capabilities.

Thanks,

Paolo

> Should these two lines just be moved to
> vmx_create_cpu?
>
David Woodhouse Feb. 19, 2018, 1:35 p.m. UTC | #5
On Mon, 2018-02-19 at 14:10 +0100, Paolo Bonzini wrote:
> > Hardware seems like a reasonable place to get the default value (cf.
> > the VMX capability MSRs).
> 
> There are some differences:
> 
> - a zero value for ARCH_CAPABILITIES should be safe, while a zero value
> for VMX capabilities doesn't really make sense.  On the contrary, a
> nonzero value for ARCH_CAPABILITIES is not safe across live migration.

Any VMM which is going to support live migration surely needs to pay at
least a small amount of attention to the features it exposes? Exposing
the ARCH_CAPABILITIES CPUID bit without actually looking at the
contents of the associated MSR which that bit advertises would be... a
little strange, would it not? 

I don't see why we care so much about the *default* value, in that
context.
Paolo Bonzini Feb. 19, 2018, 2:07 p.m. UTC | #6
On 19/02/2018 14:35, David Woodhouse wrote:
> On Mon, 2018-02-19 at 14:10 +0100, Paolo Bonzini wrote:
>>> Hardware seems like a reasonable place to get the default value (cf.
>>> the VMX capability MSRs).
>>
>> There are some differences:
>>
>> - a zero value for ARCH_CAPABILITIES should be safe, while a zero value
>> for VMX capabilities doesn't really make sense.  On the contrary, a
>> nonzero value for ARCH_CAPABILITIES is not safe across live migration.
> 
> Any VMM which is going to support live migration surely needs to pay at
> least a small amount of attention to the features it exposes? Exposing
> the ARCH_CAPABILITIES CPUID bit without actually looking at the
> contents of the associated MSR which that bit advertises would be... a
> little strange, would it not? 

I think what we should do is simply backport Tom Lendacky's series to
4.14 and 4.9 ASAP, and add ARCH_CAPABILITIES support there.  Then the
question of the default becomes moot, more or less.

Paolo

> I don't see why we care so much about the *default* value, in that
> context.
diff mbox

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6f24483..9c6493f 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -380,6 +380,10 @@  static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	/* cpuid 7.0.ecx*/
 	const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
 
+	/* cpuid 7.0.edx*/
+	const u32 kvm_cpuid_7_0_edx_x86_features =
+		F(ARCH_CAPABILITIES);
+
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 
@@ -462,12 +466,14 @@  static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			/* PKU is not yet implemented for shadow paging. */
 			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
 				entry->ecx &= ~F(PKU);
+			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+			cpuid_mask(&entry->edx, CPUID_7_EDX);
 		} else {
 			entry->ebx = 0;
 			entry->ecx = 0;
+			entry->edx = 0;
 		}
 		entry->eax = 0;
-		entry->edx = 0;
 		break;
 	}
 	case 9:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index ec4f9dc..8719997 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -171,6 +171,14 @@  static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
 	return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
 }
 
+static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
+}
+
 
 /*
  * NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index dd6c831..92bf61f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -551,6 +551,8 @@  struct vcpu_vmx {
 	u64 		      msr_guest_kernel_gs_base;
 #endif
 
+	u64 		      arch_capabilities;
+
 	u32 vm_entry_controls_shadow;
 	u32 vm_exit_controls_shadow;
 	/*
@@ -2979,6 +2981,12 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC:
 		msr_info->data = guest_read_tsc(vcpu);
 		break;
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has_arch_capabilities(vcpu))
+			return 1;
+		msr_info->data = to_vmx(vcpu)->arch_capabilities;
+		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
 		break;
@@ -3110,6 +3118,11 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
 					      MSR_TYPE_W);
 		break;
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated)
+			return 1;
+		vmx->arch_capabilities = data;
+		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -5200,6 +5213,8 @@  static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
 
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e023ef9..94d1573 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -975,6 +975,7 @@  static u32 msrs_to_save[] = {
 #endif
 	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
 	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
+	MSR_IA32_ARCH_CAPABILITIES
 };
 
 static unsigned num_msrs_to_save;