diff mbox series

[v2,44/49] KVM: x86: Update guest cpu_caps at runtime for dynamic CPUID-based features

Message ID 20240517173926.965351-45-seanjc@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: CPUID overhaul, fixes, and caching | expand

Commit Message

Sean Christopherson May 17, 2024, 5:39 p.m. UTC
When updating guest CPUID entries to emulate runtime behavior, e.g. when
the guest enables a CR4-based feature that is tied to a CPUID flag, also
update the vCPU's cpu_caps accordingly.  This will allow replacing all
usage of guest_cpuid_has() with guest_cpu_cap_has().

Note, this relies on kvm_set_cpuid() taking a snapshot of cpu_caps before
invoking kvm_update_cpuid_runtime(), i.e. when KVM is updating CPUID
entries that *may* become the vCPU's CPUID, so that unwinding to the old
cpu_caps is possible if userspace tries to set bogus CPUID information.

Note #2, none of the features in question use guest_cpu_cap_has() at this
time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.

Cc: Yang Weijiang <weijiang.yang@intel.com>
Cc: Robert Hoo <robert.hoo.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/cpuid.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

Comments

Maxim Levitsky July 5, 2024, 2:26 a.m. UTC | #1
On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly.  This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
> 
> Note, this relies on kvm_set_cpuid() taking a snapshot of cpu_caps before
> invoking kvm_update_cpuid_runtime(), i.e. when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, so that unwinding to the old
> cpu_caps is possible if userspace tries to set bogus CPUID information.
> 
> Note #2, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> 
> Cc: Yang Weijiang <weijiang.yang@intel.com>
> Cc: Robert Hoo <robert.hoo.linux@gmail.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/cpuid.c | 28 +++++++++++++++++++---------
>  1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 552e65ba5efa..1424a9d4eb17 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -330,28 +330,38 @@ static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu)
>  	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
>  }
>  
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> +						       struct kvm_cpuid_entry2 *entry,
> +						       unsigned int x86_feature,
> +						       bool has_feature)
> +{
> +	cpuid_entry_change(entry, x86_feature, has_feature);
> +	guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
>  void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpuid_entry2 *best;
>  
>  	best = kvm_find_cpuid_entry(vcpu, 1);
>  	if (best) {
> -		cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
> +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>  
> -		cpuid_entry_change(best, X86_FEATURE_APIC,
> -			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC,
> +					   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>  
>  		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> -			cpuid_entry_change(best, X86_FEATURE_MWAIT,
> -					   vcpu->arch.ia32_misc_enable_msr &
> -					   MSR_IA32_MISC_ENABLE_MWAIT);
> +			kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT,
> +						   vcpu->arch.ia32_misc_enable_msr &
> +						   MSR_IA32_MISC_ENABLE_MWAIT);
>  	}
>  
>  	best = kvm_find_cpuid_entry_index(vcpu, 7, 0);
>  	if (best)
> -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
> +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> +
>  
>  	best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
>  	if (best)


I am not 100% sure that we need to do this.

Runtime cpuid changes are a hack that Intel did back then,
due to various reasons, These changes don't really change the feature set
that CPU supports, but merly as you like to say 'massage' the output of
the CPUID instruction to make the unmodified OS happy usually.

Thus it feels to me that CPU caps should not include the dynamic features, and neither
KVM should use the value of these as a source for truth, but rather the underlying
source of the truth (e.g CR4).

But if you insist, I don't really have a very strong reason to object this.

Best regards,
	Maxim Levitsky
Sean Christopherson July 9, 2024, 12:24 a.m. UTC | #2
On Thu, Jul 04, 2024, Maxim Levitsky wrote:
> On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> > -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> > -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
> > +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > +
> >  
> >  	best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
> >  	if (best)
> 
> 
> I am not 100% sure that we need to do this.
> 
> Runtime cpuid changes are a hack that Intel did back then, due to various
> reasons, These changes don't really change the feature set that CPU supports,
> but merly as you like to say 'massage' the output of the CPUID instruction to
> make the unmodified OS happy usually.
> 
> Thus it feels to me that CPU caps should not include the dynamic features,
> and neither KVM should use the value of these as a source for truth, but
> rather the underlying source of the truth (e.g CR4).
> 
> But if you insist, I don't really have a very strong reason to object this.

FWIW, I think I agree that CR4 should be the source of truth, but it's largely a
moot point because KVM doesn't actually check OSXSAVE or OSPKE, as KVM never
emulates the relevant instructions.  So for those, it's indeed not strictly
necessary.

Unfortunately, KVM has established ABI for checking X86_FEATURE_MWAIT when
"emulating" MONITOR and MWAIT, i.e. KVM can't use vcpu->arch.ia32_misc_enable_msr
as the source of truth.  So for MWAIT, KVM does need to update CPU caps (or carry
even more awful MWAIT code), at which point extending the behavior to the CR4
features (and to X86_FEATURE_APIC) is practically free.
Maxim Levitsky Sept. 10, 2024, 8:41 p.m. UTC | #3
On Mon, 2024-07-08 at 17:24 -0700, Sean Christopherson wrote:
> On Thu, Jul 04, 2024, Maxim Levitsky wrote:
> > On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> > > -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> > > -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > > +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
> > > +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > > +
> > >  
> > >  	best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
> > >  	if (best)
> > 
> > I am not 100% sure that we need to do this.
> > 
> > Runtime cpuid changes are a hack that Intel did back then, due to various
> > reasons, These changes don't really change the feature set that CPU supports,
> > but merly as you like to say 'massage' the output of the CPUID instruction to
> > make the unmodified OS happy usually.
> > 
> > Thus it feels to me that CPU caps should not include the dynamic features,
> > and neither KVM should use the value of these as a source for truth, but
> > rather the underlying source of the truth (e.g CR4).
> > 
> > But if you insist, I don't really have a very strong reason to object this.
> 
> FWIW, I think I agree that CR4 should be the source of truth, but it's largely a
> moot point because KVM doesn't actually check OSXSAVE or OSPKE, as KVM never
> emulates the relevant instructions.  So for those, it's indeed not strictly
> necessary.
> 
> Unfortunately, KVM has established ABI for checking X86_FEATURE_MWAIT when
> "emulating" MONITOR and MWAIT, i.e. KVM can't use vcpu->arch.ia32_misc_enable_msr
> as the source of truth.

Can you elaborate on this? Can you give me an example of the ABI?


>   So for MWAIT, KVM does need to update CPU caps (or carry
> even more awful MWAIT code), at which point extending the behavior to the CR4
> features (and to X86_FEATURE_APIC) is practically free.
> 


Best regards,
	Maxim Levitsky
Sean Christopherson Sept. 11, 2024, 3:41 p.m. UTC | #4
On Tue, Sep 10, 2024, Maxim Levitsky wrote:
> On Mon, 2024-07-08 at 17:24 -0700, Sean Christopherson wrote:
> > On Thu, Jul 04, 2024, Maxim Levitsky wrote:
> > > On Fri, 2024-05-17 at 10:39 -0700, Sean Christopherson wrote:
> > > > -		cpuid_entry_change(best, X86_FEATURE_OSPKE,
> > > > -				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > > > +		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
> > > > +					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > > > +
> > > >  
> > > >  	best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
> > > >  	if (best)
> > > 
> > > I am not 100% sure that we need to do this.
> > > 
> > > Runtime cpuid changes are a hack that Intel did back then, due to various
> > > reasons, These changes don't really change the feature set that CPU supports,
> > > but merly as you like to say 'massage' the output of the CPUID instruction to
> > > make the unmodified OS happy usually.
> > > 
> > > Thus it feels to me that CPU caps should not include the dynamic features,
> > > and neither KVM should use the value of these as a source for truth, but
> > > rather the underlying source of the truth (e.g CR4).
> > > 
> > > But if you insist, I don't really have a very strong reason to object this.
> > 
> > FWIW, I think I agree that CR4 should be the source of truth, but it's largely a
> > moot point because KVM doesn't actually check OSXSAVE or OSPKE, as KVM never
> > emulates the relevant instructions.  So for those, it's indeed not strictly
> > necessary.
> > 
> > Unfortunately, KVM has established ABI for checking X86_FEATURE_MWAIT when
> > "emulating" MONITOR and MWAIT, i.e. KVM can't use vcpu->arch.ia32_misc_enable_msr
> > as the source of truth.
> 
> Can you elaborate on this? Can you give me an example of the ABI?

Writes to MSR_IA32_MISC_ENABLE are guarded with a quirk:

		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
		    ((old_val ^ data)  & MSR_IA32_MISC_ENABLE_MWAIT)) {
			if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
				return 1;
			vcpu->arch.ia32_misc_enable_msr = data;
			kvm_update_cpuid_runtime(vcpu);
		} else {
			vcpu->arch.ia32_misc_enable_msr = data;
		}

as is enforcement of #UD on MONITOR/MWAIT.

  static int kvm_emulate_monitor_mwait(struct kvm_vcpu *vcpu, const char *insn)
  {
	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS) &&
	    !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
		return kvm_handle_invalid_op(vcpu);

	pr_warn_once("%s instruction emulated as NOP!\n", insn);
	return kvm_emulate_as_nop(vcpu);
  }

If KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT is enabled but KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS
is _disabled_, then KVM's ABI is to honor X86_FEATURE_MWAIT regardless of what
is in vcpu->arch.ia32_misc_enable_msr (because userspace owns X86_FEATURE_MWAIT
in that scenario).
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 552e65ba5efa..1424a9d4eb17 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -330,28 +330,38 @@  static u64 cpuid_get_supported_xcr0(struct kvm_vcpu *vcpu)
 	return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
 }
 
+static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+						       struct kvm_cpuid_entry2 *entry,
+						       unsigned int x86_feature,
+						       bool has_feature)
+{
+	cpuid_entry_change(entry, x86_feature, has_feature);
+	guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+}
+
 void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
 
 	best = kvm_find_cpuid_entry(vcpu, 1);
 	if (best) {
-		cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
-				   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
+		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSXSAVE,
+					   kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
 
-		cpuid_entry_change(best, X86_FEATURE_APIC,
-			   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_APIC,
+					   vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
 
 		if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
-			cpuid_entry_change(best, X86_FEATURE_MWAIT,
-					   vcpu->arch.ia32_misc_enable_msr &
-					   MSR_IA32_MISC_ENABLE_MWAIT);
+			kvm_update_feature_runtime(vcpu, best, X86_FEATURE_MWAIT,
+						   vcpu->arch.ia32_misc_enable_msr &
+						   MSR_IA32_MISC_ENABLE_MWAIT);
 	}
 
 	best = kvm_find_cpuid_entry_index(vcpu, 7, 0);
 	if (best)
-		cpuid_entry_change(best, X86_FEATURE_OSPKE,
-				   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+		kvm_update_feature_runtime(vcpu, best, X86_FEATURE_OSPKE,
+					   kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+
 
 	best = kvm_find_cpuid_entry_index(vcpu, 0xD, 0);
 	if (best)