diff mbox series

[1/4] KVM: x86: Allow vendor code to disable quirks

Message ID 20250301073428.2435768-2-pbonzini@redhat.com (mailing list archive)
State New
Headers show
Series KVM: x86: Introduce quirk KVM_X86_QUIRK_EPT_IGNORE_GUEST_PAT | expand

Commit Message

Paolo Bonzini March 1, 2025, 7:34 a.m. UTC
In some cases, the handling of quirks is split between platform-specific
code and generic code, or it is done entirely in generic code, but the
relevant bug does not trigger on some platforms; for example,
KVM_X86_QUIRK_CD_NW_CLEARED is only applicable to AMD systems.  In that
case, allow unaffected vendor modules to disable handling of the quirk.

The quirk remains available in KVM_CAP_DISABLE_QUIRKS2, because that API
tells userspace that KVM *knows* that some of its past behavior was bogus
or just undesirable.  In other words, it's plausible for userspace to
refuse to run if a quirk is not listed by KVM_CAP_DISABLE_QUIRKS2.

In kvm_check_has_quirk(), in addition to checking if a quirk is not
explicitly disabled by the user, also verify if the quirk applies to
the hardware.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Message-ID: <20250224070832.31394-1-yan.y.zhao@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c |  1 +
 arch/x86/kvm/x86.c     |  1 +
 arch/x86/kvm/x86.h     | 12 +++++++-----
 3 files changed, 9 insertions(+), 5 deletions(-)

Comments

Xiaoyao Li March 2, 2025, 5:11 p.m. UTC | #1
On 3/1/2025 3:34 PM, Paolo Bonzini wrote:
> In some cases, the handling of quirks is split between platform-specific
> code and generic code, or it is done entirely in generic code, but the
> relevant bug does not trigger on some platforms; for example,
> KVM_X86_QUIRK_CD_NW_CLEARED is only applicable to AMD systems.  In that
> case, allow unaffected vendor modules to disable handling of the quirk.
> 
> The quirk remains available in KVM_CAP_DISABLE_QUIRKS2, because that API
> tells userspace that KVM *knows* that some of its past behavior was bogus
> or just undesirable.  In other words, it's plausible for userspace to
> refuse to run if a quirk is not listed by KVM_CAP_DISABLE_QUIRKS2.

I think that applies only to existing quirks, for backwards-compatibility
reasons. For a new quirk bit that is vendor specific, it is OK for
KVM_CAP_DISABLE_QUIRKS2 to enumerate a different value.

> In kvm_check_has_quirk(), in addition to checking if a quirk is not
> explicitly disabled by the user, also verify if the quirk applies to
> the hardware.
> 
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>

This is inconsistent with the Author.

> Message-ID: <20250224070832.31394-1-yan.y.zhao@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/vmx/vmx.c |  1 +
>   arch/x86/kvm/x86.c     |  1 +
>   arch/x86/kvm/x86.h     | 12 +++++++-----
>   3 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 486fbdb4365c..75df4caea2f7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8506,6 +8506,7 @@ __init int vmx_hardware_setup(void)
>   
>   	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
>   
> +	kvm_caps.inapplicable_quirks = KVM_X86_QUIRK_CD_NW_CLEARED;

I suggest making inapplicable_quirks per-VM, as I commented on patch 4:

https://lore.kernel.org/all/338901b6-4d10-480d-bd0a-0db8ec4afad5@intel.com/

>   	return r;
>   }
>   
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 856ceeb4fb35..fd0a44e59314 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9775,6 +9775,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
>   		kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
>   		kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
>   	}
> +	kvm_caps.inapplicable_quirks = 0;
>   
>   	rdmsrl_safe(MSR_EFER, &kvm_host.efer);
>   
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 8ce6da98b5a2..9af199c8e5c8 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -34,6 +34,7 @@ struct kvm_caps {
>   	u64 supported_xcr0;
>   	u64 supported_xss;
>   	u64 supported_perf_cap;
> +	u64 inapplicable_quirks;
>   };
>   
>   struct kvm_host_values {
> @@ -354,11 +355,6 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu,
>   	return kvm_register_write_raw(vcpu, reg, val);
>   }
>   
> -static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
> -{
> -	return !(kvm->arch.disabled_quirks & quirk);
> -}
> -
>   void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
>   
>   u64 get_kvmclock_ns(struct kvm *kvm);
> @@ -394,6 +390,12 @@ extern struct kvm_host_values kvm_host;
>   
>   extern bool enable_pmu;
>   
> +static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
> +{
> +	u64 disabled_quirks = kvm_caps.inapplicable_quirks | kvm->arch.disabled_quirks;
> +	return !(disabled_quirks & quirk);
> +}
> +
>   /*
>    * Get a filtered version of KVM's supported XCR0 that strips out dynamic
>    * features for which the current process doesn't (yet) have permission to use.
Yan Zhao March 3, 2025, 1:15 a.m. UTC | #2
On Sat, Mar 01, 2025 at 02:34:25AM -0500, Paolo Bonzini wrote:
> In some cases, the handling of quirks is split between platform-specific
> code and generic code, or it is done entirely in generic code, but the
> relevant bug does not trigger on some platforms; for example,
> KVM_X86_QUIRK_CD_NW_CLEARED is only applicable to AMD systems.  In that
> case, allow unaffected vendor modules to disable handling of the quirk.
> 
> The quirk remains available in KVM_CAP_DISABLE_QUIRKS2, because that API
> tells userspace that KVM *knows* that some of its past behavior was bogus
> or just undesirable.  In other words, it's plausible for userspace to
> refuse to run if a quirk is not listed by KVM_CAP_DISABLE_QUIRKS2.
> 
> In kvm_check_has_quirk(), in addition to checking if a quirk is not
> explicitly disabled by the user, also verify if the quirk applies to
> the hardware.
> 
> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
> Message-ID: <20250224070832.31394-1-yan.y.zhao@intel.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/vmx.c |  1 +
>  arch/x86/kvm/x86.c     |  1 +
>  arch/x86/kvm/x86.h     | 12 +++++++-----
>  3 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 486fbdb4365c..75df4caea2f7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -8506,6 +8506,7 @@ __init int vmx_hardware_setup(void)
>  
>  	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
>  
> +	kvm_caps.inapplicable_quirks = KVM_X86_QUIRK_CD_NW_CLEARED;
As you mentioned, KVM_X86_QUIRK_CD_NW_CLEARED has no effect on Intel
platforms, regardless of whether kvm_check_has_quirk() returns true or false.

So what is the purpose of introducing kvm_caps.inapplicable_quirks?

One concern is that, since KVM_X86_QUIRK_CD_NW_CLEARED is not for Intel
platforms, it's unnatural for Intel's code to add it to
kvm_caps.inapplicable_quirks.
If AMD introduces new quirks in the future that apply only to its own platform,
its developers may have no idea whether those quirks apply to Intel as well.

>  	return r;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 856ceeb4fb35..fd0a44e59314 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9775,6 +9775,7 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
>  		kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
>  		kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
>  	}
> +	kvm_caps.inapplicable_quirks = 0;
>  
>  	rdmsrl_safe(MSR_EFER, &kvm_host.efer);
>  
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 8ce6da98b5a2..9af199c8e5c8 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -34,6 +34,7 @@ struct kvm_caps {
>  	u64 supported_xcr0;
>  	u64 supported_xss;
>  	u64 supported_perf_cap;
> +	u64 inapplicable_quirks;
>  };
>  
>  struct kvm_host_values {
> @@ -354,11 +355,6 @@ static inline void kvm_register_write(struct kvm_vcpu *vcpu,
>  	return kvm_register_write_raw(vcpu, reg, val);
>  }
>  
> -static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
> -{
> -	return !(kvm->arch.disabled_quirks & quirk);
> -}
> -
>  void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
>  
>  u64 get_kvmclock_ns(struct kvm *kvm);
> @@ -394,6 +390,12 @@ extern struct kvm_host_values kvm_host;
>  
>  extern bool enable_pmu;
>  
> +static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
> +{
> +	u64 disabled_quirks = kvm_caps.inapplicable_quirks | kvm->arch.disabled_quirks;
> +	return !(disabled_quirks & quirk);
> +}
> +
>  /*
>   * Get a filtered version of KVM's supported XCR0 that strips out dynamic
>   * features for which the current process doesn't (yet) have permission to use.
> -- 
> 2.43.5
> 
>
Paolo Bonzini March 3, 2025, 4:04 p.m. UTC | #3
On 3/3/25 02:15, Yan Zhao wrote:
> On Sat, Mar 01, 2025 at 02:34:25AM -0500, Paolo Bonzini wrote:
>> In some cases, the handling of quirks is split between platform-specific
>> code and generic code, or it is done entirely in generic code, but the
>> relevant bug does not trigger on some platforms; for example,
>> KVM_X86_QUIRK_CD_NW_CLEARED is only applicable to AMD systems.  In that
>> case, allow unaffected vendor modules to disable handling of the quirk.
>>
>> The quirk remains available in KVM_CAP_DISABLE_QUIRKS2, because that API
>> tells userspace that KVM *knows* that some of its past behavior was bogus
>> or just undesirable.  In other words, it's plausible for userspace to
>> refuse to run if a quirk is not listed by KVM_CAP_DISABLE_QUIRKS2.
>>
>> In kvm_check_has_quirk(), in addition to checking if a quirk is not
>> explicitly disabled by the user, also verify if the quirk applies to
>> the hardware.
>>
>> Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
>> Message-ID: <20250224070832.31394-1-yan.y.zhao@intel.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   arch/x86/kvm/vmx/vmx.c |  1 +
>>   arch/x86/kvm/x86.c     |  1 +
>>   arch/x86/kvm/x86.h     | 12 +++++++-----
>>   3 files changed, 9 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index 486fbdb4365c..75df4caea2f7 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -8506,6 +8506,7 @@ __init int vmx_hardware_setup(void)
>>   
>>   	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
>>   
>> +	kvm_caps.inapplicable_quirks = KVM_X86_QUIRK_CD_NW_CLEARED;
> 
> As you mentioned, KVM_X86_QUIRK_CD_NW_CLEARED has no effect on Intel's
> platforms, no matter kvm_check_has_quirk() returns true or false.
> So, what's the purpose to introduce kvm_caps.inapplicable_quirks?

The purpose is to later mark IGNORE_GUEST_PAT as inapplicable, so that 
the relevant code does not run on AMD.  However you have a point here:

> One concern is that since KVM_X86_QUIRK_CD_NW_CLEARED is not for Intel
> platforms, it's unnatural for Intel's code to add it into the
> kvm_caps.inapplicable_quirks.

So let's instead have kvm-amd.ko clear it from inapplicable_quirks.  And 
likewise kvm-intel.ko can clear IGNORE_GUEST_PAT.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 486fbdb4365c..75df4caea2f7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8506,6 +8506,7 @@  __init int vmx_hardware_setup(void)
 
 	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
 
+	kvm_caps.inapplicable_quirks = KVM_X86_QUIRK_CD_NW_CLEARED;
 	return r;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 856ceeb4fb35..fd0a44e59314 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9775,6 +9775,7 @@  int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 		kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
 	}
+	kvm_caps.inapplicable_quirks = 0;
 
 	rdmsrl_safe(MSR_EFER, &kvm_host.efer);
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8ce6da98b5a2..9af199c8e5c8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -34,6 +34,7 @@  struct kvm_caps {
 	u64 supported_xcr0;
 	u64 supported_xss;
 	u64 supported_perf_cap;
+	u64 inapplicable_quirks;
 };
 
 struct kvm_host_values {
@@ -354,11 +355,6 @@  static inline void kvm_register_write(struct kvm_vcpu *vcpu,
 	return kvm_register_write_raw(vcpu, reg, val);
 }
 
-static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
-{
-	return !(kvm->arch.disabled_quirks & quirk);
-}
-
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 u64 get_kvmclock_ns(struct kvm *kvm);
@@ -394,6 +390,12 @@  extern struct kvm_host_values kvm_host;
 
 extern bool enable_pmu;
 
+static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
+{
+	u64 disabled_quirks = kvm_caps.inapplicable_quirks | kvm->arch.disabled_quirks;
+	return !(disabled_quirks & quirk);
+}
+
 /*
  * Get a filtered version of KVM's supported XCR0 that strips out dynamic
  * features for which the current process doesn't (yet) have permission to use.