diff mbox series

[v4,1/3] KVM: x86: add support for user wait instructions

Message ID 20190619060945.14104-2-tao3.xu@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: Enable user wait instructions | expand

Commit Message

Tao Xu June 19, 2019, 6:09 a.m. UTC
UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
This patch adds support for user wait instructions in KVM. Availability
of the user wait instructions is indicated by the presence of the CPUID
feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
set the maximum time.

The behavior of user wait instructions in VMX non-root operation is
determined first by the setting of the "enable user wait and pause"
secondary processor-based VM-execution control bit 26.
	If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
an invalid-opcode exception (#UD).
	If the VM-execution control is 1, treatment is based on the
setting of the “RDTSC exiting” VM-execution control. Because KVM never
enables RDTSC exiting, if the instruction causes a delay, the amount of
time delayed is called here the physical delay. The physical delay is
first computed by determining the virtual delay. If
IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
EDX:EAX minus the value that RDTSC would return; if
IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).

Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose it to kvm and enable it only when
guest CPUID has it.

Detailed information about user wait instructions can be found in the
latest Intel 64 and IA-32 Architectures Software Developer's Manual.

Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
---

no changes in v4.
---
 arch/x86/include/asm/vmx.h      | 1 +
 arch/x86/kvm/cpuid.c            | 2 +-
 arch/x86/kvm/vmx/capabilities.h | 6 ++++++
 arch/x86/kvm/vmx/vmx.c          | 4 ++++
 4 files changed, 12 insertions(+), 1 deletion(-)

Comments

Xiaoyao Li June 19, 2019, 6:23 a.m. UTC | #1
On 6/19/2019 2:09 PM, Tao Xu wrote:
> UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
> This patch adds support for user wait instructions in KVM. Availability
> of the user wait instructions is indicated by the presence of the CPUID
> feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
> be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
> set the maximum time.
> 
> The behavior of user wait instructions in VMX non-root operation is
> determined first by the setting of the "enable user wait and pause"
> secondary processor-based VM-execution control bit 26.
> 	If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
> an invalid-opcode exception (#UD).
> 	If the VM-execution control is 1, treatment is based on the
> setting of the “RDTSC exiting” VM-execution control. Because KVM never
> enables RDTSC exiting, if the instruction causes a delay, the amount of
> time delayed is called here the physical delay. The physical delay is
> first computed by determining the virtual delay. If
> IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
> EDX:EAX minus the value that RDTSC would return; if
> IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
> of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).
> 
> Because umwait and tpause can put a (psysical) CPU into a power saving
> state, by default we dont't expose it to kvm and enable it only when
> guest CPUID has it.
> 
> Detailed information about user wait instructions can be found in the
> latest Intel 64 and IA-32 Architectures Software Developer's Manual.
> 
> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
> Signed-off-by: Tao Xu <tao3.xu@intel.com>
> ---
> 
> no changes in v4.
> ---
>   arch/x86/include/asm/vmx.h      | 1 +
>   arch/x86/kvm/cpuid.c            | 2 +-
>   arch/x86/kvm/vmx/capabilities.h | 6 ++++++
>   arch/x86/kvm/vmx/vmx.c          | 4 ++++
>   4 files changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index a39136b0d509..8f00882664d3 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -69,6 +69,7 @@
>   #define SECONDARY_EXEC_PT_USE_GPA		0x01000000
>   #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	0x00400000
>   #define SECONDARY_EXEC_TSC_SCALING              0x02000000
> +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE	0x04000000
>   
>   #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>   #define PIN_BASED_NMI_EXITING                   0x00000008
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index e18a9f9f65b5..48bd851a6ae5 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>   		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
>   		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>   		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> -		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
> +		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
>   
>   	/* cpuid 7.0.edx*/
>   	const u32 kvm_cpuid_7_0_edx_x86_features =
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index d6664ee3d127..fd77e17651b4 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
>   		SECONDARY_EXEC_TSC_SCALING;
>   }
>   
> +static inline bool vmx_waitpkg_supported(void)
> +{
> +	return vmcs_config.cpu_based_2nd_exec_ctrl &
> +		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;

Shouldn't it be
	return vmx->secondary_exec_control &
                 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;   ?

> +}
> +
>   static inline bool cpu_has_vmx_apicv(void)
>   {
>   	return cpu_has_vmx_apic_register_virt() &&
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index b93e36ddee5e..b35bfac30a34 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2250,6 +2250,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>   			SECONDARY_EXEC_RDRAND_EXITING |
>   			SECONDARY_EXEC_ENABLE_PML |
>   			SECONDARY_EXEC_TSC_SCALING |
> +			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
>   			SECONDARY_EXEC_PT_USE_GPA |
>   			SECONDARY_EXEC_PT_CONCEAL_VMX |
>   			SECONDARY_EXEC_ENABLE_VMFUNC |
> @@ -3987,6 +3988,9 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
>   		}
>   	}
>   
> +	if (!guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG))
> +		exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
> +
>   	vmx->secondary_exec_control = exec_control;
>   }
>   
>
Tao Xu June 19, 2019, 7:01 a.m. UTC | #2
On 6/19/2019 2:23 PM, Xiaoyao Li wrote:
> 
> 
> On 6/19/2019 2:09 PM, Tao Xu wrote:
>> UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
>> This patch adds support for user wait instructions in KVM. Availability
>> of the user wait instructions is indicated by the presence of the CPUID
>> feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
>> be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
>> set the maximum time.
>>
>> The behavior of user wait instructions in VMX non-root operation is
>> determined first by the setting of the "enable user wait and pause"
>> secondary processor-based VM-execution control bit 26.
>>     If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
>> an invalid-opcode exception (#UD).
>>     If the VM-execution control is 1, treatment is based on the
>> setting of the “RDTSC exiting” VM-execution control. Because KVM never
>> enables RDTSC exiting, if the instruction causes a delay, the amount of
>> time delayed is called here the physical delay. The physical delay is
>> first computed by determining the virtual delay. If
>> IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
>> EDX:EAX minus the value that RDTSC would return; if
>> IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
>> of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).
>>
>> Because umwait and tpause can put a (psysical) CPU into a power saving
>> state, by default we dont't expose it to kvm and enable it only when
>> guest CPUID has it.
>>
>> Detailed information about user wait instructions can be found in the
>> latest Intel 64 and IA-32 Architectures Software Developer's Manual.
>>
>> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>> ---
>>
>> no changes in v4.
>> ---
>>   arch/x86/include/asm/vmx.h      | 1 +
>>   arch/x86/kvm/cpuid.c            | 2 +-
>>   arch/x86/kvm/vmx/capabilities.h | 6 ++++++
>>   arch/x86/kvm/vmx/vmx.c          | 4 ++++
>>   4 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>> index a39136b0d509..8f00882664d3 100644
>> --- a/arch/x86/include/asm/vmx.h
>> +++ b/arch/x86/include/asm/vmx.h
>> @@ -69,6 +69,7 @@
>>   #define SECONDARY_EXEC_PT_USE_GPA        0x01000000
>>   #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC    0x00400000
>>   #define SECONDARY_EXEC_TSC_SCALING              0x02000000
>> +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE    0x04000000
>>   #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>>   #define PIN_BASED_NMI_EXITING                   0x00000008
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index e18a9f9f65b5..48bd851a6ae5 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct 
>> kvm_cpuid_entry2 *entry, u32 function,
>>           F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
>>           F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>>           F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
>> -        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
>> +        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
>>       /* cpuid 7.0.edx*/
>>       const u32 kvm_cpuid_7_0_edx_x86_features =
>> diff --git a/arch/x86/kvm/vmx/capabilities.h 
>> b/arch/x86/kvm/vmx/capabilities.h
>> index d6664ee3d127..fd77e17651b4 100644
>> --- a/arch/x86/kvm/vmx/capabilities.h
>> +++ b/arch/x86/kvm/vmx/capabilities.h
>> @@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
>>           SECONDARY_EXEC_TSC_SCALING;
>>   }
>> +static inline bool vmx_waitpkg_supported(void)
>> +{
>> +    return vmcs_config.cpu_based_2nd_exec_ctrl &
>> +        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
> 
> Shouldn't it be
>      return vmx->secondary_exec_control &
>                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;   ?

vmx->secondary_exec_control is another way to get SECONDARY_EXEC_CONTROL,
but it needs vmx as an input. Using vmcs_config.cpu_based_2nd_exec_ctrl
makes it easy to extend to other places.
Xiaoyao Li June 19, 2019, 7:16 a.m. UTC | #3
On 6/19/2019 3:01 PM, Tao Xu wrote:
> On 6/19/2019 2:23 PM, Xiaoyao Li wrote:
>>
>>
>> On 6/19/2019 2:09 PM, Tao Xu wrote:
>>> UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
>>> This patch adds support for user wait instructions in KVM. Availability
>>> of the user wait instructions is indicated by the presence of the CPUID
>>> feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
>>> be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
>>> set the maximum time.
>>>
>>> The behavior of user wait instructions in VMX non-root operation is
>>> determined first by the setting of the "enable user wait and pause"
>>> secondary processor-based VM-execution control bit 26.
>>>     If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
>>> an invalid-opcode exception (#UD).
>>>     If the VM-execution control is 1, treatment is based on the
>>> setting of the “RDTSC exiting” VM-execution control. Because KVM never
>>> enables RDTSC exiting, if the instruction causes a delay, the amount of
>>> time delayed is called here the physical delay. The physical delay is
>>> first computed by determining the virtual delay. If
>>> IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
>>> EDX:EAX minus the value that RDTSC would return; if
>>> IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
>>> of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).
>>>
>>> Because umwait and tpause can put a (psysical) CPU into a power saving
>>> state, by default we dont't expose it to kvm and enable it only when
>>> guest CPUID has it.
>>>
>>> Detailed information about user wait instructions can be found in the
>>> latest Intel 64 and IA-32 Architectures Software Developer's Manual.
>>>
>>> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
>>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>>> ---
>>>
>>> no changes in v4.
>>> ---
>>>   arch/x86/include/asm/vmx.h      | 1 +
>>>   arch/x86/kvm/cpuid.c            | 2 +-
>>>   arch/x86/kvm/vmx/capabilities.h | 6 ++++++
>>>   arch/x86/kvm/vmx/vmx.c          | 4 ++++
>>>   4 files changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>> index a39136b0d509..8f00882664d3 100644
>>> --- a/arch/x86/include/asm/vmx.h
>>> +++ b/arch/x86/include/asm/vmx.h
>>> @@ -69,6 +69,7 @@
>>>   #define SECONDARY_EXEC_PT_USE_GPA        0x01000000
>>>   #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC    0x00400000
>>>   #define SECONDARY_EXEC_TSC_SCALING              0x02000000
>>> +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE    0x04000000
>>>   #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>>>   #define PIN_BASED_NMI_EXITING                   0x00000008
>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>> index e18a9f9f65b5..48bd851a6ae5 100644
>>> --- a/arch/x86/kvm/cpuid.c
>>> +++ b/arch/x86/kvm/cpuid.c
>>> @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct 
>>> kvm_cpuid_entry2 *entry, u32 function,
>>>           F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
>>>           F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>>>           F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
>>> -        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
>>> +        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
>>>       /* cpuid 7.0.edx*/
>>>       const u32 kvm_cpuid_7_0_edx_x86_features =
>>> diff --git a/arch/x86/kvm/vmx/capabilities.h 
>>> b/arch/x86/kvm/vmx/capabilities.h
>>> index d6664ee3d127..fd77e17651b4 100644
>>> --- a/arch/x86/kvm/vmx/capabilities.h
>>> +++ b/arch/x86/kvm/vmx/capabilities.h
>>> @@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
>>>           SECONDARY_EXEC_TSC_SCALING;
>>>   }
>>> +static inline bool vmx_waitpkg_supported(void)
>>> +{
>>> +    return vmcs_config.cpu_based_2nd_exec_ctrl &
>>> +        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
>>
>> Shouldn't it be
>>      return vmx->secondary_exec_control &
>>                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;   ?
> 
> vmx->secondary_exec_control is another way to get SECONDARY_EXEC_CONTROL 
> but it need to add vmx as input. Use vmcs_config.cpu_based_2nd_exec_ctrl 
>   is easy expand to use in other place.
> 

This patch wants to enable waitpkg for the guest only when
guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG), and it only updates
vmx->secondary_exec_control based on the guest's CPUID when setting up the
vcpu. But vmcs_config remains unchanged, so using vmcs_config there is wrong.
Tao Xu June 19, 2019, 1:32 p.m. UTC | #4
On 6/19/2019 3:16 PM, Xiaoyao Li wrote:
> On 6/19/2019 3:01 PM, Tao Xu wrote:
>> On 6/19/2019 2:23 PM, Xiaoyao Li wrote:
>>>
>>>
>>> On 6/19/2019 2:09 PM, Tao Xu wrote:
>>>> UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
>>>> This patch adds support for user wait instructions in KVM. Availability
>>>> of the user wait instructions is indicated by the presence of the CPUID
>>>> feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
>>>> be executed at any privilege level, and use IA32_UMWAIT_CONTROL MSR to
>>>> set the maximum time.
>>>>
>>>> The behavior of user wait instructions in VMX non-root operation is
>>>> determined first by the setting of the "enable user wait and pause"
>>>> secondary processor-based VM-execution control bit 26.
>>>>     If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
>>>> an invalid-opcode exception (#UD).
>>>>     If the VM-execution control is 1, treatment is based on the
>>>> setting of the “RDTSC exiting” VM-execution control. Because KVM never
>>>> enables RDTSC exiting, if the instruction causes a delay, the amount of
>>>> time delayed is called here the physical delay. The physical delay is
>>>> first computed by determining the virtual delay. If
>>>> IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
>>>> EDX:EAX minus the value that RDTSC would return; if
>>>> IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
>>>> of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).
>>>>
>>>> Because umwait and tpause can put a (psysical) CPU into a power saving
>>>> state, by default we dont't expose it to kvm and enable it only when
>>>> guest CPUID has it.
>>>>
>>>> Detailed information about user wait instructions can be found in the
>>>> latest Intel 64 and IA-32 Architectures Software Developer's Manual.
>>>>
>>>> Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
>>>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>>>> Signed-off-by: Tao Xu <tao3.xu@intel.com>
>>>> ---
>>>>
>>>> no changes in v4.
>>>> ---
>>>>   arch/x86/include/asm/vmx.h      | 1 +
>>>>   arch/x86/kvm/cpuid.c            | 2 +-
>>>>   arch/x86/kvm/vmx/capabilities.h | 6 ++++++
>>>>   arch/x86/kvm/vmx/vmx.c          | 4 ++++
>>>>   4 files changed, 12 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>>> index a39136b0d509..8f00882664d3 100644
>>>> --- a/arch/x86/include/asm/vmx.h
>>>> +++ b/arch/x86/include/asm/vmx.h
>>>> @@ -69,6 +69,7 @@
>>>>   #define SECONDARY_EXEC_PT_USE_GPA        0x01000000
>>>>   #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC    0x00400000
>>>>   #define SECONDARY_EXEC_TSC_SCALING              0x02000000
>>>> +#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE    0x04000000
>>>>   #define PIN_BASED_EXT_INTR_MASK                 0x00000001
>>>>   #define PIN_BASED_NMI_EXITING                   0x00000008
>>>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>>>> index e18a9f9f65b5..48bd851a6ae5 100644
>>>> --- a/arch/x86/kvm/cpuid.c
>>>> +++ b/arch/x86/kvm/cpuid.c
>>>> @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct 
>>>> kvm_cpuid_entry2 *entry, u32 function,
>>>>           F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
>>>>           F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
>>>>           F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
>>>> -        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
>>>> +        F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
>>>>       /* cpuid 7.0.edx*/
>>>>       const u32 kvm_cpuid_7_0_edx_x86_features =
>>>> diff --git a/arch/x86/kvm/vmx/capabilities.h 
>>>> b/arch/x86/kvm/vmx/capabilities.h
>>>> index d6664ee3d127..fd77e17651b4 100644
>>>> --- a/arch/x86/kvm/vmx/capabilities.h
>>>> +++ b/arch/x86/kvm/vmx/capabilities.h
>>>> @@ -253,6 +253,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
>>>>           SECONDARY_EXEC_TSC_SCALING;
>>>>   }
>>>> +static inline bool vmx_waitpkg_supported(void)
>>>> +{
>>>> +    return vmcs_config.cpu_based_2nd_exec_ctrl &
>>>> +        SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
>>>
>>> Shouldn't it be
>>>      return vmx->secondary_exec_control &
>>>                  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;   ?
>>
>> vmx->secondary_exec_control is another way to get 
>> SECONDARY_EXEC_CONTROL but it need to add vmx as input. Use 
>> vmcs_config.cpu_based_2nd_exec_ctrl   is easy expand to use in other 
>> place.
>>
> 
> This patch want to enable waitpkg for guest only when 
> guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG), and it only updates 
> vmx->secondary_exec_control based on guest's cpuid when setup vcpu. But 
> vmcs_config remains unchanged.  So using vmcs_config there is wrong.
> 

Xiaoyao, you are right. And MSR_IA32_XSS has the same logic. I will fix 
my patch. And we may need to submit a fix patch for MSR_IA32_XSS.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a39136b0d509..8f00882664d3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@ 
 #define SECONDARY_EXEC_PT_USE_GPA		0x01000000
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC	0x00400000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE	0x04000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e18a9f9f65b5..48bd851a6ae5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -405,7 +405,7 @@  static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d6664ee3d127..fd77e17651b4 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -253,6 +253,12 @@  static inline bool cpu_has_vmx_tsc_scaling(void)
 		SECONDARY_EXEC_TSC_SCALING;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_apicv(void)
 {
 	return cpu_has_vmx_apic_register_virt() &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b93e36ddee5e..b35bfac30a34 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2250,6 +2250,7 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 			SECONDARY_EXEC_RDRAND_EXITING |
 			SECONDARY_EXEC_ENABLE_PML |
 			SECONDARY_EXEC_TSC_SCALING |
+			SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
 			SECONDARY_EXEC_PT_USE_GPA |
 			SECONDARY_EXEC_PT_CONCEAL_VMX |
 			SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -3987,6 +3988,9 @@  static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 		}
 	}
 
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG))
+		exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
 	vmx->secondary_exec_control = exec_control;
 }