Message ID | 20230414062545.270178-3-chao.gao@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Intel IA32_SPEC_CTRL Virtualization | expand |
On 4/14/2023 2:25 PM, Chao Gao wrote: > From: Zhang Chen <chen.zhang@intel.com> > > Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL > as the first feature in the leaf. > > RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U > (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to > disable RRSBA behavior for CPL3 and CPL0/1/2 respectively. > > Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses > after a non-zero is written to the MSR. Therefore, guests can already > toggle the two bits if the host supports RRSBA_CTRL, and no extra code > is needed to allow guests to toggle the two bits. > > Signed-off-by: Zhang Chen <chen.zhang@intel.com> > Signed-off-by: Chao Gao <chao.gao@intel.com> > Tested-by: Jiaan Lu <jiaan.lu@intel.com> > --- > arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++--- > arch/x86/kvm/reverse_cpuid.h | 7 +++++++ > 2 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 9583a110cf5f..f024c3ac2203 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) > SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) > ); > > + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, > + SF(RRSBA_CTRL) > + ); > + Is it slightly better to put the new added one after kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, ... to make it in order? > kvm_cpu_cap_mask(CPUID_8000_0001_ECX, > F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | > F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | > @@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > break; > /* function 7 has additional index. 
*/ > case 7: > - entry->eax = min(entry->eax, 1u); > + entry->eax = min(entry->eax, 2u); > cpuid_entry_override(entry, CPUID_7_0_EBX); > cpuid_entry_override(entry, CPUID_7_ECX); > cpuid_entry_override(entry, CPUID_7_EDX); > > - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ > - if (entry->eax == 1) { > + max_idx = entry->eax; > + > + if (max_idx >= 1) { > entry = do_host_cpuid(array, function, 1); > if (!entry) > goto out; > @@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > entry->ebx = 0; > entry->ecx = 0; > } > + > + if (max_idx >= 2) { > + entry = do_host_cpuid(array, function, 2); > + if (!entry) > + goto out; > + > + cpuid_entry_override(entry, CPUID_7_2_EDX); > + entry->eax = 0; > + entry->ebx = 0; > + entry->ecx = 0; > + } > break; > case 0xa: { /* Architectural Performance Monitoring */ > union cpuid10_eax eax; > diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h > index a5717282bb9c..72bad8314a9c 100644 > --- a/arch/x86/kvm/reverse_cpuid.h > +++ b/arch/x86/kvm/reverse_cpuid.h > @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs { > CPUID_12_EAX = NCAPINTS, > CPUID_7_1_EDX, > CPUID_8000_0007_EDX, > + CPUID_7_2_EDX, > NR_KVM_CPU_CAPS, > > NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, > @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs { > /* CPUID level 0x80000007 (EDX). 
*/ > #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) > > +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ > +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) > + > struct cpuid_reg { > u32 function; > u32 index; > @@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, > [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, > + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, > [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, > [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, > [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, > @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature) > return KVM_X86_FEATURE_SGX_EDECCSSA; > else if (x86_feature == X86_FEATURE_CONSTANT_TSC) > return KVM_X86_FEATURE_CONSTANT_TSC; > + else if (x86_feature == X86_FEATURE_RRSBA_CTRL) > + return KVM_X86_FEATURE_RRSBA_CTRL; > > return x86_feature; > }
On Sun, Apr 16, 2023 at 03:04:59PM +0800, Binbin Wu wrote: > >On 4/14/2023 2:25 PM, Chao Gao wrote: >> From: Zhang Chen <chen.zhang@intel.com> >> >> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL >> as the first feature in the leaf. >> >> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U >> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to >> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively. >> >> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses >> after a non-zero is written to the MSR. Therefore, guests can already >> toggle the two bits if the host supports RRSBA_CTRL, and no extra code >> is needed to allow guests to toggle the two bits. >> >> Signed-off-by: Zhang Chen <chen.zhang@intel.com> >> Signed-off-by: Chao Gao <chao.gao@intel.com> >> Tested-by: Jiaan Lu <jiaan.lu@intel.com> >> --- >> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++--- >> arch/x86/kvm/reverse_cpuid.h | 7 +++++++ >> 2 files changed, 26 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> index 9583a110cf5f..f024c3ac2203 100644 >> --- a/arch/x86/kvm/cpuid.c >> +++ b/arch/x86/kvm/cpuid.c >> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) >> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) >> ); >> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, >> + SF(RRSBA_CTRL) >> + ); >> + > >Is it slightly better to put the new added one after > > kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, ... > >to make it in order? Yes. Will do.
On 4/14/2023 2:25 PM, Chao Gao wrote: > From: Zhang Chen <chen.zhang@intel.com> > > Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL > as the first feature in the leaf. > > RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U > (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to > disable RRSBA behavior for CPL3 and CPL0/1/2 respectively. > > Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses > after a non-zero is written to the MSR. Therefore, guests can already > toggle the two bits if the host supports RRSBA_CTRL, and no extra code > is needed to allow guests to toggle the two bits. This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL which has a dedicated enumeration CPUID bit and no support in KVM yet. I think we need to fix this bug at first. > Signed-off-by: Zhang Chen <chen.zhang@intel.com> > Signed-off-by: Chao Gao <chao.gao@intel.com> > Tested-by: Jiaan Lu <jiaan.lu@intel.com> > --- > arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++--- > arch/x86/kvm/reverse_cpuid.h | 7 +++++++ > 2 files changed, 26 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 9583a110cf5f..f024c3ac2203 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) > SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) > ); > > + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, > + SF(RRSBA_CTRL) > + ); > + Please move this hook up to right follow the leaf CPUID_7_1_EAX. > kvm_cpu_cap_mask(CPUID_8000_0001_ECX, > F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | > F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | > @@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > break; > /* function 7 has additional index. 
*/ > case 7: > - entry->eax = min(entry->eax, 1u); > + entry->eax = min(entry->eax, 2u); > cpuid_entry_override(entry, CPUID_7_0_EBX); > cpuid_entry_override(entry, CPUID_7_ECX); > cpuid_entry_override(entry, CPUID_7_EDX); > > - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ > - if (entry->eax == 1) { > + max_idx = entry->eax; > + > + if (max_idx >= 1) { > entry = do_host_cpuid(array, function, 1); > if (!entry) > goto out; > @@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) > entry->ebx = 0; > entry->ecx = 0; > } > + > + if (max_idx >= 2) { > + entry = do_host_cpuid(array, function, 2); > + if (!entry) > + goto out; > + > + cpuid_entry_override(entry, CPUID_7_2_EDX); > + entry->eax = 0; > + entry->ebx = 0; > + entry->ecx = 0; > + } > break; > case 0xa: { /* Architectural Performance Monitoring */ > union cpuid10_eax eax; > diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h > index a5717282bb9c..72bad8314a9c 100644 > --- a/arch/x86/kvm/reverse_cpuid.h > +++ b/arch/x86/kvm/reverse_cpuid.h > @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs { > CPUID_12_EAX = NCAPINTS, > CPUID_7_1_EDX, > CPUID_8000_0007_EDX, > + CPUID_7_2_EDX, > NR_KVM_CPU_CAPS, > > NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, > @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs { > /* CPUID level 0x80000007 (EDX). 
*/ > #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) > > +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ > +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) > + > struct cpuid_reg { > u32 function; > u32 index; > @@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = { > [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, > [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, > [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, > + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, > [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, > [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, > [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, > @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature) > return KVM_X86_FEATURE_SGX_EDECCSSA; > else if (x86_feature == X86_FEATURE_CONSTANT_TSC) > return KVM_X86_FEATURE_CONSTANT_TSC; > + else if (x86_feature == X86_FEATURE_RRSBA_CTRL) > + return KVM_X86_FEATURE_RRSBA_CTRL; > > return x86_feature; > }
On Mon, May 15, 2023 at 02:53:07PM +0800, Xiaoyao Li wrote: >On 4/14/2023 2:25 PM, Chao Gao wrote: >> From: Zhang Chen <chen.zhang@intel.com> >> >> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL >> as the first feature in the leaf. >> >> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U >> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to >> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively. >> >> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses >> after a non-zero is written to the MSR. Therefore, guests can already >> toggle the two bits if the host supports RRSBA_CTRL, and no extra code >> is needed to allow guests to toggle the two bits. > >This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL which >has a dedicated enumeration CPUID bit and no support in KVM yet. Do you mean passing through the MSR is a bug? guest can write any hardware supported value to the MSR if the MSR isn't intercepted. I guess this is intentional and a trade-off for performance (note that context-switch may cause writes to the MSR). And see commit 841c2be09fe4 ("kvm: x86: replace kvm_spec_ctrl_test_value with runtime test on the host") it appears that this behavior is widely recognized. > >I think we need to fix this bug at first. I have no idea how to fix the "bug" without intercepting the MSR. The performance penalty makes me think intercepting the MSR is not a viable solution. 
> >> Signed-off-by: Zhang Chen <chen.zhang@intel.com> >> Signed-off-by: Chao Gao <chao.gao@intel.com> >> Tested-by: Jiaan Lu <jiaan.lu@intel.com> >> --- >> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++--- >> arch/x86/kvm/reverse_cpuid.h | 7 +++++++ >> 2 files changed, 26 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >> index 9583a110cf5f..f024c3ac2203 100644 >> --- a/arch/x86/kvm/cpuid.c >> +++ b/arch/x86/kvm/cpuid.c >> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) >> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) >> ); >> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, >> + SF(RRSBA_CTRL) >> + ); >> + > >Please move this hook up to right follow the leaf CPUID_7_1_EAX. sure. will do.
On 5/16/2023 10:04 AM, Chao Gao wrote: > On Mon, May 15, 2023 at 02:53:07PM +0800, Xiaoyao Li wrote: >> On 4/14/2023 2:25 PM, Chao Gao wrote: >>> From: Zhang Chen <chen.zhang@intel.com> >>> >>> Add a kvm-only CPUID feature leaf for CPUID.7.2.EDX and RRSBA_CTRL >>> as the first feature in the leaf. >>> >>> RRSBA_CTRL is enumerated by CPUID.7.2.EDX[2]. If supported, RRSBA_DIS_U >>> (bit 5) and RRSBA_DIS_S (bit 6) of IA32_SPEC_CTRL MSR can be used to >>> disable RRSBA behavior for CPL3 and CPL0/1/2 respectively. >>> >>> Note that KVM does not intercept guests' IA32_SPEC_CTRL MSR accesses >>> after a non-zero is written to the MSR. Therefore, guests can already >>> toggle the two bits if the host supports RRSBA_CTRL, and no extra code >>> is needed to allow guests to toggle the two bits. >> >> This is a bug that also matters with other bits in MSR_IA32_SPEC_CTRL which >> has a dedicated enumeration CPUID bit and no support in KVM yet. > > Do you mean passing through the MSR is a bug? guest can write any hardware > supported value to the MSR if the MSR isn't intercepted. > > I guess this is intentional and a trade-off for performance (note that > context-switch may cause writes to the MSR). And see > > commit 841c2be09fe4 ("kvm: x86: replace kvm_spec_ctrl_test_value with runtime test on the host") > > it appears that this behavior is widely recognized. > >> >> I think we need to fix this bug at first. > > I have no idea how to fix the "bug" without intercepting the MSR. The > performance penalty makes me think intercepting the MSR is not a viable > solution. I thought correctness always takes higher priority over performance. 
>> >>> Signed-off-by: Zhang Chen <chen.zhang@intel.com> >>> Signed-off-by: Chao Gao <chao.gao@intel.com> >>> Tested-by: Jiaan Lu <jiaan.lu@intel.com> >>> --- >>> arch/x86/kvm/cpuid.c | 22 +++++++++++++++++++--- >>> arch/x86/kvm/reverse_cpuid.h | 7 +++++++ >>> 2 files changed, 26 insertions(+), 3 deletions(-) >>> >>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c >>> index 9583a110cf5f..f024c3ac2203 100644 >>> --- a/arch/x86/kvm/cpuid.c >>> +++ b/arch/x86/kvm/cpuid.c >>> @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) >>> SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) >>> ); >>> + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, >>> + SF(RRSBA_CTRL) >>> + ); >>> + >> >> Please move this hook up to right follow the leaf CPUID_7_1_EAX. > > sure. will do.
On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote: >> > I think we need to fix this bug at first. >> >> I have no idea how to fix the "bug" without intercepting the MSR. The >> performance penalty makes me think intercepting the MSR is not a viable >> solution. > >I thought correctness always takes higher priority over performance. It is generally true. However, there are situations where we should make trade-offs between correctness and other factors (like performance): E.g., instructions without control bits, to be 100% compliant with CPU spec, in theory, VMMs can trap/decode every instruction and inject #UD if a guest tries to use some instructions it shouldn't.
On 5/16/2023 11:01 AM, Chao Gao wrote: > On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote: >>>> I think we need to fix this bug at first. >>> >>> I have no idea how to fix the "bug" without intercepting the MSR. The >>> performance penalty makes me think intercepting the MSR is not a viable >>> solution. >> >> I thought correctness always takes higher priority over performance. > > It is generally true. however, there are situations where we should make > trade-offs between correctness and other factors (like performance): > > E.g., instructions without control bits, to be 100% compliant with CPU > spec, in theory, VMMs can trap/decode every instruction and inject #UD > if a guest tries to use some instructions it shouldn't. This is the virtualization hole. IMHO, they are different things. Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there were only a few bits defined, and the changelog called out that No attempt is made to handle STIBP here, intentionally. Filtering STIBP may be added in a future patch, which may require trapping all writes if we don't want to pass it through directly to the guest. Per my understanding, it implied that we need to re-visit it when more bits added instead of following the pass-through design silently.
On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote: >On 5/16/2023 11:01 AM, Chao Gao wrote: >> On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote: >> > > > I think we need to fix this bug at first. >> > > >> > > I have no idea how to fix the "bug" without intercepting the MSR. The >> > > performance penalty makes me think intercepting the MSR is not a viable >> > > solution. >> > >> > I thought correctness always takes higher priority over performance. >> >> It is generally true. however, there are situations where we should make >> trade-offs between correctness and other factors (like performance): >> >> E.g., instructions without control bits, to be 100% compliant with CPU >> spec, in theory, VMMs can trap/decode every instruction and inject #UD >> if a guest tries to use some instructions it shouldn't. >This is the virtualization hole. IMHO, they are different things. what are the differences between the two? 1. Executing some unsupported instructions should cause #UD. But this is allowed in a KVM guest. 2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is allowed in a KVM guest. > >Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d >("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there >was only a few bits defined, and the changelog called out that > > No attempt is made to handle STIBP here, intentionally. Filtering > STIBP may be added in a future patch, which may require trapping all > writes if we don't want to pass it through directly to the guest. > >Per my undesrstanding, it implied that we need to re-visit it when more bits >added instead of following the pass-through design siliently. I don't object to re-visiting the design. My point is that to prevent guests from setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong justification for intercepting the MSR. STIBP and other bits (except IBRS) have the same problem. And the gain of fixing this is too small. 
If passing through the SPEC_CTRL MSR to guests might cause security issues, I would agree to intercept accesses to the MSR.
On 5/16/2023 5:09 PM, Chao Gao wrote: > On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote: >> On 5/16/2023 11:01 AM, Chao Gao wrote: >>> On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote: >>>>>> I think we need to fix this bug at first. >>>>> >>>>> I have no idea how to fix the "bug" without intercepting the MSR. The >>>>> performance penalty makes me think intercepting the MSR is not a viable >>>>> solution. >>>> >>>> I thought correctness always takes higher priority over performance. >>> >>> It is generally true. however, there are situations where we should make >>> trade-offs between correctness and other factors (like performance): >>> >>> E.g., instructions without control bits, to be 100% compliant with CPU >>> spec, in theory, VMMs can trap/decode every instruction and inject #UD >>> if a guest tries to use some instructions it shouldn't. >> >> This is the virtualization hole. IMHO, they are different things. > > what are the differences between? > 1. Executing some unsupported instructions should cause #UD. But this is allowed > in a KVM guest. > 2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is > allowed in a KVM guest. The difference is that for virtualization hole, there is no way but intercept and decode every instruction if we want the correctness. It's a disaster. But for MSR virtualization, we do have an option and we don't need to trap every instruction. MSR interception is the designated mechanism to correctly and elegantly virtualize the MSR. >> >> Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d >> ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there >> was only a few bits defined, and the changelog called out that >> >> No attempt is made to handle STIBP here, intentionally. Filtering >> STIBP may be added in a future patch, which may require trapping all >> writes if we don't want to pass it through directly to the guest. 
>> >> Per my undesrstanding, it implied that we need to re-visit it when more bits >> added instead of following the pass-through design siliently. > > I don't object to re-visiting the design. My point is that to prevent guests from > setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong > justfication for intercepting the MSR. STIBP and other bits (except IBRS) have > the same problem. And the gain of fixing this is too small. > > If passing through the SPEC_CTRL MSR to guests might cause security issues, I > would agree to intercept accesses to the MSR. I never buy it. How to interpret the security? If the user wants to hide one feature from guest but KVM allows it when KVM does have a reasonable way to hide it. Does it violate the security?
On Thu, May 18, 2023 at 05:50:17PM +0800, Xiaoyao Li wrote: >On 5/16/2023 5:09 PM, Chao Gao wrote: >> On Tue, May 16, 2023 at 03:03:15PM +0800, Xiaoyao Li wrote: >> > On 5/16/2023 11:01 AM, Chao Gao wrote: >> > > On Tue, May 16, 2023 at 10:22:22AM +0800, Xiaoyao Li wrote: >> > > > > > I think we need to fix this bug at first. >> > > > > >> > > > > I have no idea how to fix the "bug" without intercepting the MSR. The >> > > > > performance penalty makes me think intercepting the MSR is not a viable >> > > > > solution. >> > > > >> > > > I thought correctness always takes higher priority over performance. >> > > >> > > It is generally true. however, there are situations where we should make >> > > trade-offs between correctness and other factors (like performance): >> > > >> > > E.g., instructions without control bits, to be 100% compliant with CPU >> > > spec, in theory, VMMs can trap/decode every instruction and inject #UD >> > > if a guest tries to use some instructions it shouldn't. >> > >> > This is the virtualization hole. IMHO, they are different things. >> >> what are the differences between? >> 1. Executing some unsupported instructions should cause #UD. But this is allowed >> in a KVM guest. >> 2. Setting some reserved bits in SPEC_CTRL MSR should cause #GP. But this is >> allowed in a KVM guest. > >The difference is that for virtualization hole, there is no way but intercept >and decode every instruction if we want the correctness. It's a disaster. > >But for MSR virtualization, we do have an option and we don't need to trap >every instruction. MSR interception is the designated mechanism to correctly >and elegantly virtualize the MSR. The gains in these two cases are similar: some operations in guest are prevented. But the costs on performance are not. So, how do you draw the line when we can sacrifice correctness for performance? 
> >> > >> > Pass through MSR_IA32_SPEC_CTRL was introduced in commit d28b387fb74d >> > ("KVM/VMX: Allow direct access to MSR_IA32_SPEC_CTRL"). At that time there >> > was only a few bits defined, and the changelog called out that >> > >> > No attempt is made to handle STIBP here, intentionally. Filtering >> > STIBP may be added in a future patch, which may require trapping all >> > writes if we don't want to pass it through directly to the guest. >> > >> > Per my undesrstanding, it implied that we need to re-visit it when more bits >> > added instead of following the pass-through design siliently. >> >> I don't object to re-visiting the design. My point is that to prevent guests from >> setting RRSBA_CTRL/BHI_CTRL when they are not advertised isn't a strong >> justfication for intercepting the MSR. STIBP and other bits (except IBRS) have >> the same problem. And the gain of fixing this is too small. >> >> If passing through the SPEC_CTRL MSR to guests might cause security issues, I >> would agree to intercept accesses to the MSR. > >I never buy it. How to interpret the security? If the user wants to hide one >feature from guest but KVM allows it when KVM does have a reasonable way to >hide it. Does it violate the security? I would say no. By "security", I mean guest becomes vulnerable to some issues or guest attacks host or guest can access unauthorized data. I tried to say that if the value of intercepting IA32_SPEC_CTRL outweighs the performance penalty, it makes sense to do that. I guess the decision has been made when enabling STIBP and it means people care more about performance than preventing guests from setting STIBP.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9583a110cf5f..f024c3ac2203 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -685,6 +685,10 @@ void kvm_set_cpu_caps(void) SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); + kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX, + SF(RRSBA_CTRL) + ); + kvm_cpu_cap_mask(CPUID_8000_0001_ECX, F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | @@ -949,13 +953,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) break; /* function 7 has additional index. */ case 7: - entry->eax = min(entry->eax, 1u); + entry->eax = min(entry->eax, 2u); cpuid_entry_override(entry, CPUID_7_0_EBX); cpuid_entry_override(entry, CPUID_7_ECX); cpuid_entry_override(entry, CPUID_7_EDX); - /* KVM only supports 0x7.0 and 0x7.1, capped above via min(). */ - if (entry->eax == 1) { + max_idx = entry->eax; + + if (max_idx >= 1) { entry = do_host_cpuid(array, function, 1); if (!entry) goto out; @@ -965,6 +970,17 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = 0; entry->ecx = 0; } + + if (max_idx >= 2) { + entry = do_host_cpuid(array, function, 2); + if (!entry) + goto out; + + cpuid_entry_override(entry, CPUID_7_2_EDX); + entry->eax = 0; + entry->ebx = 0; + entry->ecx = 0; + } break; case 0xa: { /* Architectural Performance Monitoring */ union cpuid10_eax eax; diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index a5717282bb9c..72bad8314a9c 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -15,6 +15,7 @@ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, CPUID_7_1_EDX, CPUID_8000_0007_EDX, + CPUID_7_2_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -47,6 +48,9 @@ enum kvm_only_cpuid_leafs { /* CPUID level 0x80000007 (EDX). 
*/ #define KVM_X86_FEATURE_CONSTANT_TSC KVM_X86_FEATURE(CPUID_8000_0007_EDX, 8) +/* Intel-defined sub-features, CPUID level 0x00000007:2 (EDX) */ +#define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) + struct cpuid_reg { u32 function; u32 index; @@ -69,6 +73,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, + [CPUID_7_2_EDX] = { 7, 2, CPUID_EDX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, @@ -108,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature) return KVM_X86_FEATURE_SGX_EDECCSSA; else if (x86_feature == X86_FEATURE_CONSTANT_TSC) return KVM_X86_FEATURE_CONSTANT_TSC; + else if (x86_feature == X86_FEATURE_RRSBA_CTRL) + return KVM_X86_FEATURE_RRSBA_CTRL; return x86_feature; }