Message ID | 20241121201448.36170-8-adrian.hunter@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: TDX: TD vcpu enter/exit | expand |
>+static bool tdparams_tsx_supported(struct kvm_cpuid2 *cpuid) >+{ >+ const struct kvm_cpuid_entry2 *entry; >+ u64 mask; >+ u32 ebx; >+ >+ entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0x7, 0); >+ if (entry) >+ ebx = entry->ebx; >+ else >+ ebx = 0; >+ >+ mask = __feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM); >+ return ebx & mask; >+} >+ > static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, > struct kvm_tdx_init_vm *init_vm) > { >@@ -1299,6 +1322,7 @@ static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, > MEMCPY_SAME_SIZE(td_params->mrowner, init_vm->mrowner); > MEMCPY_SAME_SIZE(td_params->mrownerconfig, init_vm->mrownerconfig); > >+ to_kvm_tdx(kvm)->tsx_supported = tdparams_tsx_supported(cpuid); > return 0; > } > >@@ -2272,6 +2296,11 @@ static int __init __tdx_bringup(void) > return -EIO; > } > } >+ tdx_uret_tsx_ctrl_slot = kvm_find_user_return_msr(MSR_IA32_TSX_CTRL); >+ if (tdx_uret_tsx_ctrl_slot == -1 && boot_cpu_has(X86_FEATURE_MSR_TSX_CTRL)) { >+ pr_err("MSR_IA32_TSX_CTRL isn't included by kvm_find_user_return_msr\n"); >+ return -EIO; >+ } > > /* > * Enabling TDX requires enabling hardware virtualization first, >diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h >index 48cf0a1abfcc..815ff6bdbc7e 100644 >--- a/arch/x86/kvm/vmx/tdx.h >+++ b/arch/x86/kvm/vmx/tdx.h >@@ -29,6 +29,14 @@ struct kvm_tdx { > u8 nr_tdcs_pages; > u8 nr_vcpu_tdcx_pages; > >+ /* >+ * Used on each TD-exit, see tdx_user_return_msr_update_cache(). >+ * TSX_CTRL value on TD exit >+ * - set 0 if guest TSX enabled >+ * - preserved if guest TSX disabled >+ */ >+ bool tsx_supported; Is it possible to drop this boolean and tdparams_tsx_supported()? I think we can use the guest_can_use() framework instead. >+ > u64 tsc_offset; > > enum kvm_tdx_state state; >-- >2.43.0 >
On Fri, Nov 22, 2024, Chao Gao wrote: > >+static bool tdparams_tsx_supported(struct kvm_cpuid2 *cpuid) > >+{ > >+ const struct kvm_cpuid_entry2 *entry; > >+ u64 mask; > >+ u32 ebx; > >+ > >+ entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0x7, 0); > >+ if (entry) > >+ ebx = entry->ebx; > >+ else > >+ ebx = 0; > >+ > >+ mask = __feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM); > >+ return ebx & mask; > >+} > >+ > > static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, > > struct kvm_tdx_init_vm *init_vm) > > { > >@@ -1299,6 +1322,7 @@ static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, > > MEMCPY_SAME_SIZE(td_params->mrowner, init_vm->mrowner); > > MEMCPY_SAME_SIZE(td_params->mrownerconfig, init_vm->mrownerconfig); > > > >+ to_kvm_tdx(kvm)->tsx_supported = tdparams_tsx_supported(cpuid); > > return 0; > > } > > > >@@ -2272,6 +2296,11 @@ static int __init __tdx_bringup(void) > > return -EIO; > > } > > } > >+ tdx_uret_tsx_ctrl_slot = kvm_find_user_return_msr(MSR_IA32_TSX_CTRL); > >+ if (tdx_uret_tsx_ctrl_slot == -1 && boot_cpu_has(X86_FEATURE_MSR_TSX_CTRL)) { > >+ pr_err("MSR_IA32_TSX_CTRL isn't included by kvm_find_user_return_msr\n"); > >+ return -EIO; > >+ } > > > > /* > > * Enabling TDX requires enabling hardware virtualization first, > >diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h > >index 48cf0a1abfcc..815ff6bdbc7e 100644 > >--- a/arch/x86/kvm/vmx/tdx.h > >+++ b/arch/x86/kvm/vmx/tdx.h > >@@ -29,6 +29,14 @@ struct kvm_tdx { > > u8 nr_tdcs_pages; > > u8 nr_vcpu_tdcx_pages; > > > >+ /* > >+ * Used on each TD-exit, see tdx_user_return_msr_update_cache(). > >+ * TSX_CTRL value on TD exit > >+ * - set 0 if guest TSX enabled > >+ * - preserved if guest TSX disabled > >+ */ > >+ bool tsx_supported; > > Is it possible to drop this boolean and tdparams_tsx_supported()? I think we > can use the guest_can_use() framework instead. Yeah, though that optimized handling will soon come for free[*], and I plan on landing that sooner than TDX, so don't fret too much over this. [*] https://lore.kernel.org/all/20240517173926.965351-1-seanjc@google.com
On 27/11/24 16:00, Sean Christopherson wrote: > On Fri, Nov 22, 2024, Chao Gao wrote: >>> +static bool tdparams_tsx_supported(struct kvm_cpuid2 *cpuid) >>> +{ >>> + const struct kvm_cpuid_entry2 *entry; >>> + u64 mask; >>> + u32 ebx; >>> + >>> + entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0x7, 0); >>> + if (entry) >>> + ebx = entry->ebx; >>> + else >>> + ebx = 0; >>> + >>> + mask = __feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM); >>> + return ebx & mask; >>> +} >>> + >>> static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, >>> struct kvm_tdx_init_vm *init_vm) >>> { >>> @@ -1299,6 +1322,7 @@ static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, >>> MEMCPY_SAME_SIZE(td_params->mrowner, init_vm->mrowner); >>> MEMCPY_SAME_SIZE(td_params->mrownerconfig, init_vm->mrownerconfig); >>> >>> + to_kvm_tdx(kvm)->tsx_supported = tdparams_tsx_supported(cpuid); >>> return 0; >>> } >>> >>> @@ -2272,6 +2296,11 @@ static int __init __tdx_bringup(void) >>> return -EIO; >>> } >>> } >>> + tdx_uret_tsx_ctrl_slot = kvm_find_user_return_msr(MSR_IA32_TSX_CTRL); >>> + if (tdx_uret_tsx_ctrl_slot == -1 && boot_cpu_has(X86_FEATURE_MSR_TSX_CTRL)) { >>> + pr_err("MSR_IA32_TSX_CTRL isn't included by kvm_find_user_return_msr\n"); >>> + return -EIO; >>> + } >>> >>> /* >>> * Enabling TDX requires enabling hardware virtualization first, >>> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h >>> index 48cf0a1abfcc..815ff6bdbc7e 100644 >>> --- a/arch/x86/kvm/vmx/tdx.h >>> +++ b/arch/x86/kvm/vmx/tdx.h >>> @@ -29,6 +29,14 @@ struct kvm_tdx { >>> u8 nr_tdcs_pages; >>> u8 nr_vcpu_tdcx_pages; >>> >>> + /* >>> + * Used on each TD-exit, see tdx_user_return_msr_update_cache(). >>> + * TSX_CTRL value on TD exit >>> + * - set 0 if guest TSX enabled >>> + * - preserved if guest TSX disabled >>> + */ >>> + bool tsx_supported; >> >> Is it possible to drop this boolean and tdparams_tsx_supported()? I think we >> can use the guest_can_use() framework instead. > > Yeah, though that optimized handling will soon come for free[*], and I plan on > landing that sooner than TDX, so don't fret too much over this. > > [*] https://lore.kernel.org/all/20240517173926.965351-1-seanjc@google.com guest_can_use() is per-vcpu whereas we are currently using the CPUID from TD_PARAMS (as per spec) before there are any VCPU's. It is a bit of a disconnect so let's keep tsx_supported for now.
On Fri, Nov 29, 2024, Adrian Hunter wrote: > On 27/11/24 16:00, Sean Christopherson wrote: > > On Fri, Nov 22, 2024, Chao Gao wrote: > >>> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h > >>> index 48cf0a1abfcc..815ff6bdbc7e 100644 > >>> --- a/arch/x86/kvm/vmx/tdx.h > >>> +++ b/arch/x86/kvm/vmx/tdx.h > >>> @@ -29,6 +29,14 @@ struct kvm_tdx { > >>> u8 nr_tdcs_pages; > >>> u8 nr_vcpu_tdcx_pages; > >>> > >>> + /* > >>> + * Used on each TD-exit, see tdx_user_return_msr_update_cache(). > >>> + * TSX_CTRL value on TD exit > >>> + * - set 0 if guest TSX enabled > >>> + * - preserved if guest TSX disabled > >>> + */ > >>> + bool tsx_supported; > >> > >> Is it possible to drop this boolean and tdparams_tsx_supported()? I think we > >> can use the guest_can_use() framework instead. > > > > Yeah, though that optimized handling will soon come for free[*], and I plan on > > landing that sooner than TDX, so don't fret too much over this. > > > > [*] https://lore.kernel.org/all/20240517173926.965351-1-seanjc@google.com > > guest_can_use() is per-vcpu whereas we are currently using the > CPUID from TD_PARAMS (as per spec) before there are any VCPU's. > It is a bit of a disconnect so let's keep tsx_supported for now. No, as was agreed upon[*], KVM needs to ensure consistency between what KVM sees as guest CPUID and what is actually enabled/exposed to the guest. If there are no vCPUs, then there's zero reason to snapshot the value in kvm_tdx. And if there are vCPUs, then their CPUID info needs to be consistent with respect to TDPARAMS. - Don't hardcode fixed/required CPUID values in KVM, use available metadata from TDX Module to reject "bad" guest CPUID (or let the TDX module reject?). I.e. don't let a guest silently run with a CPUID that diverges from what userspace provided. [*] https://lore.kernel.org/all/20240405165844.1018872-1-seanjc@google.com
On Mon, 2024-12-02 at 11:07 -0800, Sean Christopherson wrote: > > guest_can_use() is per-vcpu whereas we are currently using the > > CPUID from TD_PARAMS (as per spec) before there are any VCPU's. > > It is a bit of a disconnect so let's keep tsx_supported for now. > > No, as was agreed upon[*], KVM needs to ensure consistency between what KVM > sees > as guest CPUID and what is actually enabled/exposed to the guest. If there > are > no vCPUs, then there's zero reason to snapshot the value in kvm_tdx. And if > there > are vCPUs, then their CPUID info needs to be consistent with respect to > TDPARAMS. Small point - the last conversation[0] we had on this was to let *userspace* ensure consistency between KVM's CPUID (i.e. KVM_SET_CPUID2) and the TDX Module's view. So the configuration goes: 1. Userspace configures per-VM CPU features 2. Userspace gets TDX Module's final per-vCPU version of CPUID configuration via KVM API 3. Userspace calls KVM_SET_CPUID2 with the merge of TDX Module's version, and userspace's desired values for KVM "owned" CPUID leads (pv features, etc) But KVM's knowledge of CPUID bits still remains per-vcpu for TDX in any case. > > - Don't hardcode fixed/required CPUID values in KVM, use available metadata > from TDX Module to reject "bad" guest CPUID (or let the TDX module > reject?). > I.e. don't let a guest silently run with a CPUID that diverges from what > userspace provided. The latest QEMU patches have this fixed bit data hardcoded in QEMU. Then the long term solution is to make the TDX module return this data. Xiaoyao will post a proposal on how the TDX module should expose this soon. > > [*] https://lore.kernel.org/all/20240405165844.1018872-1-seanjc@google.com [0]https://lore.kernel.org/kvm/CABgObfaobJ=G18JO9Jx6-K2mhZ2saVyLY-tHOgab1cJupOe-0Q@mail.gmail.com/
On Mon, Dec 02, 2024, Rick P Edgecombe wrote: > On Mon, 2024-12-02 at 11:07 -0800, Sean Christopherson wrote: > > > guest_can_use() is per-vcpu whereas we are currently using the > > > CPUID from TD_PARAMS (as per spec) before there are any VCPU's. > > > It is a bit of a disconnect so let's keep tsx_supported for now. > > > > No, as was agreed upon[*], KVM needs to ensure consistency between what KVM > > sees Rick, fix your MUA to not wrap already :-) > > as guest CPUID and what is actually enabled/exposed to the guest. If there > > are no vCPUs, then there's zero reason to snapshot the value in kvm_tdx. > > And if there are vCPUs, then their CPUID info needs to be consistent with > > respect to TDPARAMS. > > Small point - the last conversation[0] we had on this was to let *userspace* > ensure consistency between KVM's CPUID (i.e. KVM_SET_CPUID2) and the TDX > Module's view. I'm all for that, right up until KVM needs to protect itself again userspace and flawed TDX architecture. A relevant comment I made in that thread: : If the upgrade breaks a setup because it confuses _KVM_, then I'll care As it applies here, letting vCPU CPUID and actual guest functionality diverge for features that KVM cares about _will_ cause problems. This will be less ugly to handle once kvm_vcpu_arch.cpu_caps is a thing. KVM can simply force set/clear bits to match the actual guest functionality that's hardcoded by the TDX Module or defined by TDPARAMS. > So the configuration goes: > 1. Userspace configures per-VM CPU features > 2. Userspace gets TDX Module's final per-vCPU version of CPUID configuration via > KVM API > 3. Userspace calls KVM_SET_CPUID2 with the merge of TDX Module's version, and > userspace's desired values for KVM "owned" CPUID leads (pv features, etc) > > But KVM's knowledge of CPUID bits still remains per-vcpu for TDX in any case. > > > > > - Don't hardcode fixed/required CPUID values in KVM, use available metadata > > from TDX Module to reject "bad" guest CPUID (or let the TDX module reject?). > > I.e. don't let a guest silently run with a CPUID that diverges from what > > userspace provided. > > The latest QEMU patches have this fixed bit data hardcoded in QEMU. Then the > long term solution is to make the TDX module return this data. Xiaoyao will post > a proposal on how the TDX module should expose this soon. Punting the "merge" to userspace is fine, but KVM still needs to ensure it doesn't have holes where userspace can attack the kernel by lying about what features the guest has access to. And that means forcing bits in kvm_vcpu_arch.cpu_caps; anything else is just asking for problems. > > [*] https://lore.kernel.org/all/20240405165844.1018872-1-seanjc@google.com > > > [0]https://lore.kernel.org/kvm/CABgObfaobJ=G18JO9Jx6-K2mhZ2saVyLY-tHOgab1cJupOe-0Q@mail.gmail.com/
On Mon, 2024-12-02 at 16:34 -0800, Sean Christopherson wrote: > > Small point - the last conversation[0] we had on this was to let *userspace* > > ensure consistency between KVM's CPUID (i.e. KVM_SET_CPUID2) and the TDX > > Module's view. > > I'm all for that, right up until KVM needs to protect itself again userspace > and > flawed TDX architecture. A relevant comment I made in that thread: > > : If the upgrade breaks a setup because it confuses _KVM_, then I'll care > > As it applies here, letting vCPU CPUID and actual guest functionality diverge > for > features that KVM cares about _will_ cause problems. Right, just wanted to make sure we don't need to re-open the major design. > > This will be less ugly to handle once kvm_vcpu_arch.cpu_caps is a thing. KVM > can simply force set/clear bits to match the actual guest functionality that's > hardcoded by the TDX Module or defined by TDPARAMS. > > > So the configuration goes: > > 1. Userspace configures per-VM CPU features > > 2. Userspace gets TDX Module's final per-vCPU version of CPUID configuration > > via > > KVM API > > 3. Userspace calls KVM_SET_CPUID2 with the merge of TDX Module's version, > > and > > userspace's desired values for KVM "owned" CPUID leads (pv features, etc) > > > > But KVM's knowledge of CPUID bits still remains per-vcpu for TDX in any > > case. > > > > > > > > - Don't hardcode fixed/required CPUID values in KVM, use available > > > metadata > > > from TDX Module to reject "bad" guest CPUID (or let the TDX module > > > reject?). > > > I.e. don't let a guest silently run with a CPUID that diverges from > > > what > > > userspace provided. > > > > The latest QEMU patches have this fixed bit data hardcoded in QEMU. Then the > > long term solution is to make the TDX module return this data. Xiaoyao will > > post > > a proposal on how the TDX module should expose this soon. > > Punting the "merge" to userspace is fine, but KVM still needs to ensure it > doesn't > have holes where userspace can attack the kernel by lying about what features > the > guest has access to. And that means forcing bits in kvm_vcpu_arch.cpu_caps; > anything else is just asking for problems. Ok, then for now let's just address them on a case-by-case basis for logic that protects KVM. I'll add to look at using kvm_vcpu_arch.cpu_caps to our future- things todo list. I think Adrian is going post a proposal for how to handle this case better.
On 3/12/24 19:34, Edgecombe, Rick P wrote: > On Mon, 2024-12-02 at 16:34 -0800, Sean Christopherson wrote: >>> Small point - the last conversation[0] we had on this was to let *userspace* >>> ensure consistency between KVM's CPUID (i.e. KVM_SET_CPUID2) and the TDX >>> Module's view. >> >> I'm all for that, right up until KVM needs to protect itself again userspace >> and >> flawed TDX architecture. A relevant comment I made in that thread: >> >> : If the upgrade breaks a setup because it confuses _KVM_, then I'll care >> >> As it applies here, letting vCPU CPUID and actual guest functionality diverge >> for >> features that KVM cares about _will_ cause problems. > > Right, just wanted to make sure we don't need to re-open the major design. > >> >> This will be less ugly to handle once kvm_vcpu_arch.cpu_caps is a thing. KVM >> can simply force set/clear bits to match the actual guest functionality that's >> hardcoded by the TDX Module or defined by TDPARAMS. >> >>> So the configuration goes: >>> 1. Userspace configures per-VM CPU features >>> 2. Userspace gets TDX Module's final per-vCPU version of CPUID configuration >>> via >>> KVM API >>> 3. Userspace calls KVM_SET_CPUID2 with the merge of TDX Module's version, >>> and >>> userspace's desired values for KVM "owned" CPUID leads (pv features, etc) >>> >>> But KVM's knowledge of CPUID bits still remains per-vcpu for TDX in any >>> case. >>> >>>> >>>> - Don't hardcode fixed/required CPUID values in KVM, use available >>>> metadata >>>> from TDX Module to reject "bad" guest CPUID (or let the TDX module >>>> reject?). >>>> I.e. don't let a guest silently run with a CPUID that diverges from >>>> what >>>> userspace provided. >>> >>> The latest QEMU patches have this fixed bit data hardcoded in QEMU. Then the >>> long term solution is to make the TDX module return this data. Xiaoyao will >>> post >>> a proposal on how the TDX module should expose this soon. >> >> Punting the "merge" to userspace is fine, but KVM still needs to ensure it >> doesn't >> have holes where userspace can attack the kernel by lying about what features >> the >> guest has access to. And that means forcing bits in kvm_vcpu_arch.cpu_caps; >> anything else is just asking for problems. > > Ok, then for now let's just address them on a case-by-case basis for logic that > protects KVM. I'll add to look at using kvm_vcpu_arch.cpu_caps to our future- > things todo list. > > I think Adrian is going post a proposal for how to handle this case better. Perhaps just do without TSX support to start with e.g. drop this "KVM: TDX: Add TSX_CTRL msr into uret_msrs list" patch and instead add the following: From: Adrian Hunter <adrian.hunter@intel.com> Date: Tue, 3 Dec 2024 08:20:03 +0200 Subject: [PATCH] KVM: TDX: Disable support for TSX and WAITPKG Support for restoring IA32_TSX_CTRL MSR and IA32_UMWAIT_CONTROL MSR is not yet implemented, so disable support for TSX and WAITPKG for now. Clear the associated CPUID bits returned by KVM_TDX_CAPABILITIES, and return an error if those bits are set in KVM_TDX_INIT_VM. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> --- arch/x86/kvm/vmx/tdx.c | 43 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 69bb3136076d..947f78dc3429 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -105,6 +105,44 @@ static u32 tdx_set_guest_phys_addr_bits(const u32 eax, int addr_bits) return (eax & ~GENMASK(23, 16)) | (addr_bits & 0xff) << 16; } +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) + +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) +{ + return entry->function == 7 && entry->index == 0 && + (entry->ebx & TDX_FEATURE_TSX); +} + +static void clear_tsx(struct kvm_cpuid_entry2 *entry) +{ + entry->ebx &= ~TDX_FEATURE_TSX; +} + +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) +{ + return entry->function == 7 && entry->index == 0 && + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); +} + +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) +{ + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); +} + +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) +{ + if (has_tsx(entry)) + clear_tsx(entry); + + if (has_waitpkg(entry)) + clear_waitpkg(entry); +} + +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) +{ + return has_tsx(entry) || has_waitpkg(entry); +} + #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) @@ -124,6 +162,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i /* Work around missing support on old TDX modules */ if (entry->function == 0x80000008) entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); + + tdx_clear_unsupported_cpuid(entry); } static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, @@ -1235,6 +1275,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, if (!entry) continue; + if (tdx_unsupported_cpuid(entry)) + return -EINVAL; + copy_cnt++; value = &td_params->cpuid_values[i];
>+#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >+ >+static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >+{ >+ return entry->function == 7 && entry->index == 0 && >+ (entry->ebx & TDX_FEATURE_TSX); >+} >+ >+static void clear_tsx(struct kvm_cpuid_entry2 *entry) >+{ >+ entry->ebx &= ~TDX_FEATURE_TSX; >+} >+ >+static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >+{ >+ return entry->function == 7 && entry->index == 0 && >+ (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >+} >+ >+static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >+{ >+ entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >+} >+ >+static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >+{ >+ if (has_tsx(entry)) >+ clear_tsx(entry); >+ >+ if (has_waitpkg(entry)) >+ clear_waitpkg(entry); >+} >+ >+static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >+{ >+ return has_tsx(entry) || has_waitpkg(entry); >+} No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already ensures that unconfigurable bits are not set by userspace. >+ > #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) > > static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) >@@ -124,6 +162,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i > /* Work around missing support on old TDX modules */ > if (entry->function == 0x80000008) > entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); >+ >+ tdx_clear_unsupported_cpuid(entry); > } > > static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, >@@ -1235,6 +1275,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, > if (!entry) > continue; > >+ if (tdx_unsupported_cpuid(entry)) >+ return -EINVAL; >+ > copy_cnt++; > > value = &td_params->cpuid_values[i]; >-- >2.43.0 >
On 4/12/24 03:25, Chao Gao wrote: >> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >> + >> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >> +{ >> + return entry->function == 7 && entry->index == 0 && >> + (entry->ebx & TDX_FEATURE_TSX); >> +} >> + >> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >> +{ >> + entry->ebx &= ~TDX_FEATURE_TSX; >> +} >> + >> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >> +{ >> + return entry->function == 7 && entry->index == 0 && >> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >> +} >> + >> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >> +{ >> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >> +} >> + >> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >> +{ >> + if (has_tsx(entry)) >> + clear_tsx(entry); >> + >> + if (has_waitpkg(entry)) >> + clear_waitpkg(entry); >> +} >> + >> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >> +{ >> + return has_tsx(entry) || has_waitpkg(entry); >> +} > > No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already > ensures that unconfigurable bits are not set by userspace. Aren't they configurable? > >> + >> #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) >> >> static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) >> @@ -124,6 +162,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i >> /* Work around missing support on old TDX modules */ >> if (entry->function == 0x80000008) >> entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); >> + >> + tdx_clear_unsupported_cpuid(entry); >> } >> >> static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, >> @@ -1235,6 +1275,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, >> if (!entry) >> continue; >> >> + if (tdx_unsupported_cpuid(entry)) >> + return -EINVAL; >> + >> copy_cnt++; >> >> value = &td_params->cpuid_values[i]; >> -- >> 2.43.0 >>
On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >On 4/12/24 03:25, Chao Gao wrote: >>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>> + >>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>> +{ >>> + return entry->function == 7 && entry->index == 0 && >>> + (entry->ebx & TDX_FEATURE_TSX); >>> +} >>> + >>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>> +{ >>> + entry->ebx &= ~TDX_FEATURE_TSX; >>> +} >>> + >>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>> +{ >>> + return entry->function == 7 && entry->index == 0 && >>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>> +} >>> + >>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>> +{ >>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>> +} >>> + >>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>> +{ >>> + if (has_tsx(entry)) >>> + clear_tsx(entry); >>> + >>> + if (has_waitpkg(entry)) >>> + clear_waitpkg(entry); >>> +} >>> + >>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>> +{ >>> + return has_tsx(entry) || has_waitpkg(entry); >>> +} >> >> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >> ensures that unconfigurable bits are not set by userspace. > >Aren't they configurable? They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), so they are not configurable from a userspace perspective. Did I miss anything? KVM should check user inputs against its adjusted configurable bitmap, right? > >> >>> + >>> #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) >>> >>> static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) >>> @@ -124,6 +162,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i >>> /* Work around missing support on old TDX modules */ >>> if (entry->function == 0x80000008) >>> entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); >>> + >>> + tdx_clear_unsupported_cpuid(entry); >>> } >>> >>> static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, >>> @@ -1235,6 +1275,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, >>> if (!entry) >>> continue; >>> >>> + if (tdx_unsupported_cpuid(entry)) >>> + return -EINVAL; >>> + >>> copy_cnt++; >>> >>> value = &td_params->cpuid_values[i]; >>> -- >>> 2.43.0 >>> >
On 4/12/24 08:37, Chao Gao wrote: > On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >> On 4/12/24 03:25, Chao Gao wrote: >>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>> + >>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + return entry->function == 7 && entry->index == 0 && >>>> + (entry->ebx & TDX_FEATURE_TSX); >>>> +} >>>> + >>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>> +} >>>> + >>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + return entry->function == 7 && entry->index == 0 && >>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>> +} >>>> + >>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>> +} >>>> + >>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + if (has_tsx(entry)) >>>> + clear_tsx(entry); >>>> + >>>> + if (has_waitpkg(entry)) >>>> + clear_waitpkg(entry); >>>> +} >>>> + >>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>> +{ >>>> + return has_tsx(entry) || has_waitpkg(entry); >>>> +} >>> >>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>> ensures that unconfigurable bits are not set by userspace. >> >> Aren't they configurable? > > They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), > so they are not configurable from a userspace perspective. Did I miss anything? > KVM should check user inputs against its adjusted configurable bitmap, right? Maybe I misunderstand but we rely on the TDX module to reject invalid configuration. We don't check exactly what is configurable for the TDX Module. TSX and WAITPKG are not invalid for the TDX Module, but KVM must either support them by restoring their MSRs, or disallow them. This patch disallows them for now. > >> >>> >>>> + >>>> #define KVM_TDX_CPUID_NO_SUBLEAF ((__u32)-1) >>>> >>>> static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char idx) >>>> @@ -124,6 +162,8 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i >>>> /* Work around missing support on old TDX modules */ >>>> if (entry->function == 0x80000008) >>>> entry->eax = tdx_set_guest_phys_addr_bits(entry->eax, 0xff); >>>> + >>>> + tdx_clear_unsupported_cpuid(entry); >>>> } >>>> >>>> static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf, >>>> @@ -1235,6 +1275,9 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, >>>> if (!entry) >>>> continue; >>>> >>>> + if (tdx_unsupported_cpuid(entry)) >>>> + return -EINVAL; >>>> + >>>> copy_cnt++; >>>> >>>> value = &td_params->cpuid_values[i]; >>>> -- >>>> 2.43.0 >>>> >>
On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >On 4/12/24 08:37, Chao Gao wrote: >> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>> On 4/12/24 03:25, Chao Gao wrote: >>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>> + >>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + return entry->function == 7 && entry->index == 0 && >>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>> +} >>>>> + >>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>> +} >>>>> + >>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + return entry->function == 7 && entry->index == 0 && >>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>> +} >>>>> + >>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>> +} >>>>> + >>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + if (has_tsx(entry)) >>>>> + clear_tsx(entry); >>>>> + >>>>> + if (has_waitpkg(entry)) >>>>> + clear_waitpkg(entry); >>>>> +} >>>>> + >>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>> +{ >>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>> +} >>>> >>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>> ensures that unconfigurable bits are not set by userspace. >>> >>> Aren't they configurable? >> >> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >> so they are not configurable from a userspace perspective. Did I miss anything? >> KVM should check user inputs against its adjusted configurable bitmap, right? > >Maybe I misunderstand but we rely on the TDX module to reject >invalid configuration. We don't check exactly what is configurable >for the TDX Module. Ok, this is what I missed. I thought KVM validated user input and masked out all unsupported features. sorry for this. > >TSX and WAITPKG are not invalid for the TDX Module, but KVM >must either support them by restoring their MSRs, or disallow >them. This patch disallows them for now. Yes. I agree. what if a new feature (supported by a future TDX module) also needs KVM to restore some MSRs? current KVM will allow it to be exposed (since only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think this is not a good design. Current KVM should work with future TDX modules.
On 4/12/24 13:13, Chao Gao wrote: > On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >> On 4/12/24 08:37, Chao Gao wrote: >>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>> + >>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>> +} >>>>>> + >>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>> +} >>>>>> + >>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>> +} >>>>>> + >>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>> +} >>>>>> + >>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + if (has_tsx(entry)) >>>>>> + clear_tsx(entry); >>>>>> + >>>>>> + if (has_waitpkg(entry)) >>>>>> + clear_waitpkg(entry); >>>>>> +} >>>>>> + >>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>> +{ >>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>> +} >>>>> >>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>> ensures that unconfigurable bits are not set by userspace. >>>> >>>> Aren't they configurable? >>> >>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>> so they are not configurable from a userspace perspective. Did I miss anything? >>> KVM should check user inputs against its adjusted configurable bitmap, right? >> >> Maybe I misunderstand but we rely on the TDX module to reject >> invalid configuration. We don't check exactly what is configurable >> for the TDX Module. > > Ok, this is what I missed. I thought KVM validated user input and masked > out all unsupported features. sorry for this. > >> >> TSX and WAITPKG are not invalid for the TDX Module, but KVM >> must either support them by restoring their MSRs, or disallow >> them. This patch disallows them for now. > > Yes. I agree. what if a new feature (supported by a future TDX module) also > needs KVM to restore some MSRs? current KVM will allow it to be exposed (since > only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think > this is not a good design. Current KVM should work with future TDX modules. With respect to CPUID, I gather this kind of thing has been discussed, such as here: https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ and Rick and Xiaoyao are working on something. In general, I would expect a new TDX Module would advertise support for new features, but KVM would have to opt in to use them.
On 12/4/2024 7:55 PM, Adrian Hunter wrote: > On 4/12/24 13:13, Chao Gao wrote: >> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>> On 4/12/24 08:37, Chao Gao wrote: >>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>> + >>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>> +} >>>>>>> + >>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>> +} >>>>>>> + >>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>> +} >>>>>>> + >>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>> +} >>>>>>> + >>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + if (has_tsx(entry)) >>>>>>> + clear_tsx(entry); >>>>>>> + >>>>>>> + if (has_waitpkg(entry)) >>>>>>> + clear_waitpkg(entry); >>>>>>> +} >>>>>>> + >>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>> +{ >>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>> +} >>>>>> >>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>> ensures that unconfigurable bits are not set by userspace. >>>>> >>>>> Aren't they configurable? >>>> >>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>> >>> Maybe I misunderstand but we rely on the TDX module to reject >>> invalid configuration. We don't check exactly what is configurable >>> for the TDX Module. >> >> Ok, this is what I missed. I thought KVM validated user input and masked >> out all unsupported features. sorry for this. >> >>> >>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>> must either support them by restoring their MSRs, or disallow >>> them. This patch disallows them for now. >> >> Yes. I agree. what if a new feature (supported by a future TDX module) also >> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >> this is not a good design. Current KVM should work with future TDX modules. > > With respect to CPUID, I gather this kind of thing has been > discussed, such as here: > > https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ > > and Rick and Xiaoyao are working on something. > > In general, I would expect a new TDX Module would advertise support for > new features, but KVM would have to opt in to use them. > There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/
On Wed, 2024-12-04 at 13:55 +0200, Adrian Hunter wrote: > > > > > > > > They are cleared from the configurable bitmap by > > > > tdx_clear_unsupported_cpuid(), > > > > so they are not configurable from a userspace perspective. Did I miss > > > > anything? > > > > KVM should check user inputs against its adjusted configurable bitmap, > > > > right? > > > > > > Maybe I misunderstand but we rely on the TDX module to reject > > > invalid configuration. We don't check exactly what is configurable > > > for the TDX Module. > > > > Ok, this is what I missed. I thought KVM validated user input and masked > > out all unsupported features. sorry for this. This used to be how it behaved, but IIRC Paolo had suggested to simplify it by letting the TDX module do the rejection. But that was under the assumption there wasn't any TDX supported CPUID configuration that was harmful to KVM. We also used to filter which CPUID features that weren't supported by KVM, but this was also dropped to make things simpler. > > > > > > > > TSX and WAITPKG are not invalid for the TDX Module, but KVM > > > must either support them by restoring their MSRs, or disallow > > > them. This patch disallows them for now. > > > > Yes. I agree. what if a new feature (supported by a future TDX module) also > > needs KVM to restore some MSRs? current KVM will allow it to be exposed > > (since > > only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think > > this is not a good design. Current KVM should work with future TDX modules. > > With respect to CPUID, I gather this kind of thing has been > discussed, such as here: > > https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ > > and Rick and Xiaoyao are working on something. This is around fixed 0/1 bits, and just to help userspace understand what configurations are possible. It isn't intended to filter any bits on the KVM side. > > In general, I would expect a new TDX Module would advertise support for > new features, but KVM would have to opt in to use them. This is true for attributes/xfam, but isn't for CPUID leafs. We used to filter them in various ways but it was messy. The suggestion was to simplify it. The current approach is to treat any TDX module changes that break the host like that as TDX module bugs. See this coverletter for more info on the history: https://lore.kernel.org/kvm/20240812224820.34826-1-rick.p.edgecombe@intel.com/
On Wed, 2024-12-04 at 23:33 +0800, Xiaoyao Li wrote: > > > > There were discussion[1] on whether KVM to gatekeep the > configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to > do so. > > Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that > can be set by userspace is exactly the behavior of opt-in. i.e., for a > given KVM, it only allows a CPUID set {S} to be configured by userspace, > if new TDX module supports new feature X, it needs KVM to opt-in X by > adding X to {S} so that X is allowed to be configured by userspace. > > Besides, I find current interface between KVM and userspace lacks the > ability to tell userspace what bits are not supported by KVM. > KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the > configurable CPUIDs, not supported CPUIDs (I think we might rename it to > configurable_cpuid to better reflect its meaning). So userspace has to > hardcode that TSX and WAITPKG is not support itself. > > [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ I mean, this is kind of a good example for why we might want to go back to filtering CPUID bits. It kind of depends on how KVM wants to treat the TDX module. Hostile like userspace, or more trusting. KVM maintaining code to let the TDX module evolve unsafely would be unfortunate though. If we keep the current approach, it probably would be educational to highlight this example to the TDX module team. "Don't do this or you will have a bug on your side".
On 4/12/24 17:33, Xiaoyao Li wrote: > On 12/4/2024 7:55 PM, Adrian Hunter wrote: >> On 4/12/24 13:13, Chao Gao wrote: >>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>> On 4/12/24 08:37, Chao Gao wrote: >>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>> + >>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>> +} >>>>>>>> + >>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>> +} >>>>>>>> + >>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>> +} >>>>>>>> + >>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>> +} >>>>>>>> + >>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + if (has_tsx(entry)) >>>>>>>> + clear_tsx(entry); >>>>>>>> + >>>>>>>> + if (has_waitpkg(entry)) >>>>>>>> + clear_waitpkg(entry); >>>>>>>> +} >>>>>>>> + >>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>> +{ >>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>> +} >>>>>>> >>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>> >>>>>> Aren't they configurable? >>>>> >>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>> >>>> Maybe I misunderstand but we rely on the TDX module to reject >>>> invalid configuration. We don't check exactly what is configurable >>>> for the TDX Module. >>> >>> Ok, this is what I missed. I thought KVM validated user input and masked >>> out all unsupported features. sorry for this. >>> >>>> >>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>> must either support them by restoring their MSRs, or disallow >>>> them. This patch disallows them for now. >>> >>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>> this is not a good design. Current KVM should work with future TDX modules. >> >> With respect to CPUID, I gather this kind of thing has been >> discussed, such as here: >> >> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >> >> and Rick and Xiaoyao are working on something. >> >> In general, I would expect a new TDX Module would advertise support for >> new features, but KVM would have to opt in to use them. >> > > There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. > > Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. > > Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. I don't follow why hardcoding would be necessary. If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and the bits are 0 there, why would userspace try to set them to 1? > > [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >
On 12/6/2024 1:31 AM, Adrian Hunter wrote: > On 4/12/24 17:33, Xiaoyao Li wrote: >> On 12/4/2024 7:55 PM, Adrian Hunter wrote: >>> On 4/12/24 13:13, Chao Gao wrote: >>>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>>> On 4/12/24 08:37, Chao Gao wrote: >>>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>>> + >>>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + if (has_tsx(entry)) >>>>>>>>> + clear_tsx(entry); >>>>>>>>> + >>>>>>>>> + if (has_waitpkg(entry)) >>>>>>>>> + clear_waitpkg(entry); >>>>>>>>> +} >>>>>>>>> + >>>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>>> +{ >>>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>>> +} >>>>>>>> >>>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>>> >>>>>>> Aren't they configurable? >>>>>> >>>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>>> >>>>> Maybe I misunderstand but we rely on the TDX module to reject >>>>> invalid configuration. We don't check exactly what is configurable >>>>> for the TDX Module. >>>> >>>> Ok, this is what I missed. I thought KVM validated user input and masked >>>> out all unsupported features. sorry for this. >>>> >>>>> >>>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>>> must either support them by restoring their MSRs, or disallow >>>>> them. This patch disallows them for now. >>>> >>>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>>> this is not a good design. Current KVM should work with future TDX modules. >>> >>> With respect to CPUID, I gather this kind of thing has been >>> discussed, such as here: >>> >>> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >>> >>> and Rick and Xiaoyao are working on something. >>> >>> In general, I would expect a new TDX Module would advertise support for >>> new features, but KVM would have to opt in to use them. >>> >> >> There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. >> >> Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. >> >> Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. > > I don't follow why hardcoding would be necessary. > > If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and > the bits are 0 there, why would userspace try to set them to 1? Userspace doesn't set the bit to 1 in kvm_tdx_init_vm.cpuid, doesn't mean userspace wants the bit to be 0. Note, KVM_TDX_CAPABILITIES.cpuid reports the configurable bits. The value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid means the bit is not configurable, not means the bit is unsupported. For kvm_tdx_init_vm.cpuid, - if the corresponding bit is reported as 1 in KVM_TDX_CAPABILITIES.cpuid, then a value 0 in kvm_tdx_init_vm.cpuid means userspace wants to configure it as 0. - if the corresponding bit is reported as 0 in KVM_TDX_CAPABILITIES.cpuid, then userspace has to pass a value 0 in kvm_tdx_init_vm.cpuid. But it doesn't mean the value of the bit will be 0. e.g., X2APIC bit is 0 in KVM_TDX_CAPABILITIES.cpuid, and it's also 0 in kvm_tdx_init_vm.cpuid, but TD guest sees a value of 1. In the view of QEMU, it maintains the bit of X2APIC as 1, and QEMU filters X2APIC bit when calling KVM_TDX_INIT_VM because X2APIC is not configurable. So when it comes to TSX and WAITPKG, QEMU also needs an interface to be informed that they are unsupported. Without the interface of fixed0 bits reported by KVM, QEMU needs to hardcode itself like [1]. The problem of hardcode is that it will conflict when future KVM allows them to be configurable. In the future, if we have interface from KVM to report the fixed0 and fixed1 bit (on top of the proposal [2]), userspace can drop the hardcoded one it maintains. At that time, KVM can ensure no conflict by removing the bits from fixed0/1 array when allowing them to be configurable. [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ [2] https://lore.kernel.org/all/43b26df1-4c27-41ff-a482-e258f872cc31@intel.com/ >> >> [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >> >
On 6/12/24 05:37, Xiaoyao Li wrote: > On 12/6/2024 1:31 AM, Adrian Hunter wrote: >> On 4/12/24 17:33, Xiaoyao Li wrote: >>> On 12/4/2024 7:55 PM, Adrian Hunter wrote: >>>> On 4/12/24 13:13, Chao Gao wrote: >>>>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>>>> On 4/12/24 08:37, Chao Gao wrote: >>>>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>>>> + >>>>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>>>> +} >>>>>>>>>> + >>>>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>>>> +} >>>>>>>>>> + >>>>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>>>> +} >>>>>>>>>> + >>>>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>>>> +} >>>>>>>>>> + >>>>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + if (has_tsx(entry)) >>>>>>>>>> + clear_tsx(entry); >>>>>>>>>> + >>>>>>>>>> + if (has_waitpkg(entry)) >>>>>>>>>> + clear_waitpkg(entry); >>>>>>>>>> +} >>>>>>>>>> + >>>>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>> +{ >>>>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>>>> +} >>>>>>>>> >>>>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>>>> >>>>>>>> Aren't they configurable? >>>>>>> >>>>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>>>> >>>>>> Maybe I misunderstand but we rely on the TDX module to reject >>>>>> invalid configuration. We don't check exactly what is configurable >>>>>> for the TDX Module. >>>>> >>>>> Ok, this is what I missed. I thought KVM validated user input and masked >>>>> out all unsupported features. sorry for this. >>>>> >>>>>> >>>>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>>>> must either support them by restoring their MSRs, or disallow >>>>>> them. This patch disallows them for now. >>>>> >>>>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>>>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>>>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>>>> this is not a good design. Current KVM should work with future TDX modules. >>>> >>>> With respect to CPUID, I gather this kind of thing has been >>>> discussed, such as here: >>>> >>>> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >>>> >>>> and Rick and Xiaoyao are working on something. >>>> >>>> In general, I would expect a new TDX Module would advertise support for >>>> new features, but KVM would have to opt in to use them. >>>> >>> >>> There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. >>> >>> Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. >>> >>> Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. >> >> I don't follow why hardcoding would be necessary. >> >> If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and >> the bits are 0 there, why would userspace try to set them to 1? > > Userspace doesn't set the bit to 1 in kvm_tdx_init_vm.cpuid, doesn't mean userspace wants the bit to be 0. > > Note, KVM_TDX_CAPABILITIES.cpuid reports the configurable bits. The value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid means the bit is not configurable, not means the bit is unsupported. For features configurable by CPUID like TSX and WAITPKG, a value of 0 does mean unsupported, because the value has to be 1 to enable the feature. From the TDX Module Base spec: 11.18. Transactional Synchronization Extensions (TSX) The host VMM can enable TSX for a TD by configuring the following CPUID bits as enabled in the TD_PARAMS input to TDH.MNG.INIT: - CPUID(7,0).EBX[4] (HLE) - CPUID(7,0).EBX[11] (RTM) etc 11.19.4. WAITPKG: TPAUSE, UMONITOR and UMWAIT Instructions The host VMM may allow guest TDs to use the TPAUSE, UMONITOR and UMWAIT instructions, if the CPU supports them, by configuring the virtual value of CPUID(7,0).ECX[5] (WAITPKG) to 1 using the CPUID configuration table which is part the TD_PARAMS input to TDH.MNG.INIT. Enabling CPUID(7,0).ECX[5] also enables TD access to IA32_UMWAIT_CONTROL (MSR 0xE1). If not allowed, then TD execution of TPAUSE, UMONITOR or UMWAIT results in a #UD, and access to IA32_UMWAIT_CONTROL results in a #GP(0). > For kvm_tdx_init_vm.cpuid, > - if the corresponding bit is reported as 1 in KVM_TDX_CAPABILITIES.cpuid, then a value 0 in kvm_tdx_init_vm.cpuid means userspace wants to configure it as 0. > - if the corresponding bit is reported as 0 in KVM_TDX_CAPABILITIES.cpuid, then userspace has to pass a value 0 in kvm_tdx_init_vm.cpuid. But it doesn't mean the value of the bit will be 0. > > e.g., X2APIC bit is 0 in KVM_TDX_CAPABILITIES.cpuid, and it's also 0 in kvm_tdx_init_vm.cpuid, but TD guest sees a value of 1. In the view of QEMU, it maintains the bit of X2APIC as 1, and QEMU filters X2APIC bit when calling KVM_TDX_INIT_VM because X2APIC is not configurable. > > So when it comes to TSX and WAITPKG, QEMU also needs an interface to be informed that they are unsupported. Without the interface of fixed0 bits reported by KVM, QEMU needs to hardcode itself like [1]. The problem of hardcode is that it will conflict when future KVM allows them to be configurable. So TSX and WAITPKG support should be based on KVM_TDX_CAPABILITIES.cpuid, and not hardcoded. > > In the future, if we have interface from KVM to report the fixed0 and fixed1 bit (on top of the proposal [2]), userspace can drop the hardcoded one it maintains. At that time, KVM can ensure no conflict by removing the bits from fixed0/1 array when allowing them to be configurable. > > [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ > [2] https://lore.kernel.org/all/43b26df1-4c27-41ff-a482-e258f872cc31@intel.com/ > >>> >>> [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >>> >> >
On 12/6/2024 10:40 PM, Adrian Hunter wrote: > On 6/12/24 05:37, Xiaoyao Li wrote: >> On 12/6/2024 1:31 AM, Adrian Hunter wrote: >>> On 4/12/24 17:33, Xiaoyao Li wrote: >>>> On 12/4/2024 7:55 PM, Adrian Hunter wrote: >>>>> On 4/12/24 13:13, Chao Gao wrote: >>>>>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>>>>> On 4/12/24 08:37, Chao Gao wrote: >>>>>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>>>>> + >>>>>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>>>>> +} >>>>>>>>>>> + >>>>>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>>>>> +} >>>>>>>>>>> + >>>>>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>>>>> +} >>>>>>>>>>> + >>>>>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>>>>> +} >>>>>>>>>>> + >>>>>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + if (has_tsx(entry)) >>>>>>>>>>> + clear_tsx(entry); >>>>>>>>>>> + >>>>>>>>>>> + if (has_waitpkg(entry)) >>>>>>>>>>> + clear_waitpkg(entry); >>>>>>>>>>> +} >>>>>>>>>>> + >>>>>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>> +{ >>>>>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>>>>> +} >>>>>>>>>> >>>>>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>>>>> >>>>>>>>> Aren't they configurable? >>>>>>>> >>>>>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>>>>> >>>>>>> Maybe I misunderstand but we rely on the TDX module to reject >>>>>>> invalid configuration. We don't check exactly what is configurable >>>>>>> for the TDX Module. >>>>>> >>>>>> Ok, this is what I missed. I thought KVM validated user input and masked >>>>>> out all unsupported features. sorry for this. >>>>>> >>>>>>> >>>>>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>>>>> must either support them by restoring their MSRs, or disallow >>>>>>> them. This patch disallows them for now. >>>>>> >>>>>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>>>>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>>>>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>>>>> this is not a good design. Current KVM should work with future TDX modules. >>>>> >>>>> With respect to CPUID, I gather this kind of thing has been >>>>> discussed, such as here: >>>>> >>>>> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >>>>> >>>>> and Rick and Xiaoyao are working on something. >>>>> >>>>> In general, I would expect a new TDX Module would advertise support for >>>>> new features, but KVM would have to opt in to use them. >>>>> >>>> >>>> There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. >>>> >>>> Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. >>>> >>>> Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. >>> >>> I don't follow why hardcoding would be necessary. >>> >>> If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and >>> the bits are 0 there, why would userspace try to set them to 1? >> >> Userspace doesn't set the bit to 1 in kvm_tdx_init_vm.cpuid, doesn't mean userspace wants the bit to be 0. >> >> Note, KVM_TDX_CAPABILITIES.cpuid reports the configurable bits. The value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid means the bit is not configurable, not means the bit is unsupported. > > For features configurable by CPUID like TSX and WAITPKG, > a value of 0 does mean unsupported, because the value > has to be 1 to enable the feature. > > From the TDX Module Base spec: > > 11.18. Transactional Synchronization Extensions (TSX) > The host VMM can enable TSX for a TD by configuring the following CPUID bits as enabled in the TD_PARAMS input to > TDH.MNG.INIT: > - CPUID(7,0).EBX[4] (HLE) > - CPUID(7,0).EBX[11] (RTM) > etc > > 11.19.4. WAITPKG: TPAUSE, UMONITOR and UMWAIT Instructions > The host VMM may allow guest TDs to use the TPAUSE, UMONITOR and UMWAIT instructions, if the CPU supports them, > by configuring the virtual value of CPUID(7,0).ECX[5] (WAITPKG) to 1 using the CPUID configuration table which is part > the TD_PARAMS input to TDH.MNG.INIT. Enabling CPUID(7,0).ECX[5] also enables TD access to IA32_UMWAIT_CONTROL > (MSR 0xE1). > If not allowed, then TD execution of TPAUSE, UMONITOR or UMWAIT results in a #UD, and access to > IA32_UMWAIT_CONTROL results in a #GP(0). > >> For kvm_tdx_init_vm.cpuid, >> - if the corresponding bit is reported as 1 in KVM_TDX_CAPABILITIES.cpuid, then a value 0 in kvm_tdx_init_vm.cpuid means userspace wants to configure it as 0. >> - if the corresponding bit is reported as 0 in KVM_TDX_CAPABILITIES.cpuid, then userspace has to pass a value 0 in kvm_tdx_init_vm.cpuid. But it doesn't mean the value of the bit will be 0. >> >> e.g., X2APIC bit is 0 in KVM_TDX_CAPABILITIES.cpuid, and it's also 0 in kvm_tdx_init_vm.cpuid, but TD guest sees a value of 1. In the view of QEMU, it maintains the bit of X2APIC as 1, and QEMU filters X2APIC bit when calling KVM_TDX_INIT_VM because X2APIC is not configurable. >> >> So when it comes to TSX and WAITPKG, QEMU also needs an interface to be informed that they are unsupported. Without the interface of fixed0 bits reported by KVM, QEMU needs to hardcode itself like [1]. The problem of hardcode is that it will conflict when future KVM allows them to be configurable. > > So TSX and WAITPKG support should be based on KVM_TDX_CAPABILITIES.cpuid, and not hardcoded. This requires Userspace to have the knowledge that "TSX and WAITPKG is supposed to be configurable and reported as 1 in KVM_TDX_CAPABILITIES.cpuid in normal case". And if userspace gets a value 0 of TSX and Waitpkg in KVM_TDX_CAPABILITIES.cpuid, it means either KVM doesn't support/allow it or the underlying TDX module doesn't. I think it's an implicit form of hardcode. From the perspective of userspace, I would like to avoid all the special handling. >> >> In the future, if we have interface from KVM to report the fixed0 and fixed1 bit (on top of the proposal [2]), userspace can drop the hardcoded one it maintains. At that time, KVM can ensure no conflict by removing the bits from fixed0/1 array when allowing them to be configurable. >> >> [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ >> [2] https://lore.kernel.org/all/43b26df1-4c27-41ff-a482-e258f872cc31@intel.com/ >> >>>> >>>> [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >>>> >>> >> >
On 9/12/24 04:46, Xiaoyao Li wrote: > On 12/6/2024 10:40 PM, Adrian Hunter wrote: >> On 6/12/24 05:37, Xiaoyao Li wrote: >>> On 12/6/2024 1:31 AM, Adrian Hunter wrote: >>>> On 4/12/24 17:33, Xiaoyao Li wrote: >>>>> On 12/4/2024 7:55 PM, Adrian Hunter wrote: >>>>>> On 4/12/24 13:13, Chao Gao wrote: >>>>>>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>>>>>> On 4/12/24 08:37, Chao Gao wrote: >>>>>>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>>>>>> + >>>>>>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>>>>>> +} >>>>>>>>>>>> + >>>>>>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>>>>>> +} >>>>>>>>>>>> + >>>>>>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>>>>>> +} >>>>>>>>>>>> + >>>>>>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>>>>>> +} >>>>>>>>>>>> + >>>>>>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + if (has_tsx(entry)) >>>>>>>>>>>> + clear_tsx(entry); >>>>>>>>>>>> + >>>>>>>>>>>> + if (has_waitpkg(entry)) >>>>>>>>>>>> + clear_waitpkg(entry); >>>>>>>>>>>> +} >>>>>>>>>>>> + >>>>>>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>> +{ >>>>>>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>>>>>> +} >>>>>>>>>>> >>>>>>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>>>>>> >>>>>>>>>> Aren't they configurable? >>>>>>>>> >>>>>>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>>>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>>>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>>>>>> >>>>>>>> Maybe I misunderstand but we rely on the TDX module to reject >>>>>>>> invalid configuration. We don't check exactly what is configurable >>>>>>>> for the TDX Module. >>>>>>> >>>>>>> Ok, this is what I missed. I thought KVM validated user input and masked >>>>>>> out all unsupported features. sorry for this. >>>>>>> >>>>>>>> >>>>>>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>>>>>> must either support them by restoring their MSRs, or disallow >>>>>>>> them. This patch disallows them for now. >>>>>>> >>>>>>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>>>>>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>>>>>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>>>>>> this is not a good design. Current KVM should work with future TDX modules. >>>>>> >>>>>> With respect to CPUID, I gather this kind of thing has been >>>>>> discussed, such as here: >>>>>> >>>>>> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >>>>>> >>>>>> and Rick and Xiaoyao are working on something. >>>>>> >>>>>> In general, I would expect a new TDX Module would advertise support for >>>>>> new features, but KVM would have to opt in to use them. >>>>>> >>>>> >>>>> There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. >>>>> >>>>> Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. >>>>> >>>>> Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. >>>> >>>> I don't follow why hardcoding would be necessary. >>>> >>>> If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and >>>> the bits are 0 there, why would userspace try to set them to 1? >>> >>> Userspace doesn't set the bit to 1 in kvm_tdx_init_vm.cpuid, doesn't mean userspace wants the bit to be 0. >>> >>> Note, KVM_TDX_CAPABILITIES.cpuid reports the configurable bits. The value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid means the bit is not configurable, not means the bit is unsupported. >> >> For features configurable by CPUID like TSX and WAITPKG, >> a value of 0 does mean unsupported, because the value >> has to be 1 to enable the feature. >> >> From the TDX Module Base spec: >> >> 11.18. Transactional Synchronization Extensions (TSX) >> The host VMM can enable TSX for a TD by configuring the following CPUID bits as enabled in the TD_PARAMS input to >> TDH.MNG.INIT: >> - CPUID(7,0).EBX[4] (HLE) >> - CPUID(7,0).EBX[11] (RTM) >> etc >> >> 11.19.4. WAITPKG: TPAUSE, UMONITOR and UMWAIT Instructions >> The host VMM may allow guest TDs to use the TPAUSE, UMONITOR and UMWAIT instructions, if the CPU supports them, >> by configuring the virtual value of CPUID(7,0).ECX[5] (WAITPKG) to 1 using the CPUID configuration table which is part >> the TD_PARAMS input to TDH.MNG.INIT. Enabling CPUID(7,0).ECX[5] also enables TD access to IA32_UMWAIT_CONTROL >> (MSR 0xE1). >> If not allowed, then TD execution of TPAUSE, UMONITOR or UMWAIT results in a #UD, and access to >> IA32_UMWAIT_CONTROL results in a #GP(0). >> >>> For kvm_tdx_init_vm.cpuid, >>> - if the corresponding bit is reported as 1 in KVM_TDX_CAPABILITIES.cpuid, then a value 0 in kvm_tdx_init_vm.cpuid means userspace wants to configure it as 0. >>> - if the corresponding bit is reported as 0 in KVM_TDX_CAPABILITIES.cpuid, then userspace has to pass a value 0 in kvm_tdx_init_vm.cpuid. But it doesn't mean the value of the bit will be 0. >>> >>> e.g., X2APIC bit is 0 in KVM_TDX_CAPABILITIES.cpuid, and it's also 0 in kvm_tdx_init_vm.cpuid, but TD guest sees a value of 1. In the view of QEMU, it maintains the bit of X2APIC as 1, and QEMU filters X2APIC bit when calling KVM_TDX_INIT_VM because X2APIC is not configurable. >>> >>> So when it comes to TSX and WAITPKG, QEMU also needs an interface to be informed that they are unsupported. Without the interface of fixed0 bits reported by KVM, QEMU needs to hardcode itself like [1]. The problem of hardcode is that it will conflict when future KVM allows them to be configurable. >> >> So TSX and WAITPKG support should be based on KVM_TDX_CAPABILITIES.cpuid, and not hardcoded. > > This requires Userspace to have the knowledge that "TSX and WAITPKG is supposed to be configurable and reported as 1 in KVM_TDX_CAPABILITIES.cpuid in normal case". And if userspace gets a value 0 of TSX and Waitpkg in KVM_TDX_CAPABILITIES.cpuid, it means either KVM doesn't support/allow it or the underlying TDX module doesn't. > > I think it's an implicit form of hardcode. From the perspective of userspace, I would like to avoid all the special handling. It is consistent with what is done for TD attributes and XFAM. Presumably feature names must be mapped to capability bits and configuration bits, and that information has to be represented somewhere. > >>> >>> In the future, if we have interface from KVM to report the fixed0 and fixed1 bit (on top of the proposal [2]), userspace can drop the hardcoded one it maintains. At that time, KVM can ensure no conflict by removing the bits from fixed0/1 array when allowing them to be configurable. >>> >>> [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ >>> [2] https://lore.kernel.org/all/43b26df1-4c27-41ff-a482-e258f872cc31@intel.com/ >>> >>>>> >>>>> [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >>>>> >>>> >>> >> >
On 12/9/2024 3:08 PM, Adrian Hunter wrote: > On 9/12/24 04:46, Xiaoyao Li wrote: >> On 12/6/2024 10:40 PM, Adrian Hunter wrote: >>> On 6/12/24 05:37, Xiaoyao Li wrote: >>>> On 12/6/2024 1:31 AM, Adrian Hunter wrote: >>>>> On 4/12/24 17:33, Xiaoyao Li wrote: >>>>>> On 12/4/2024 7:55 PM, Adrian Hunter wrote: >>>>>>> On 4/12/24 13:13, Chao Gao wrote: >>>>>>>> On Wed, Dec 04, 2024 at 08:57:23AM +0200, Adrian Hunter wrote: >>>>>>>>> On 4/12/24 08:37, Chao Gao wrote: >>>>>>>>>> On Wed, Dec 04, 2024 at 08:18:32AM +0200, Adrian Hunter wrote: >>>>>>>>>>> On 4/12/24 03:25, Chao Gao wrote: >>>>>>>>>>>>> +#define TDX_FEATURE_TSX (__feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM)) >>>>>>>>>>>>> + >>>>>>>>>>>>> +static bool has_tsx(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>>>> + (entry->ebx & TDX_FEATURE_TSX); >>>>>>>>>>>>> +} >>>>>>>>>>>>> + >>>>>>>>>>>>> +static void clear_tsx(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + entry->ebx &= ~TDX_FEATURE_TSX; >>>>>>>>>>>>> +} >>>>>>>>>>>>> + >>>>>>>>>>>>> +static bool has_waitpkg(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + return entry->function == 7 && entry->index == 0 && >>>>>>>>>>>>> + (entry->ecx & __feature_bit(X86_FEATURE_WAITPKG)); >>>>>>>>>>>>> +} >>>>>>>>>>>>> + >>>>>>>>>>>>> +static void clear_waitpkg(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + entry->ecx &= ~__feature_bit(X86_FEATURE_WAITPKG); >>>>>>>>>>>>> +} >>>>>>>>>>>>> + >>>>>>>>>>>>> +static void tdx_clear_unsupported_cpuid(struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + if (has_tsx(entry)) >>>>>>>>>>>>> + clear_tsx(entry); >>>>>>>>>>>>> + >>>>>>>>>>>>> + if (has_waitpkg(entry)) >>>>>>>>>>>>> + clear_waitpkg(entry); >>>>>>>>>>>>> +} >>>>>>>>>>>>> + >>>>>>>>>>>>> +static bool tdx_unsupported_cpuid(const struct kvm_cpuid_entry2 *entry) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + return has_tsx(entry) || has_waitpkg(entry); >>>>>>>>>>>>> +} >>>>>>>>>>>> >>>>>>>>>>>> No need to check TSX/WAITPKG explicitly because setup_tdparams_cpuids() already >>>>>>>>>>>> ensures that unconfigurable bits are not set by userspace. >>>>>>>>>>> >>>>>>>>>>> Aren't they configurable? >>>>>>>>>> >>>>>>>>>> They are cleared from the configurable bitmap by tdx_clear_unsupported_cpuid(), >>>>>>>>>> so they are not configurable from a userspace perspective. Did I miss anything? >>>>>>>>>> KVM should check user inputs against its adjusted configurable bitmap, right? >>>>>>>>> >>>>>>>>> Maybe I misunderstand but we rely on the TDX module to reject >>>>>>>>> invalid configuration. We don't check exactly what is configurable >>>>>>>>> for the TDX Module. >>>>>>>> >>>>>>>> Ok, this is what I missed. I thought KVM validated user input and masked >>>>>>>> out all unsupported features. sorry for this. >>>>>>>> >>>>>>>>> >>>>>>>>> TSX and WAITPKG are not invalid for the TDX Module, but KVM >>>>>>>>> must either support them by restoring their MSRs, or disallow >>>>>>>>> them. This patch disallows them for now. >>>>>>>> >>>>>>>> Yes. I agree. what if a new feature (supported by a future TDX module) also >>>>>>>> needs KVM to restore some MSRs? current KVM will allow it to be exposed (since >>>>>>>> only TSX/WAITPKG are checked); then some MSRs may get corrupted. I may think >>>>>>>> this is not a good design. Current KVM should work with future TDX modules. >>>>>>> >>>>>>> With respect to CPUID, I gather this kind of thing has been >>>>>>> discussed, such as here: >>>>>>> >>>>>>> https://lore.kernel.org/all/ZhVsHVqaff7AKagu@google.com/ >>>>>>> >>>>>>> and Rick and Xiaoyao are working on something. >>>>>>> >>>>>>> In general, I would expect a new TDX Module would advertise support for >>>>>>> new features, but KVM would have to opt in to use them. >>>>>>> >>>>>> >>>>>> There were discussion[1] on whether KVM to gatekeep the configurable/supported CPUIDs for TDX. I stand by Sean that KVM needs to do so. >>>>>> >>>>>> Regarding KVM opt in the new feature, KVM gatekeeps the CPUID bit that can be set by userspace is exactly the behavior of opt-in. i.e., for a given KVM, it only allows a CPUID set {S} to be configured by userspace, if new TDX module supports new feature X, it needs KVM to opt-in X by adding X to {S} so that X is allowed to be configured by userspace. >>>>>> >>>>>> Besides, I find current interface between KVM and userspace lacks the ability to tell userspace what bits are not supported by KVM. KVM_TDX_CAPABILITIES.cpuid doesn't work because it represents the configurable CPUIDs, not supported CPUIDs (I think we might rename it to configurable_cpuid to better reflect its meaning). So userspace has to hardcode that TSX and WAITPKG is not support itself. >>>>> >>>>> I don't follow why hardcoding would be necessary. >>>>> >>>>> If the leaf is represented in KVM_TDX_CAPABILITIES.cpuid, and >>>>> the bits are 0 there, why would userspace try to set them to 1? >>>> >>>> Userspace doesn't set the bit to 1 in kvm_tdx_init_vm.cpuid, doesn't mean userspace wants the bit to be 0. >>>> >>>> Note, KVM_TDX_CAPABILITIES.cpuid reports the configurable bits. The value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid means the bit is not configurable, not means the bit is unsupported. >>> >>> For features configurable by CPUID like TSX and WAITPKG, >>> a value of 0 does mean unsupported, because the value >>> has to be 1 to enable the feature. >>> >>> From the TDX Module Base spec: >>> >>> 11.18. Transactional Synchronization Extensions (TSX) >>> The host VMM can enable TSX for a TD by configuring the following CPUID bits as enabled in the TD_PARAMS input to >>> TDH.MNG.INIT: >>> - CPUID(7,0).EBX[4] (HLE) >>> - CPUID(7,0).EBX[11] (RTM) >>> etc >>> >>> 11.19.4. WAITPKG: TPAUSE, UMONITOR and UMWAIT Instructions >>> The host VMM may allow guest TDs to use the TPAUSE, UMONITOR and UMWAIT instructions, if the CPU supports them, >>> by configuring the virtual value of CPUID(7,0).ECX[5] (WAITPKG) to 1 using the CPUID configuration table which is part >>> the TD_PARAMS input to TDH.MNG.INIT. Enabling CPUID(7,0).ECX[5] also enables TD access to IA32_UMWAIT_CONTROL >>> (MSR 0xE1). >>> If not allowed, then TD execution of TPAUSE, UMONITOR or UMWAIT results in a #UD, and access to >>> IA32_UMWAIT_CONTROL results in a #GP(0). >>> >>>> For kvm_tdx_init_vm.cpuid, >>>> - if the corresponding bit is reported as 1 in KVM_TDX_CAPABILITIES.cpuid, then a value 0 in kvm_tdx_init_vm.cpuid means userspace wants to configure it as 0. >>>> - if the corresponding bit is reported as 0 in KVM_TDX_CAPABILITIES.cpuid, then userspace has to pass a value 0 in kvm_tdx_init_vm.cpuid. But it doesn't mean the value of the bit will be 0. >>>> >>>> e.g., X2APIC bit is 0 in KVM_TDX_CAPABILITIES.cpuid, and it's also 0 in kvm_tdx_init_vm.cpuid, but TD guest sees a value of 1. In the view of QEMU, it maintains the bit of X2APIC as 1, and QEMU filters X2APIC bit when calling KVM_TDX_INIT_VM because X2APIC is not configurable. >>>> >>>> So when it comes to TSX and WAITPKG, QEMU also needs an interface to be informed that they are unsupported. Without the interface of fixed0 bits reported by KVM, QEMU needs to hardcode itself like [1]. The problem of hardcode is that it will conflict when future KVM allows them to be configurable. >>> >>> So TSX and WAITPKG support should be based on KVM_TDX_CAPABILITIES.cpuid, and not hardcoded. >> >> This requires Userspace to have the knowledge that "TSX and WAITPKG is supposed to be configurable and reported as 1 in KVM_TDX_CAPABILITIES.cpuid in normal case". And if userspace gets a value 0 of TSX and Waitpkg in KVM_TDX_CAPABILITIES.cpuid, it means either KVM doesn't support/allow it or the underlying TDX module doesn't. >> >> I think it's an implicit form of hardcode. From the perspective of userspace, I would like to avoid all the special handling. > > It is consistent with what is done for TD attributes and XFAM. Presumably feature names must be mapped to capability bits and configuration bits, and that information has to be represented somewhere. Attrbutes and XFAM are different to cpuid either in KVM_TDX_CAPABILITIES or in struct kvm_tdx_init_vm. KVM_TDX_CAPABILITIES reports the *supported* bits for Attributes and XFAM while it reports the configurable bits for CPUID. The bits passed in struct kvm_tdx_init_vm for Attributes and XFAM are a whole of the final value that will be set for TD guest. While the bits passed in struct kvm_tdx_init_vm for cpuid are only the value of configurable bits, the final value that will be set for TD guest needs to be OR'ed with fixed1 bits. From the perspective of userspace,the information in KVM_TDX_CAPABILITIES.supported_attributes and KVM_TDX_CAPABILITIES.supported_xfam are enough, bits reported as 0 in them means they are not supported. While for cpuid, in general, the bits reported as 0 in KVM_TDX_CAPABILITIES.cpuid doesn't mean they are not supported. Anyway, my point is, in general, value 0 of a bit in KVM_TDX_CAPABILITIES.cpuid doesn't mean the bit is not supported for TD guest. It's better to have KVM expose an interface to report the unsupported bits. Without it, userspace has to hardcode some information and maintain it itself. QEMU already hardcodes the fixed0 and fixed1 bits itself[1] according to a specific version of TDX spec, while the tweak to TSX and WAITPKG in KVM will require more specific handling in QEMU. It's not the fault of the change to TSX/WAITPKG in KVM_TDX_CAPABILITIES.cpuid. It somehow reveals it's getting messier when KVM cannot report enough information to userspace. [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ >> >>>> >>>> In the future, if we have interface from KVM to report the fixed0 and fixed1 bit (on top of the proposal [2]), userspace can drop the hardcoded one it maintains. At that time, KVM can ensure no conflict by removing the bits from fixed0/1 array when allowing them to be configurable. >>>> >>>> [1] https://lore.kernel.org/qemu-devel/20241105062408.3533704-49-xiaoyao.li@intel.com/ >>>> [2] https://lore.kernel.org/all/43b26df1-4c27-41ff-a482-e258f872cc31@intel.com/ >>>> >>>>>> >>>>>> [1] https://lore.kernel.org/all/ZuM12EFbOXmpHHVQ@google.com/ >>>>>> >>>>> >>>> >>> >> >
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 4a33ca54c8ba..2c7b6308da73 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -722,14 +722,21 @@ static struct tdx_uret_msr tdx_uret_msrs[] = { {.msr = MSR_LSTAR,}, {.msr = MSR_TSC_AUX,}, }; +static int tdx_uret_tsx_ctrl_slot; -static void tdx_user_return_msr_update_cache(void) +static void tdx_user_return_msr_update_cache(struct kvm_vcpu *vcpu) { int i; for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++) kvm_user_return_msr_update_cache(tdx_uret_msrs[i].slot, tdx_uret_msrs[i].defval); + /* + * TSX_CTRL is reset to 0 if guest TSX is supported. Otherwise + * preserved. + */ + if (to_kvm_tdx(vcpu->kvm)->tsx_supported && tdx_uret_tsx_ctrl_slot != -1) + kvm_user_return_msr_update_cache(tdx_uret_tsx_ctrl_slot, 0); } static void tdx_restore_host_xsave_state(struct kvm_vcpu *vcpu) @@ -817,7 +824,7 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) tdx_vcpu_enter_exit(vcpu); - tdx_user_return_msr_update_cache(); + tdx_user_return_msr_update_cache(vcpu); tdx_restore_host_xsave_state(vcpu); tdx->host_state_need_restore = true; @@ -1258,6 +1265,22 @@ static int setup_tdparams_cpuids(struct kvm_cpuid2 *cpuid, return 0; } +static bool tdparams_tsx_supported(struct kvm_cpuid2 *cpuid) +{ + const struct kvm_cpuid_entry2 *entry; + u64 mask; + u32 ebx; + + entry = kvm_find_cpuid_entry2(cpuid->entries, cpuid->nent, 0x7, 0); + if (entry) + ebx = entry->ebx; + else + ebx = 0; + + mask = __feature_bit(X86_FEATURE_HLE) | __feature_bit(X86_FEATURE_RTM); + return ebx & mask; +} + static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, struct kvm_tdx_init_vm *init_vm) { @@ -1299,6 +1322,7 @@ static int setup_tdparams(struct kvm *kvm, struct td_params *td_params, MEMCPY_SAME_SIZE(td_params->mrowner, init_vm->mrowner); MEMCPY_SAME_SIZE(td_params->mrownerconfig, init_vm->mrownerconfig); + to_kvm_tdx(kvm)->tsx_supported = tdparams_tsx_supported(cpuid); return 0; } @@ -2272,6 +2296,11 @@ static int __init __tdx_bringup(void) return -EIO; } } + tdx_uret_tsx_ctrl_slot = kvm_find_user_return_msr(MSR_IA32_TSX_CTRL); + if (tdx_uret_tsx_ctrl_slot == -1 && boot_cpu_has(X86_FEATURE_MSR_TSX_CTRL)) { + pr_err("MSR_IA32_TSX_CTRL isn't included by kvm_find_user_return_msr\n"); + return -EIO; + } /* * Enabling TDX requires enabling hardware virtualization first, diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h index 48cf0a1abfcc..815ff6bdbc7e 100644 --- a/arch/x86/kvm/vmx/tdx.h +++ b/arch/x86/kvm/vmx/tdx.h @@ -29,6 +29,14 @@ struct kvm_tdx { u8 nr_tdcs_pages; u8 nr_vcpu_tdcx_pages; + /* + * Used on each TD-exit, see tdx_user_return_msr_update_cache(). + * TSX_CTRL value on TD exit + * - set 0 if guest TSX enabled + * - preserved if guest TSX disabled + */ + bool tsx_supported; + u64 tsc_offset; enum kvm_tdx_state state;