diff mbox series

[10/25] KVM: TDX: Initialize KVM supported capabilities when module setup

Message ID 20240812224820.34826-11-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Headers show
Series TDX vCPU/VM creation | expand

Commit Message

Rick Edgecombe Aug. 12, 2024, 10:48 p.m. UTC
From: Xiaoyao Li <xiaoyao.li@intel.com>

While TDX module reports a set of capabilities/features that it
supports, what KVM currently supports might be a subset of them.
E.g., DEBUG and PERFMON are supported by TDX module but currently not
supported by KVM.

Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
supported_attrs and suppported_xfam are validated against fixed0/1
values enumerated by TDX module. Configurable CPUID bits derive from TDX
module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
i.e., mask off the bits that are configurable in the view of TDX module
but not supported by KVM yet.

KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
uAPI breakout v1:
 - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
   pointer.
 - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
   doesn't have 'kvm_tdx_cpuid_config'.
 - Updates for uAPI changes
---
 arch/x86/include/uapi/asm/kvm.h |  2 -
 arch/x86/kvm/vmx/tdx.c          | 81 +++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 2 deletions(-)

Comments

Chao Gao Aug. 13, 2024, 3:25 a.m. UTC | #1
On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>From: Xiaoyao Li <xiaoyao.li@intel.com>
>
>While TDX module reports a set of capabilities/features that it
>supports, what KVM currently supports might be a subset of them.
>E.g., DEBUG and PERFMON are supported by TDX module but currently not
>supported by KVM.
>
>Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>supported_attrs and suppported_xfam are validated against fixed0/1
>values enumerated by TDX module. Configurable CPUID bits derive from TDX
>module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>i.e., mask off the bits that are configurable in the view of TDX module
>but not supported by KVM yet.
>
>KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.

If we convert KVM_TDX_CPUID_NO_SUBLEAF to 0 when reporting capabilities to
QEMU, QEMU cannot distinguish a CPUID subleaf 0 from a CPUID w/o subleaf.
Does it matter to QEMU?

>
>Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>---
>uAPI breakout v1:
> - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
>   pointer.
> - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
>   doesn't have 'kvm_tdx_cpuid_config'.
> - Updates for uAPI changes
>---
> arch/x86/include/uapi/asm/kvm.h |  2 -
> arch/x86/kvm/vmx/tdx.c          | 81 +++++++++++++++++++++++++++++++++
> 2 files changed, 81 insertions(+), 2 deletions(-)
>
>diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
>index 47caf508cca7..c9eb2e2f5559 100644
>--- a/arch/x86/include/uapi/asm/kvm.h
>+++ b/arch/x86/include/uapi/asm/kvm.h
>@@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
> 	__u64 hw_error;
> };
> 
>-#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
>-

This definition can be dropped from the previous patch because it isn't
used there.

> struct kvm_tdx_cpuid_config {
> 	__u32 leaf;
> 	__u32 sub_leaf;
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index 90b44ebaf864..d89973e554f6 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
> 	ida_free(&tdx_guest_keyid_pool, keyid);
> }
> 
>+#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
>+
>+struct kvm_tdx_caps {
>+	u64 supported_attrs;
>+	u64 supported_xfam;
>+
>+	u16 num_cpuid_config;
>+	/* This must the last member. */
>+	DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>+};
>+
>+static struct kvm_tdx_caps *kvm_tdx_caps;
>+
> static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
> {
> 	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
>@@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
> 	return r;
> }
> 
>+#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
>+
>+static int __init setup_kvm_tdx_caps(void)
>+{
>+	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
>+	u64 kvm_supported;
>+	int i;
>+
>+	kvm_tdx_caps = kzalloc(sizeof(*kvm_tdx_caps) +
>+			       sizeof(struct kvm_tdx_cpuid_config) * td_conf->num_cpuid_config,

struct_size()

>+			       GFP_KERNEL);
>+	if (!kvm_tdx_caps)
>+		return -ENOMEM;
>+
>+	kvm_supported = KVM_SUPPORTED_TD_ATTRS;
>+	if ((kvm_supported & td_conf->attributes_fixed1) != td_conf->attributes_fixed1)
>+		goto err;
>+
>+	kvm_tdx_caps->supported_attrs = kvm_supported & td_conf->attributes_fixed0;
>+
>+	kvm_supported = kvm_caps.supported_xcr0 | kvm_caps.supported_xss;
>+
>+	/*
>+	 * PT and CET can be exposed to TD guest regardless of KVM's XSS, PT
>+	 * and, CET support.
>+	 */
>+	kvm_supported |= XFEATURE_MASK_PT | XFEATURE_MASK_CET_USER |
>+			 XFEATURE_MASK_CET_KERNEL;

I prefer to add PT/CET bits in separate patches because PT/CET related MSRs may
need save/restore. Putting them in separate patches can give us the chance to
explain this in detail.

>+	if ((kvm_supported & td_conf->xfam_fixed1) != td_conf->xfam_fixed1)
>+		goto err;
>+
>+	kvm_tdx_caps->supported_xfam = kvm_supported & td_conf->xfam_fixed0;
>+
>+	kvm_tdx_caps->num_cpuid_config = td_conf->num_cpuid_config;
>+	for (i = 0; i < td_conf->num_cpuid_config; i++) {
>+		struct kvm_tdx_cpuid_config source = {
>+			.leaf = (u32)td_conf->cpuid_config_leaves[i],
>+			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
>+			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
>+			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
>+			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
>+			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
>+		};
>+		struct kvm_tdx_cpuid_config *dest =
>+			&kvm_tdx_caps->cpuid_configs[i];
>+
>+		memcpy(dest, &source, sizeof(struct kvm_tdx_cpuid_config));

this memcpy() looks superfluous. does this work?

		kvm_tdx_caps->cpuid_configs[i] = {
			.leaf = (u32)td_conf->cpuid_config_leaves[i],
			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
		};

>+		if (dest->sub_leaf == KVM_TDX_CPUID_NO_SUBLEAF)
>+			dest->sub_leaf = 0;
>+	}
>+
>+	return 0;
>+err:
>+	kfree(kvm_tdx_caps);
>+	return -EIO;
>+}
Kai Huang Aug. 13, 2024, 5:26 a.m. UTC | #2
On Tue, 2024-08-13 at 11:25 +0800, Chao Gao wrote:
> > +	for (i = 0; i < td_conf->num_cpuid_config; i++) {
> > +		struct kvm_tdx_cpuid_config source = {
> > +			.leaf = (u32)td_conf->cpuid_config_leaves[i],
> > +			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
> > +			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
> > +			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
> > +			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
> > +			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
> > +		};
> > +		struct kvm_tdx_cpuid_config *dest =
> > +			&kvm_tdx_caps->cpuid_configs[i];
> > +
> > +		memcpy(dest, &source, sizeof(struct kvm_tdx_cpuid_config));
> 
> this memcpy() looks superfluous. does this work?
> 
> 		kvm_tdx_caps->cpuid_configs[i] = {
> 			.leaf = (u32)td_conf->cpuid_config_leaves[i],
> 			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
> 			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
> 			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
> 			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
> 			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
> 		};

This looks good to me.  I didn't try to optimize because it's done in the
module loading time.
Binbin Wu Aug. 13, 2024, 7:24 a.m. UTC | #3
On 8/13/2024 11:25 AM, Chao Gao wrote:
> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>
>> While TDX module reports a set of capabilities/features that it
>> supports, what KVM currently supports might be a subset of them.
>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>> supported by KVM.
>>
>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>> supported_attrs and suppported_xfam are validated against fixed0/1
>> values enumerated by TDX module. Configurable CPUID bits derive from TDX
>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>> i.e., mask off the bits that are configurable in the view of TDX module
>> but not supported by KVM yet.
>>
>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
> If we convert KVM_TDX_CPUID_NO_SUBLEAF to 0 when reporting capabilities to
> QEMU, QEMU cannot distinguish a CPUID subleaf 0 from a CPUID w/o subleaf.
> Does it matter to QEMU?

According to "and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the
concept of KVM". IIUC, KVM's ABI uses KVM_CPUID_FLAG_SIGNIFCANT_INDEX
in flags of struct kvm_cpuid_entry2 to distinguish whether the index
is significant.
Chao Gao Aug. 14, 2024, 12:26 a.m. UTC | #4
On Tue, Aug 13, 2024 at 03:24:32PM +0800, Binbin Wu wrote:
>
>
>
>On 8/13/2024 11:25 AM, Chao Gao wrote:
>> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>> > From: Xiaoyao Li <xiaoyao.li@intel.com>
>> > 
>> > While TDX module reports a set of capabilities/features that it
>> > supports, what KVM currently supports might be a subset of them.
>> > E.g., DEBUG and PERFMON are supported by TDX module but currently not
>> > supported by KVM.
>> > 
>> > Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>> > supported_attrs and suppported_xfam are validated against fixed0/1
>> > values enumerated by TDX module. Configurable CPUID bits derive from TDX
>> > module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>> > i.e., mask off the bits that are configurable in the view of TDX module
>> > but not supported by KVM yet.
>> > 
>> > KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>> > and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>> If we convert KVM_TDX_CPUID_NO_SUBLEAF to 0 when reporting capabilities to
>> QEMU, QEMU cannot distinguish a CPUID subleaf 0 from a CPUID w/o subleaf.
>> Does it matter to QEMU?
>
>According to "and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the
>concept of KVM". IIUC, KVM's ABI uses KVM_CPUID_FLAG_SIGNIFCANT_INDEX
>in flags of struct kvm_cpuid_entry2 to distinguish whether the index
>is significant.

If KVM doesn't indicate which CPU leaf doesn't support subleafs when reporting
TDX capabilities, how can QEMU know whether it should set the
KVM_CPUID_FLAG_SIGNIFICANT_INDEX flag for a given CPUID leaf?  Or is the
expectation that QEMU can discover that on its own?
Binbin Wu Aug. 14, 2024, 2:36 a.m. UTC | #5
On 8/14/2024 8:26 AM, Chao Gao wrote:
> On Tue, Aug 13, 2024 at 03:24:32PM +0800, Binbin Wu wrote:
>>
>>
>> On 8/13/2024 11:25 AM, Chao Gao wrote:
>>> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>
>>>> While TDX module reports a set of capabilities/features that it
>>>> supports, what KVM currently supports might be a subset of them.
>>>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>>>> supported by KVM.
>>>>
>>>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>>>> supported_attrs and suppported_xfam are validated against fixed0/1
>>>> values enumerated by TDX module. Configurable CPUID bits derive from TDX
>>>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>>>> i.e., mask off the bits that are configurable in the view of TDX module
>>>> but not supported by KVM yet.
>>>>
>>>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>>>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>> If we convert KVM_TDX_CPUID_NO_SUBLEAF to 0 when reporting capabilities to
>>> QEMU, QEMU cannot distinguish a CPUID subleaf 0 from a CPUID w/o subleaf.
>>> Does it matter to QEMU?
>> According to "and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the
>> concept of KVM". IIUC, KVM's ABI uses KVM_CPUID_FLAG_SIGNIFCANT_INDEX
>> in flags of struct kvm_cpuid_entry2 to distinguish whether the index
>> is significant.
> If KVM doesn't indicate which CPU leaf doesn't support subleafs when reporting
> TDX capabilities, how can QEMU know whether it should set the
> KVM_CPUID_FLAG_SIGNIFICANT_INDEX flag for a given CPUID leaf?  Or is the
> expectation that QEMU can discover that on its own?
>
When KVM report CPUIDs to userspace, for the entries that index is
significant, it will set KVM_CPUID_FLAG_SIGNIFICANT_INDEX, including
reporting CPUIDs for TDX.
QEMU can check the flag to see if the subleaf is significant or not.

On the other side, when QEMU build its own version, it also set
KVM_CPUID_FLAG_SIGNIFICANT_INDEX for the entries that index is significant.
Tao Su Aug. 19, 2024, 1:33 a.m. UTC | #6
On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> 
> While TDX module reports a set of capabilities/features that it
> supports, what KVM currently supports might be a subset of them.
> E.g., DEBUG and PERFMON are supported by TDX module but currently not
> supported by KVM.
> 
> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
> supported_attrs and suppported_xfam are validated against fixed0/1
> values enumerated by TDX module. Configurable CPUID bits derive from TDX
> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
> i.e., mask off the bits that are configurable in the view of TDX module
> but not supported by KVM yet.
> 

But this mask is not implemented in this patch, which should be in patch24?

> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>

[...]
Nikolay Borisov Aug. 26, 2024, 11:04 a.m. UTC | #7
On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> 
> While TDX module reports a set of capabilities/features that it
> supports, what KVM currently supports might be a subset of them.
> E.g., DEBUG and PERFMON are supported by TDX module but currently not
> supported by KVM.
> 
> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
> supported_attrs and suppported_xfam are validated against fixed0/1
> values enumerated by TDX module. Configurable CPUID bits derive from TDX
> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
> i.e., mask off the bits that are configurable in the view of TDX module
> but not supported by KVM yet.
> 
> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> ---
> uAPI breakout v1:
>   - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
>     pointer.
>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
>     doesn't have 'kvm_tdx_cpuid_config'.
>   - Updates for uAPI changes
> ---

<snip>


> +
>   static int tdx_online_cpu(unsigned int cpu)
>   {
>   	unsigned long flags;
> @@ -217,11 +292,16 @@ static int __init __tdx_bringup(void)
>   		goto get_sysinfo_err;
>   	}
>   
> +	r = setup_kvm_tdx_caps();

nit: Since there are other similarly named functions that come later how 
about rename this to init_kvm_tdx_caps, so that it's clear that the 
functions that are executed ones are prefixed with "init_" and those 
that will be executed on every TDV boot up can be named prefixed with 
"setup_"


> +	if (r)
> +		goto get_sysinfo_err;
> +
>   	/*
>   	 * Leave hardware virtualization enabled after TDX is enabled
>   	 * successfully.  TDX CPU hotplug depends on this.
>   	 */
>   	return 0;
> +
>   get_sysinfo_err:
>   	__do_tdx_cleanup();
>   tdx_bringup_err:
> @@ -232,6 +312,7 @@ static int __init __tdx_bringup(void)
>   void tdx_cleanup(void)
>   {
>   	if (enable_tdx) {
> +		free_kvm_tdx_cap();
>   		__do_tdx_cleanup();
>   		kvm_disable_virtualization();
>   	}
Tony Lindgren Aug. 29, 2024, 4:51 a.m. UTC | #8
On Mon, Aug 26, 2024 at 02:04:27PM +0300, Nikolay Borisov wrote:
> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
> > From: Xiaoyao Li <xiaoyao.li@intel.com>
> >   static int tdx_online_cpu(unsigned int cpu)
> >   {
> >   	unsigned long flags;
> > @@ -217,11 +292,16 @@ static int __init __tdx_bringup(void)
> >   		goto get_sysinfo_err;
> >   	}
> > +	r = setup_kvm_tdx_caps();
> 
> nit: Since there are other similarly named functions that come later how
> about rename this to init_kvm_tdx_caps, so that it's clear that the
> functions that are executed ones are prefixed with "init_" and those that
> will be executed on every TDV boot up can be named prefixed with "setup_"

We can call setup_kvm_tdx_caps() from from tdx_get_kvm_supported_cpuid(),
and drop the struct kvm_tdx_caps. So then the setup_kvm_tdx_caps() should
be OK.

Regards,

Tony
Xiaoyao Li Aug. 29, 2024, 1:28 p.m. UTC | #9
On 8/19/2024 9:33 AM, Tao Su wrote:
> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>
>> While TDX module reports a set of capabilities/features that it
>> supports, what KVM currently supports might be a subset of them.
>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>> supported by KVM.
>>
>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>> supported_attrs and suppported_xfam are validated against fixed0/1
>> values enumerated by TDX module. Configurable CPUID bits derive from TDX
>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>> i.e., mask off the bits that are configurable in the view of TDX module
>> but not supported by KVM yet.
>>
> 
> But this mask is not implemented in this patch, which should be in patch24?

yes, the commit message needs to be updated. Even more the patches need 
to be re-organized.

>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> 
> [...]
Tony Lindgren Aug. 30, 2024, 8:34 a.m. UTC | #10
On Tue, Aug 13, 2024 at 11:25:37AM +0800, Chao Gao wrote:
> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
> >From: Xiaoyao Li <xiaoyao.li@intel.com>
> >+static int __init setup_kvm_tdx_caps(void)
> >+{
> >+	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
> >+	u64 kvm_supported;
> >+	int i;
> >+
> >+	kvm_tdx_caps = kzalloc(sizeof(*kvm_tdx_caps) +
> >+			       sizeof(struct kvm_tdx_cpuid_config) * td_conf->num_cpuid_config,
> 
> struct_size()
> 
> >+			       GFP_KERNEL);
> >+	if (!kvm_tdx_caps)
> >+		return -ENOMEM;

This will go away with the dropping of struct kvm_tdx_caps. Should be checked
for other places though.

Regards,

Tony
Tony Lindgren Aug. 30, 2024, 8:44 a.m. UTC | #11
On Tue, Aug 13, 2024 at 05:26:06AM +0000, Huang, Kai wrote:
> On Tue, 2024-08-13 at 11:25 +0800, Chao Gao wrote:
> > > +	for (i = 0; i < td_conf->num_cpuid_config; i++) {
> > > +		struct kvm_tdx_cpuid_config source = {
> > > +			.leaf = (u32)td_conf->cpuid_config_leaves[i],
> > > +			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
> > > +			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
> > > +			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
> > > +			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
> > > +			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
> > > +		};
> > > +		struct kvm_tdx_cpuid_config *dest =
> > > +			&kvm_tdx_caps->cpuid_configs[i];
> > > +
> > > +		memcpy(dest, &source, sizeof(struct kvm_tdx_cpuid_config));
> > 
> > this memcpy() looks superfluous. does this work?
> > 
> > 		kvm_tdx_caps->cpuid_configs[i] = {
> > 			.leaf = (u32)td_conf->cpuid_config_leaves[i],
> > 			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
> > 			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
> > 			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
> > 			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
> > 			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
> > 		};
> 
> This looks good to me.  I didn't try to optimize because it's done in the
> module loading time.

I'll do a patch to initialize dest directly without a memcpy().

Tony
Rick Edgecombe Sept. 3, 2024, 4:53 p.m. UTC | #12
On Tue, 2024-08-13 at 11:25 +0800, Chao Gao wrote:
> > +       /*
> > +        * PT and CET can be exposed to TD guest regardless of KVM's XSS, PT
> > +        * and, CET support.
> > +        */
> > +       kvm_supported |= XFEATURE_MASK_PT | XFEATURE_MASK_CET_USER |
> > +                        XFEATURE_MASK_CET_KERNEL;
> 
> I prefer to add PT/CET bits in separate patches because PT/CET related MSRs
> may
> need save/restore. Putting them in separate patches can give us the chance to
> explain this in detail.

I think we should just drop them from the base series to save required testing.
We can leave them for the future.
Nikolay Borisov Sept. 4, 2024, 11:58 a.m. UTC | #13
On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
> From: Xiaoyao Li <xiaoyao.li@intel.com>
> 
> While TDX module reports a set of capabilities/features that it
> supports, what KVM currently supports might be a subset of them.
> E.g., DEBUG and PERFMON are supported by TDX module but currently not
> supported by KVM.
> 
> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
> supported_attrs and suppported_xfam are validated against fixed0/1
> values enumerated by TDX module. Configurable CPUID bits derive from TDX
> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
> i.e., mask off the bits that are configurable in the view of TDX module
> but not supported by KVM yet.
> 
> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> ---
> uAPI breakout v1:
>   - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
>     pointer.
>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
>     doesn't have 'kvm_tdx_cpuid_config'.
>   - Updates for uAPI changes
> ---
>   arch/x86/include/uapi/asm/kvm.h |  2 -
>   arch/x86/kvm/vmx/tdx.c          | 81 +++++++++++++++++++++++++++++++++
>   2 files changed, 81 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 47caf508cca7..c9eb2e2f5559 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>   	__u64 hw_error;
>   };
>   
> -#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
> -
>   struct kvm_tdx_cpuid_config {
>   	__u32 leaf;
>   	__u32 sub_leaf;
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 90b44ebaf864..d89973e554f6 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>   	ida_free(&tdx_guest_keyid_pool, keyid);
>   }
>   
> +#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
> +
> +struct kvm_tdx_caps {
> +	u64 supported_attrs;
> +	u64 supported_xfam;
> +
> +	u16 num_cpuid_config;
> +	/* This must the last member. */
> +	DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
> +};
> +
> +static struct kvm_tdx_caps *kvm_tdx_caps;
> +
>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>   {
>   	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
>   	return r;
>   }
>   
> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)

Why isn't TDX_TD_ATTR_DEBUG added as well?

<snip>
Xiaoyao Li Sept. 5, 2024, 1:36 p.m. UTC | #14
On 9/4/2024 7:58 PM, Nikolay Borisov wrote:
> 
> 
> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>
>> While TDX module reports a set of capabilities/features that it
>> supports, what KVM currently supports might be a subset of them.
>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>> supported by KVM.
>>
>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>> supported_attrs and suppported_xfam are validated against fixed0/1
>> values enumerated by TDX module. Configurable CPUID bits derive from TDX
>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>> i.e., mask off the bits that are configurable in the view of TDX module
>> but not supported by KVM yet.
>>
>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>> ---
>> uAPI breakout v1:
>>   - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
>>     pointer.
>>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
>>     doesn't have 'kvm_tdx_cpuid_config'.
>>   - Updates for uAPI changes
>> ---
>>   arch/x86/include/uapi/asm/kvm.h |  2 -
>>   arch/x86/kvm/vmx/tdx.c          | 81 +++++++++++++++++++++++++++++++++
>>   2 files changed, 81 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/uapi/asm/kvm.h 
>> b/arch/x86/include/uapi/asm/kvm.h
>> index 47caf508cca7..c9eb2e2f5559 100644
>> --- a/arch/x86/include/uapi/asm/kvm.h
>> +++ b/arch/x86/include/uapi/asm/kvm.h
>> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>>       __u64 hw_error;
>>   };
>> -#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>> -
>>   struct kvm_tdx_cpuid_config {
>>       __u32 leaf;
>>       __u32 sub_leaf;
>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>> index 90b44ebaf864..d89973e554f6 100644
>> --- a/arch/x86/kvm/vmx/tdx.c
>> +++ b/arch/x86/kvm/vmx/tdx.c
>> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>>       ida_free(&tdx_guest_keyid_pool, keyid);
>>   }
>> +#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>> +
>> +struct kvm_tdx_caps {
>> +    u64 supported_attrs;
>> +    u64 supported_xfam;
>> +
>> +    u16 num_cpuid_config;
>> +    /* This must the last member. */
>> +    DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>> +};
>> +
>> +static struct kvm_tdx_caps *kvm_tdx_caps;
>> +
>>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>   {
>>       const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
>> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
>>       return r;
>>   }
>> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
> 
> Why isn't TDX_TD_ATTR_DEBUG added as well?

Because so far KVM doesn't support all the features of a DEBUG TD for 
userspace. e.g., KVM doesn't provide interface for userspace to 
read/write private memory of DEBUG TD.

> <snip>
Paolo Bonzini Sept. 10, 2024, 4:58 p.m. UTC | #15
On 8/30/24 10:34, Tony Lindgren wrote:
> On Tue, Aug 13, 2024 at 11:25:37AM +0800, Chao Gao wrote:
>> On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>> +static int __init setup_kvm_tdx_caps(void)
>>> +{
>>> +	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
>>> +	u64 kvm_supported;
>>> +	int i;
>>> +
>>> +	kvm_tdx_caps = kzalloc(sizeof(*kvm_tdx_caps) +
>>> +			       sizeof(struct kvm_tdx_cpuid_config) * td_conf->num_cpuid_config,
>>
>> struct_size()
>>
>>> +			       GFP_KERNEL);
>>> +	if (!kvm_tdx_caps)
>>> +		return -ENOMEM;
> 
> This will go away with the dropping of struct kvm_tdx_caps. Should be checked
> for other places though.

What do you mean exactly by dropping of struct kvm_tdx_caps?

Paolo
Paolo Bonzini Sept. 10, 2024, 5:15 p.m. UTC | #16
On 8/29/24 06:51, Tony Lindgren wrote:
>> nit: Since there are other similarly named functions that come later how
>> about rename this to init_kvm_tdx_caps, so that it's clear that the
>> functions that are executed ones are prefixed with "init_" and those that
>> will be executed on every TDV boot up can be named prefixed with "setup_"
> We can call setup_kvm_tdx_caps() from from tdx_get_kvm_supported_cpuid(),
> and drop the struct kvm_tdx_caps. So then the setup_kvm_tdx_caps() should
> be OK.

I don't understand this suggestion since tdx_get_capabilities() also 
needs kvm_tdx_caps.  I think the code is okay as it is with just the 
rename that Nik suggested (there are already some setup_*() functions in 
KVM but for example setup_vmcs_config() is called from hardware_setup()).

Paolo
Tony Lindgren Sept. 11, 2024, 11:04 a.m. UTC | #17
On Tue, Sep 10, 2024 at 07:15:12PM +0200, Paolo Bonzini wrote:
> On 8/29/24 06:51, Tony Lindgren wrote:
> > > nit: Since there are other similarly named functions that come later how
> > > about rename this to init_kvm_tdx_caps, so that it's clear that the
> > > functions that are executed ones are prefixed with "init_" and those that
> > > will be executed on every TDV boot up can be named prefixed with "setup_"
> > We can call setup_kvm_tdx_caps() from from tdx_get_kvm_supported_cpuid(),
> > and drop the struct kvm_tdx_caps. So then the setup_kvm_tdx_caps() should
> > be OK.
> 
> I don't understand this suggestion since tdx_get_capabilities() also needs
> kvm_tdx_caps.  I think the code is okay as it is with just the rename that
> Nik suggested (there are already some setup_*() functions in KVM but for
> example setup_vmcs_config() is called from hardware_setup()).

Oh sorry for the confusion, looks like I pasted the function names wrong
way around above and left out where setup_kvm_tdx_caps() can be called
from.

I meant only tdx_get_capabilities() needs to call setup_kvm_tdx_caps().
And setup_kvm_tdx_caps() calls tdx_get_kvm_supported_cpuid().

The data in kvm_tdx_caps is only needed for tdx_get_capabilities(). It can
be generated from the data already in td_conf.

At least that's what it looks like to me, but maybe I'm missing something.

Regards,

Tony
Tony Lindgren Sept. 11, 2024, 11:07 a.m. UTC | #18
On Tue, Sep 10, 2024 at 06:58:06PM +0200, Paolo Bonzini wrote:
> On 8/30/24 10:34, Tony Lindgren wrote:
> > On Tue, Aug 13, 2024 at 11:25:37AM +0800, Chao Gao wrote:
> > > On Mon, Aug 12, 2024 at 03:48:05PM -0700, Rick Edgecombe wrote:
> > > > From: Xiaoyao Li <xiaoyao.li@intel.com>
> > > > +static int __init setup_kvm_tdx_caps(void)
> > > > +{
> > > > +	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
> > > > +	u64 kvm_supported;
> > > > +	int i;
> > > > +
> > > > +	kvm_tdx_caps = kzalloc(sizeof(*kvm_tdx_caps) +
> > > > +			       sizeof(struct kvm_tdx_cpuid_config) * td_conf->num_cpuid_config,
> > > 
> > > struct_size()
> > > 
> > > > +			       GFP_KERNEL);
> > > > +	if (!kvm_tdx_caps)
> > > > +		return -ENOMEM;
> > 
> > This will go away with the dropping of struct kvm_tdx_caps. Should be checked
> > for other places though.
> 
> What do you mean exactly by dropping of struct kvm_tdx_caps?

I think we can initialize the data as needed based on td_conf.

Regards,

Tony
Nikolay Borisov Sept. 12, 2024, 8:04 a.m. UTC | #19
On 5.09.24 г. 16:36 ч., Xiaoyao Li wrote:
> On 9/4/2024 7:58 PM, Nikolay Borisov wrote:
>>
>>
>> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>
>>> While TDX module reports a set of capabilities/features that it
>>> supports, what KVM currently supports might be a subset of them.
>>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>>> supported by KVM.
>>>
>>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>>> supported_attrs and suppported_xfam are validated against fixed0/1
>>> values enumerated by TDX module. Configurable CPUID bits derive from TDX
>>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>>> i.e., mask off the bits that are configurable in the view of TDX module
>>> but not supported by KVM yet.
>>>
>>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>>
>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>>> ---
>>> uAPI breakout v1:
>>>   - Change setup_kvm_tdx_caps() to use the exported 'struct tdx_sysinfo'
>>>     pointer.
>>>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct tdx_sysinfo'
>>>     doesn't have 'kvm_tdx_cpuid_config'.
>>>   - Updates for uAPI changes
>>> ---
>>>   arch/x86/include/uapi/asm/kvm.h |  2 -
>>>   arch/x86/kvm/vmx/tdx.c          | 81 +++++++++++++++++++++++++++++++++
>>>   2 files changed, 81 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/include/uapi/asm/kvm.h 
>>> b/arch/x86/include/uapi/asm/kvm.h
>>> index 47caf508cca7..c9eb2e2f5559 100644
>>> --- a/arch/x86/include/uapi/asm/kvm.h
>>> +++ b/arch/x86/include/uapi/asm/kvm.h
>>> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>>>       __u64 hw_error;
>>>   };
>>> -#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>> -
>>>   struct kvm_tdx_cpuid_config {
>>>       __u32 leaf;
>>>       __u32 sub_leaf;
>>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>>> index 90b44ebaf864..d89973e554f6 100644
>>> --- a/arch/x86/kvm/vmx/tdx.c
>>> +++ b/arch/x86/kvm/vmx/tdx.c
>>> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>>>       ida_free(&tdx_guest_keyid_pool, keyid);
>>>   }
>>> +#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>> +
>>> +struct kvm_tdx_caps {
>>> +    u64 supported_attrs;
>>> +    u64 supported_xfam;
>>> +
>>> +    u16 num_cpuid_config;
>>> +    /* This must the last member. */
>>> +    DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>>> +};
>>> +
>>> +static struct kvm_tdx_caps *kvm_tdx_caps;
>>> +
>>>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>>   {
>>>       const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
>>> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user 
>>> *argp)
>>>       return r;
>>>   }
>>> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
>>
>> Why isn't TDX_TD_ATTR_DEBUG added as well?
> 
> Because so far KVM doesn't support all the features of a DEBUG TD for 
> userspace. e.g., KVM doesn't provide interface for userspace to 
> read/write private memory of DEBUG TD.

But this means that you can't really run a TDX with SEPT_VE_DISABLE 
disabled for debugging purposes, so perhaps it might be necessary to 
rethink the condition allowing SEPT_VE_DISABLE to be disabled. Without 
the debug flag and SEPT_VE_DISABLE disabled the code refuses to start 
the VM, what if one wants to debug some SEPT issue by having an oops 
generated inside the vm ?

> 
>> <snip>
> 
>
Xiaoyao Li Sept. 12, 2024, 8:37 a.m. UTC | #20
On 9/12/2024 4:04 PM, Nikolay Borisov wrote:
> 
> 
> On 5.09.24 г. 16:36 ч., Xiaoyao Li wrote:
>> On 9/4/2024 7:58 PM, Nikolay Borisov wrote:
>>>
>>>
>>> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
>>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>
>>>> While TDX module reports a set of capabilities/features that it
>>>> supports, what KVM currently supports might be a subset of them.
>>>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>>>> supported by KVM.
>>>>
>>>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of TDX.
>>>> supported_attrs and suppported_xfam are validated against fixed0/1
>>>> values enumerated by TDX module. Configurable CPUID bits derive from 
>>>> TDX
>>>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>>>> i.e., mask off the bits that are configurable in the view of TDX module
>>>> but not supported by KVM yet.
>>>>
>>>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it to 0
>>>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>>>
>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>>>> ---
>>>> uAPI breakout v1:
>>>>   - Change setup_kvm_tdx_caps() to use the exported 'struct 
>>>> tdx_sysinfo'
>>>>     pointer.
>>>>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct 
>>>> tdx_sysinfo'
>>>>     doesn't have 'kvm_tdx_cpuid_config'.
>>>>   - Updates for uAPI changes
>>>> ---
>>>>   arch/x86/include/uapi/asm/kvm.h |  2 -
>>>>   arch/x86/kvm/vmx/tdx.c          | 81 
>>>> +++++++++++++++++++++++++++++++++
>>>>   2 files changed, 81 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/x86/include/uapi/asm/kvm.h 
>>>> b/arch/x86/include/uapi/asm/kvm.h
>>>> index 47caf508cca7..c9eb2e2f5559 100644
>>>> --- a/arch/x86/include/uapi/asm/kvm.h
>>>> +++ b/arch/x86/include/uapi/asm/kvm.h
>>>> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>>>>       __u64 hw_error;
>>>>   };
>>>> -#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>> -
>>>>   struct kvm_tdx_cpuid_config {
>>>>       __u32 leaf;
>>>>       __u32 sub_leaf;
>>>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>>>> index 90b44ebaf864..d89973e554f6 100644
>>>> --- a/arch/x86/kvm/vmx/tdx.c
>>>> +++ b/arch/x86/kvm/vmx/tdx.c
>>>> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>>>>       ida_free(&tdx_guest_keyid_pool, keyid);
>>>>   }
>>>> +#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>> +
>>>> +struct kvm_tdx_caps {
>>>> +    u64 supported_attrs;
>>>> +    u64 supported_xfam;
>>>> +
>>>> +    u16 num_cpuid_config;
>>>> +    /* This must the last member. */
>>>> +    DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>>>> +};
>>>> +
>>>> +static struct kvm_tdx_caps *kvm_tdx_caps;
>>>> +
>>>>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>>>   {
>>>>       const struct tdx_sysinfo_td_conf *td_conf = 
>>>> &tdx_sysinfo->td_conf;
>>>> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user 
>>>> *argp)
>>>>       return r;
>>>>   }
>>>> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
>>>
>>> Why isn't TDX_TD_ATTR_DEBUG added as well?
>>
>> Because so far KVM doesn't support all the features of a DEBUG TD for 
>> userspace. e.g., KVM doesn't provide interface for userspace to 
>> read/write private memory of DEBUG TD.
> 
> But this means that you can't really run a TDX with SEPT_VE_DISABLE 
> disabled for debugging purposes, so perhaps it might be necessary to 
> rethink the condition allowing SEPT_VE_DISABLE to be disabled. Without 
> the debug flag and SEPT_VE_DISABLE disabled the code refuses to start 
> the VM, what if one wants to debug some SEPT issue by having an oops 
> generated inside the vm ?

sept_ve_disable is allowed to be disable, i.e., set to 0.

I think there must be some misunderstanding.

>>
>>> <snip>
>>
>>
Nikolay Borisov Sept. 12, 2024, 8:43 a.m. UTC | #21
On 12.09.24 г. 11:37 ч., Xiaoyao Li wrote:
> On 9/12/2024 4:04 PM, Nikolay Borisov wrote:
>>
>>
>> On 5.09.24 г. 16:36 ч., Xiaoyao Li wrote:
>>> On 9/4/2024 7:58 PM, Nikolay Borisov wrote:
>>>>
>>>>
>>>> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
>>>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>>
>>>>> While TDX module reports a set of capabilities/features that it
>>>>> supports, what KVM currently supports might be a subset of them.
>>>>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>>>>> supported by KVM.
>>>>>
>>>>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of 
>>>>> TDX.
>>>>> supported_attrs and suppported_xfam are validated against fixed0/1
>>>>> values enumerated by TDX module. Configurable CPUID bits derive 
>>>>> from TDX
>>>>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>>>>> i.e., mask off the bits that are configurable in the view of TDX 
>>>>> module
>>>>> but not supported by KVM yet.
>>>>>
>>>>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it 
>>>>> to 0
>>>>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of KVM.
>>>>>
>>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>>>>> ---
>>>>> uAPI breakout v1:
>>>>>   - Change setup_kvm_tdx_caps() to use the exported 'struct 
>>>>> tdx_sysinfo'
>>>>>     pointer.
>>>>>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct 
>>>>> tdx_sysinfo'
>>>>>     doesn't have 'kvm_tdx_cpuid_config'.
>>>>>   - Updates for uAPI changes
>>>>> ---
>>>>>   arch/x86/include/uapi/asm/kvm.h |  2 -
>>>>>   arch/x86/kvm/vmx/tdx.c          | 81 
>>>>> +++++++++++++++++++++++++++++++++
>>>>>   2 files changed, 81 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/include/uapi/asm/kvm.h 
>>>>> b/arch/x86/include/uapi/asm/kvm.h
>>>>> index 47caf508cca7..c9eb2e2f5559 100644
>>>>> --- a/arch/x86/include/uapi/asm/kvm.h
>>>>> +++ b/arch/x86/include/uapi/asm/kvm.h
>>>>> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>>>>>       __u64 hw_error;
>>>>>   };
>>>>> -#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>>> -
>>>>>   struct kvm_tdx_cpuid_config {
>>>>>       __u32 leaf;
>>>>>       __u32 sub_leaf;
>>>>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>>>>> index 90b44ebaf864..d89973e554f6 100644
>>>>> --- a/arch/x86/kvm/vmx/tdx.c
>>>>> +++ b/arch/x86/kvm/vmx/tdx.c
>>>>> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>>>>>       ida_free(&tdx_guest_keyid_pool, keyid);
>>>>>   }
>>>>> +#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>>> +
>>>>> +struct kvm_tdx_caps {
>>>>> +    u64 supported_attrs;
>>>>> +    u64 supported_xfam;
>>>>> +
>>>>> +    u16 num_cpuid_config;
>>>>> +    /* This must the last member. */
>>>>> +    DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>>>>> +};
>>>>> +
>>>>> +static struct kvm_tdx_caps *kvm_tdx_caps;
>>>>> +
>>>>>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>>>>   {
>>>>>       const struct tdx_sysinfo_td_conf *td_conf = 
>>>>> &tdx_sysinfo->td_conf;
>>>>> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user 
>>>>> *argp)
>>>>>       return r;
>>>>>   }
>>>>> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
>>>>
>>>> Why isn't TDX_TD_ATTR_DEBUG added as well?
>>>
>>> Because so far KVM doesn't support all the features of a DEBUG TD for 
>>> userspace. e.g., KVM doesn't provide interface for userspace to 
>>> read/write private memory of DEBUG TD.
>>
>> But this means that you can't really run a TDX with SEPT_VE_DISABLE 
>> disabled for debugging purposes, so perhaps it might be necessary to 
>> rethink the condition allowing SEPT_VE_DISABLE to be disabled. Without 
>> the debug flag and SEPT_VE_DISABLE disabled the code refuses to start 
>> the VM, what if one wants to debug some SEPT issue by having an oops 
>> generated inside the vm ?
> 
> sept_ve_disable is allowed to be disable, i.e., set to 0.
> 
> I think there must be some misunderstanding.

There isn't, the current code is:

   201         if (!(td_attr & ATTR_SEPT_VE_DISABLE)) { 

     1                 const char *msg = "TD misconfiguration: 
SEPT_VE_DISABLE attribute must be set.";
     2 

     3                 /* Relax SEPT_VE_DISABLE check for debug TD. */ 

     4                 if (td_attr & ATTR_DEBUG) 

     5                         pr_warn("%s\n", msg); 

     6                 else 

     7                         tdx_panic(msg); 

     8         }


I.e if we disable SEPT_VE_DISABLE without having ATTR_DEBUG it results 
in a panic.

> 
>>>
>>>> <snip>
>>>
>>>
>
Xiaoyao Li Sept. 12, 2024, 9:07 a.m. UTC | #22
On 9/12/2024 4:43 PM, Nikolay Borisov wrote:
> 
> 
> On 12.09.24 г. 11:37 ч., Xiaoyao Li wrote:
>> On 9/12/2024 4:04 PM, Nikolay Borisov wrote:
>>>
>>>
>>> On 5.09.24 г. 16:36 ч., Xiaoyao Li wrote:
>>>> On 9/4/2024 7:58 PM, Nikolay Borisov wrote:
>>>>>
>>>>>
>>>>> On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
>>>>>> From: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>>>
>>>>>> While TDX module reports a set of capabilities/features that it
>>>>>> supports, what KVM currently supports might be a subset of them.
>>>>>> E.g., DEBUG and PERFMON are supported by TDX module but currently not
>>>>>> supported by KVM.
>>>>>>
>>>>>> Introduce a new struct kvm_tdx_caps to store KVM's capabilities of 
>>>>>> TDX.
>>>>>> supported_attrs and suppported_xfam are validated against fixed0/1
>>>>>> values enumerated by TDX module. Configurable CPUID bits derive 
>>>>>> from TDX
>>>>>> module plus applying KVM's capabilities (KVM_GET_SUPPORTED_CPUID),
>>>>>> i.e., mask off the bits that are configurable in the view of TDX 
>>>>>> module
>>>>>> but not supported by KVM yet.
>>>>>>
>>>>>> KVM_TDX_CPUID_NO_SUBLEAF is the concept from TDX module, switch it 
>>>>>> to 0
>>>>>> and use KVM_CPUID_FLAG_SIGNIFCANT_INDEX, which are the concept of 
>>>>>> KVM.
>>>>>>
>>>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>>>> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>>>>>> ---
>>>>>> uAPI breakout v1:
>>>>>>   - Change setup_kvm_tdx_caps() to use the exported 'struct 
>>>>>> tdx_sysinfo'
>>>>>>     pointer.
>>>>>>   - Change how to copy 'kvm_tdx_cpuid_config' since 'struct 
>>>>>> tdx_sysinfo'
>>>>>>     doesn't have 'kvm_tdx_cpuid_config'.
>>>>>>   - Updates for uAPI changes
>>>>>> ---
>>>>>>   arch/x86/include/uapi/asm/kvm.h |  2 -
>>>>>>   arch/x86/kvm/vmx/tdx.c          | 81 
>>>>>> +++++++++++++++++++++++++++++++++
>>>>>>   2 files changed, 81 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/x86/include/uapi/asm/kvm.h 
>>>>>> b/arch/x86/include/uapi/asm/kvm.h
>>>>>> index 47caf508cca7..c9eb2e2f5559 100644
>>>>>> --- a/arch/x86/include/uapi/asm/kvm.h
>>>>>> +++ b/arch/x86/include/uapi/asm/kvm.h
>>>>>> @@ -952,8 +952,6 @@ struct kvm_tdx_cmd {
>>>>>>       __u64 hw_error;
>>>>>>   };
>>>>>> -#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>>>> -
>>>>>>   struct kvm_tdx_cpuid_config {
>>>>>>       __u32 leaf;
>>>>>>       __u32 sub_leaf;
>>>>>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>>>>>> index 90b44ebaf864..d89973e554f6 100644
>>>>>> --- a/arch/x86/kvm/vmx/tdx.c
>>>>>> +++ b/arch/x86/kvm/vmx/tdx.c
>>>>>> @@ -31,6 +31,19 @@ static void __used tdx_guest_keyid_free(int keyid)
>>>>>>       ida_free(&tdx_guest_keyid_pool, keyid);
>>>>>>   }
>>>>>> +#define KVM_TDX_CPUID_NO_SUBLEAF    ((__u32)-1)
>>>>>> +
>>>>>> +struct kvm_tdx_caps {
>>>>>> +    u64 supported_attrs;
>>>>>> +    u64 supported_xfam;
>>>>>> +
>>>>>> +    u16 num_cpuid_config;
>>>>>> +    /* This must the last member. */
>>>>>> +    DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
>>>>>> +};
>>>>>> +
>>>>>> +static struct kvm_tdx_caps *kvm_tdx_caps;
>>>>>> +
>>>>>>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>>>>>   {
>>>>>>       const struct tdx_sysinfo_td_conf *td_conf = 
>>>>>> &tdx_sysinfo->td_conf;
>>>>>> @@ -131,6 +144,68 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user 
>>>>>> *argp)
>>>>>>       return r;
>>>>>>   }
>>>>>> +#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
>>>>>
>>>>> Why isn't TDX_TD_ATTR_DEBUG added as well?
>>>>
>>>> Because so far KVM doesn't support all the features of a DEBUG TD 
>>>> for userspace. e.g., KVM doesn't provide interface for userspace to 
>>>> read/write private memory of DEBUG TD.
>>>
>>> But this means that you can't really run a TDX with SEPT_VE_DISABLE 
>>> disabled for debugging purposes, so perhaps it might be necessary to 
>>> rethink the condition allowing SEPT_VE_DISABLE to be disabled. 
>>> Without the debug flag and SEPT_VE_DISABLE disabled the code refuses 
>>> to start the VM, what if one wants to debug some SEPT issue by having 
>>> an oops generated inside the vm ?
>>
>> sept_ve_disable is allowed to be disable, i.e., set to 0.
>>
>> I think there must be some misunderstanding.
> 
> There isn't, the current code is:
> 
>    201         if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
>      1                 const char *msg = "TD misconfiguration: 
> SEPT_VE_DISABLE attribute must be set.";
>      2
>      3                 /* Relax SEPT_VE_DISABLE check for debug TD. */
>      4                 if (td_attr & ATTR_DEBUG)
>      5                         pr_warn("%s\n", msg);
>      6                 else
>      7                         tdx_panic(msg);
>      8         }
> 
> 
> I.e if we disable SEPT_VE_DISABLE without having ATTR_DEBUG it results 
> in a panic.

I see now.

It's linux TD guest's implementation, which requires SEPT_VE_DISABLE 
must be set unless it's a debug TD.

Yes, it can be the motivation to request KVM to add the support of 
ATTRIBUTES.DEBUG. But the support of ATTRIBUTES.DEBUG is not just 
allowing this bit to be set to 1. For DEBUG TD, VMM is allowed to 
read/write the private memory content, cpu registers, and MSRs, VMM is 
allowed to trap the exceptions in TD, VMM is allowed to manipulate the 
VMCS of TD vcpu, etc.

IMHO, for upstream, no need to support all the debug capability as 
described above. But we need firstly define a subset of them as the 
starter of supporting ATTRIBUTES.DEBUG. Otherwise, what is the meaning 
of KVM to allow the DEBUG to be set without providing any debug capability?

For debugging purpose, you can just hack guest kernel to allow 
spet_ve_disable to be 0 without DEBUG bit set, or hack KVM to allow 
DEBUG bit to be set.

>>
>>>>
>>>>> <snip>
>>>>
>>>>
>>
Rick Edgecombe Sept. 12, 2024, 3:12 p.m. UTC | #23
On Thu, 2024-09-12 at 17:07 +0800, Xiaoyao Li wrote:
> > I.e if we disable SEPT_VE_DISABLE without having ATTR_DEBUG it results 
> > in a panic.
> 
> I see now.
> 
> It's linux TD guest's implementation, which requires SEPT_VE_DISABLE 
> must be set unless it's a debug TD.
> 
> Yes, it can be the motivation to request KVM to add the support of 
> ATTRIBUTES.DEBUG. But the support of ATTRIBUTES.DEBUG is not just 
> allowing this bit to be set to 1. For DEBUG TD, VMM is allowed to 
> read/write the private memory content, cpu registers, and MSRs, VMM is 
> allowed to trap the exceptions in TD, VMM is allowed to manipulate the 
> VMCS of TD vcpu, etc.
> 
> IMHO, for upstream, no need to support all the debug capability as 
> described above. 

I think you mean for the first upstream support. I don't see why it would not be
suitable for upstream if we have upstream users doing it.

Nikolay, is this hypothetical or something that you have been doing with some
other TDX tree? We can factor it into the post-base support roadmap.

> But we need firstly define a subset of them as the 
> starter of supporting ATTRIBUTES.DEBUG. Otherwise, what is the meaning 
> of KVM to allow the DEBUG to be set without providing any debug capability?
> 
> For debugging purpose, you can just hack guest kernel to allow 
> spet_ve_disable to be 0 without DEBUG bit set, or hack KVM to allow 
> DEBUG bit to be set.
Nikolay Borisov Sept. 12, 2024, 3:18 p.m. UTC | #24
On 12.09.24 г. 18:12 ч., Edgecombe, Rick P wrote:
> On Thu, 2024-09-12 at 17:07 +0800, Xiaoyao Li wrote:
>>> I.e if we disable SEPT_VE_DISABLE without having ATTR_DEBUG it results
>>> in a panic.
>>
>> I see now.
>>
>> It's linux TD guest's implementation, which requires SEPT_VE_DISABLE
>> must be set unless it's a debug TD.
>>
>> Yes, it can be the motivation to request KVM to add the support of
>> ATTRIBUTES.DEBUG. But the support of ATTRIBUTES.DEBUG is not just
>> allowing this bit to be set to 1. For DEBUG TD, VMM is allowed to
>> read/write the private memory content, cpu registers, and MSRs, VMM is
>> allowed to trap the exceptions in TD, VMM is allowed to manipulate the
>> VMCS of TD vcpu, etc.
>>
>> IMHO, for upstream, no need to support all the debug capability as
>> described above.
> 
> I think you mean for the first upstream support. I don't see why it would not be
> suitable for upstream if we have upstream users doing it.
> 
> Nikolay, is this hypothetical or something that you have been doing with some
> other TDX tree? We can factor it into the post-base support roadmap.

The real world use case here is a report comes and says " Hey, our TVM 
locks up on certain event". Turns out it happens due to the hypervisor 
not handling correctly some TD exit event caused by a SEPT violation. So 
then I can instruct the person to simply disable SEPT_VE_DISABLE so that 
instead of a TD exit we get a nice oops inside the guest which can serve 
further debugging.


> 
>> But we need firstly define a subset of them as the
>> starter of supporting ATTRIBUTES.DEBUG. Otherwise, what is the meaning
>> of KVM to allow the DEBUG to be set without providing any debug capability?
>>
>> For debugging purpose, you can just hack guest kernel to allow
>> spet_ve_disable to be 0 without DEBUG bit set, or hack KVM to allow
>> DEBUG bit to be set.
>
Xiaoyao Li Oct. 10, 2024, 8:25 a.m. UTC | #25
On 9/11/2024 7:04 PM, Tony Lindgren wrote:
> On Tue, Sep 10, 2024 at 07:15:12PM +0200, Paolo Bonzini wrote:
>> On 8/29/24 06:51, Tony Lindgren wrote:
>>>> nit: Since there are other similarly named functions that come later how
>>>> about rename this to init_kvm_tdx_caps, so that it's clear that the
>>>> functions that are executed ones are prefixed with "init_" and those that
>>>> will be executed on every TDV boot up can be named prefixed with "setup_"
>>> We can call setup_kvm_tdx_caps() from from tdx_get_kvm_supported_cpuid(),
>>> and drop the struct kvm_tdx_caps. So then the setup_kvm_tdx_caps() should
>>> be OK.
>>
>> I don't understand this suggestion since tdx_get_capabilities() also needs
>> kvm_tdx_caps.  I think the code is okay as it is with just the rename that
>> Nik suggested (there are already some setup_*() functions in KVM but for
>> example setup_vmcs_config() is called from hardware_setup()).
> 
> Oh sorry for the confusion, looks like I pasted the function names wrong
> way around above and left out where setup_kvm_tdx_caps() can be called
> from.
> 
> I meant only tdx_get_capabilities() needs to call setup_kvm_tdx_caps().
> And setup_kvm_tdx_caps() calls tdx_get_kvm_supported_cpuid().
> 
> The data in kvm_tdx_caps is only needed for tdx_get_capabilities(). It can
> be generated from the data already in td_conf.
> 
> At least that's what it looks like to me, but maybe I'm missing something.

kvm_tdx_caps is setup in __tdx_bringup() because it also serves the 
purpose to validate the KVM's capabilities against the specific TDX 
module. If KVM and TDX module are incompatible, it needs to fail the 
bring up of TDX in KVM. It's too late to validate it when 
KVM_TDX_CAPABILITIES issued.  E.g., if the TDX module reports some 
fixed-1 attribute bit while KVM isn't aware of, in such case KVM needs 
to set enable_tdx to 0 to reflect that TDX cannot be enabled/brought up.

> Regards,
> 
> Tony
Tony Lindgren Oct. 10, 2024, 9:49 a.m. UTC | #26
On Thu, Oct 10, 2024 at 04:25:30PM +0800, Xiaoyao Li wrote:
> On 9/11/2024 7:04 PM, Tony Lindgren wrote:
> > On Tue, Sep 10, 2024 at 07:15:12PM +0200, Paolo Bonzini wrote:
> > > On 8/29/24 06:51, Tony Lindgren wrote:
> > > > > nit: Since there are other similarly named functions that come later how
> > > > > about rename this to init_kvm_tdx_caps, so that it's clear that the
> > > > > functions that are executed ones are prefixed with "init_" and those that
> > > > > will be executed on every TDV boot up can be named prefixed with "setup_"
> > > > We can call setup_kvm_tdx_caps() from from tdx_get_kvm_supported_cpuid(),
> > > > and drop the struct kvm_tdx_caps. So then the setup_kvm_tdx_caps() should
> > > > be OK.
> > > 
> > > I don't understand this suggestion since tdx_get_capabilities() also needs
> > > kvm_tdx_caps.  I think the code is okay as it is with just the rename that
> > > Nik suggested (there are already some setup_*() functions in KVM but for
> > > example setup_vmcs_config() is called from hardware_setup()).
> > 
> > Oh sorry for the confusion, looks like I pasted the function names wrong
> > way around above and left out where setup_kvm_tdx_caps() can be called
> > from.
> > 
> > I meant only tdx_get_capabilities() needs to call setup_kvm_tdx_caps().
> > And setup_kvm_tdx_caps() calls tdx_get_kvm_supported_cpuid().
> > 
> > The data in kvm_tdx_caps is only needed for tdx_get_capabilities(). It can
> > be generated from the data already in td_conf.
> > 
> > At least that's what it looks like to me, but maybe I'm missing something.
> 
> kvm_tdx_caps is setup in __tdx_bringup() because it also serves the purpose
> to validate the KVM's capabilities against the specific TDX module. If KVM
> and TDX module are incompatible, it needs to fail the bring up of TDX in
> KVM. It's too late to validate it when KVM_TDX_CAPABILITIES issued.  E.g.,
> if the TDX module reports some fixed-1 attribute bit while KVM isn't aware
> of, in such case KVM needs to set enable_tdx to 0 to reflect that TDX cannot
> be enabled/brought up.

OK makes sense, thanks for clarifying the use case for __tdx_bringup().

We can check the attributes_fixed1 and xfam_fixed1 also on __tdx_bringup()
no problem.

Regards,

Tony
diff mbox series

Patch

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 47caf508cca7..c9eb2e2f5559 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -952,8 +952,6 @@  struct kvm_tdx_cmd {
 	__u64 hw_error;
 };
 
-#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
-
 struct kvm_tdx_cpuid_config {
 	__u32 leaf;
 	__u32 sub_leaf;
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 90b44ebaf864..d89973e554f6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -31,6 +31,19 @@  static void __used tdx_guest_keyid_free(int keyid)
 	ida_free(&tdx_guest_keyid_pool, keyid);
 }
 
+#define KVM_TDX_CPUID_NO_SUBLEAF	((__u32)-1)
+
+struct kvm_tdx_caps {
+	u64 supported_attrs;
+	u64 supported_xfam;
+
+	u16 num_cpuid_config;
+	/* This must the last member. */
+	DECLARE_FLEX_ARRAY(struct kvm_tdx_cpuid_config, cpuid_configs);
+};
+
+static struct kvm_tdx_caps *kvm_tdx_caps;
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
@@ -131,6 +144,68 @@  int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
 	return r;
 }
 
+#define KVM_SUPPORTED_TD_ATTRS (TDX_TD_ATTR_SEPT_VE_DISABLE)
+
+static int __init setup_kvm_tdx_caps(void)
+{
+	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
+	u64 kvm_supported;
+	int i;
+
+	kvm_tdx_caps = kzalloc(sizeof(*kvm_tdx_caps) +
+			       sizeof(struct kvm_tdx_cpuid_config) * td_conf->num_cpuid_config,
+			       GFP_KERNEL);
+	if (!kvm_tdx_caps)
+		return -ENOMEM;
+
+	kvm_supported = KVM_SUPPORTED_TD_ATTRS;
+	if ((kvm_supported & td_conf->attributes_fixed1) != td_conf->attributes_fixed1)
+		goto err;
+
+	kvm_tdx_caps->supported_attrs = kvm_supported & td_conf->attributes_fixed0;
+
+	kvm_supported = kvm_caps.supported_xcr0 | kvm_caps.supported_xss;
+
+	/*
+	 * PT and CET can be exposed to TD guest regardless of KVM's XSS, PT
+	 * and, CET support.
+	 */
+	kvm_supported |= XFEATURE_MASK_PT | XFEATURE_MASK_CET_USER |
+			 XFEATURE_MASK_CET_KERNEL;
+	if ((kvm_supported & td_conf->xfam_fixed1) != td_conf->xfam_fixed1)
+		goto err;
+
+	kvm_tdx_caps->supported_xfam = kvm_supported & td_conf->xfam_fixed0;
+
+	kvm_tdx_caps->num_cpuid_config = td_conf->num_cpuid_config;
+	for (i = 0; i < td_conf->num_cpuid_config; i++) {
+		struct kvm_tdx_cpuid_config source = {
+			.leaf = (u32)td_conf->cpuid_config_leaves[i],
+			.sub_leaf = td_conf->cpuid_config_leaves[i] >> 32,
+			.eax = (u32)td_conf->cpuid_config_values[i].eax_ebx,
+			.ebx = td_conf->cpuid_config_values[i].eax_ebx >> 32,
+			.ecx = (u32)td_conf->cpuid_config_values[i].ecx_edx,
+			.edx = td_conf->cpuid_config_values[i].ecx_edx >> 32,
+		};
+		struct kvm_tdx_cpuid_config *dest =
+			&kvm_tdx_caps->cpuid_configs[i];
+
+		memcpy(dest, &source, sizeof(struct kvm_tdx_cpuid_config));
+		if (dest->sub_leaf == KVM_TDX_CPUID_NO_SUBLEAF)
+			dest->sub_leaf = 0;
+	}
+
+	return 0;
+err:
+	kfree(kvm_tdx_caps);
+	return -EIO;
+}
+
+static void free_kvm_tdx_cap(void)
+{
+	kfree(kvm_tdx_caps);
+}
+
 static int tdx_online_cpu(unsigned int cpu)
 {
 	unsigned long flags;
@@ -217,11 +292,16 @@  static int __init __tdx_bringup(void)
 		goto get_sysinfo_err;
 	}
 
+	r = setup_kvm_tdx_caps();
+	if (r)
+		goto get_sysinfo_err;
+
 	/*
 	 * Leave hardware virtualization enabled after TDX is enabled
 	 * successfully.  TDX CPU hotplug depends on this.
 	 */
 	return 0;
+
 get_sysinfo_err:
 	__do_tdx_cleanup();
 tdx_bringup_err:
@@ -232,6 +312,7 @@  static int __init __tdx_bringup(void)
 void tdx_cleanup(void)
 {
 	if (enable_tdx) {
+		free_kvm_tdx_cap();
 		__do_tdx_cleanup();
 		kvm_disable_virtualization();
 	}