diff mbox series

[v19,067/130] KVM: TDX: Add load_mmu_pgd method for TDX

Message ID bef7033b687e75c5436c0aee07691327d36734ea.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:26 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

For virtual IO, the guest TD shares guest pages with VMM without
encryption.  Shared EPT is used to map guest pages in an unprotected way.

Add the VMCS field encoding for the shared EPTP, which will be used by
TDX to have separate EPT walks for private GPAs (existing EPTP) versus
shared GPAs (new shared EPTP).

Set shared EPT pointer value for the TDX guest to initialize TDX MMU.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
v19:
- Add WARN_ON_ONCE() to tdx_load_mmu_pgd() and drop unconditional mask
---
 arch/x86/include/asm/vmx.h |  1 +
 arch/x86/kvm/vmx/main.c    | 13 ++++++++++++-
 arch/x86/kvm/vmx/tdx.c     |  6 ++++++
 arch/x86/kvm/vmx/x86_ops.h |  4 ++++
 4 files changed, 23 insertions(+), 1 deletion(-)

Comments

Binbin Wu April 1, 2024, 3:49 p.m. UTC | #1
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> For virtual IO, the guest TD shares guest pages with VMM without
> encryption.

Virtual IO is a use case of shared memory; it's better to use it
as an example instead of putting it at the beginning of the sentence.


>   Shared EPT is used to map guest pages in unprotected way.
>
> Add the VMCS field encoding for the shared EPTP, which will be used by
> TDX to have separate EPT walks for private GPAs (existing EPTP) versus
> shared GPAs (new shared EPTP).
>
> Set shared EPT pointer value for the TDX guest to initialize TDX MMU.
Maybe mention that the EPTP for private GPAs is set by the TDX module.

>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> v19:
> - Add WARN_ON_ONCE() to tdx_load_mmu_pgd() and drop unconditional mask
> ---
>   arch/x86/include/asm/vmx.h |  1 +
>   arch/x86/kvm/vmx/main.c    | 13 ++++++++++++-
>   arch/x86/kvm/vmx/tdx.c     |  6 ++++++
>   arch/x86/kvm/vmx/x86_ops.h |  4 ++++
>   4 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index f703bae0c4ac..9deb663a42e3 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -236,6 +236,7 @@ enum vmcs_field {
>   	TSC_MULTIPLIER_HIGH             = 0x00002033,
>   	TERTIARY_VM_EXEC_CONTROL	= 0x00002034,
>   	TERTIARY_VM_EXEC_CONTROL_HIGH	= 0x00002035,
> +	SHARED_EPT_POINTER		= 0x0000203C,
>   	PID_POINTER_TABLE		= 0x00002042,
>   	PID_POINTER_TABLE_HIGH		= 0x00002043,
>   	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index d0f75020579f..076a471d9aea 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -123,6 +123,17 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>   	vmx_vcpu_reset(vcpu, init_event);
>   }
>   
> +static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> +			int pgd_level)
> +{
> +	if (is_td_vcpu(vcpu)) {
> +		tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
> +		return;
> +	}
> +
> +	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
> +}
> +
>   static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
>   {
>   	if (!is_td(kvm))
> @@ -256,7 +267,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>   	.write_tsc_offset = vmx_write_tsc_offset,
>   	.write_tsc_multiplier = vmx_write_tsc_multiplier,
>   
> -	.load_mmu_pgd = vmx_load_mmu_pgd,
> +	.load_mmu_pgd = vt_load_mmu_pgd,
>   
>   	.check_intercept = vmx_check_intercept,
>   	.handle_exit_irqoff = vmx_handle_exit_irqoff,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 54e0d4efa2bd..143a3c2a16bc 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -453,6 +453,12 @@ void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>   	 */
>   }
>   
> +void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
> +{
> +	WARN_ON_ONCE(root_hpa & ~PAGE_MASK);
> +	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
> +}
> +
>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>   {
>   	struct kvm_tdx_capabilities __user *user_caps;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index f5820f617b2e..24161fa404aa 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -152,6 +152,8 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu);
>   void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
>   
>   int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
> +
> +void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
>   #else
>   static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
>   static inline void tdx_hardware_unsetup(void) {}
> @@ -173,6 +175,8 @@ static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
>   static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
>   
>   static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
> +
> +static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
>   #endif
>   
>   #endif /* __KVM_X86_VMX_X86_OPS_H */
Isaku Yamahata April 3, 2024, 5:33 p.m. UTC | #2
On Mon, Apr 01, 2024 at 11:49:43PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> 
> On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > For virtual IO, the guest TD shares guest pages with VMM without
> > encryption.
> 
> Virtual IO is a use case of shared memory, it's better to use it
> as a example instead of putting it at the beginning of the sentence.
> 
> 
> >   Shared EPT is used to map guest pages in unprotected way.
> > 
> > Add the VMCS field encoding for the shared EPTP, which will be used by
> > TDX to have separate EPT walks for private GPAs (existing EPTP) versus
> > shared GPAs (new shared EPTP).
> > 
> > Set shared EPT pointer value for the TDX guest to initialize TDX MMU.
> May have a mention that the EPTP for priavet GPAs is set by TDX module.

Sure, let me update the commit message.
Edgecombe, Rick P April 6, 2024, 12:09 a.m. UTC | #3
On Wed, 2024-04-03 at 10:33 -0700, Isaku Yamahata wrote:
> On Mon, Apr 01, 2024 at 11:49:43PM +0800,
> Binbin Wu <binbin.wu@linux.intel.com> wrote:
> 
> > 
> > 
> > On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > > 
> > > For virtual IO, the guest TD shares guest pages with VMM without
> > > encryption.
> > 
> > Virtual IO is a use case of shared memory, it's better to use it
> > as a example instead of putting it at the beginning of the sentence.
> > 
> > 
> > >    Shared EPT is used to map guest pages in unprotected way.
> > > 
> > > Add the VMCS field encoding for the shared EPTP, which will be used by
> > > TDX to have separate EPT walks for private GPAs (existing EPTP) versus
> > > shared GPAs (new shared EPTP).
> > > 
> > > Set shared EPT pointer value for the TDX guest to initialize TDX MMU.
> > May have a mention that the EPTP for priavet GPAs is set by TDX module.
> 
> Sure, let me update the commit message.

How about this?

KVM: TDX: Add load_mmu_pgd method for TDX

TDX has uses two EPT pointers, one for the private half of the GPA
space and one for the shared half. The private half used the normal
EPT_POINTER vmcs field and is managed in a special way by the TDX module.
The shared half uses a new SHARED_EPT_POINTER field and will be managed by
the conventional MMU management operations that operate directly on the
EPT tables. This means for TDX the .load_mmu_pgd() operation will need to
know to use the SHARED_EPT_POINTER field instead of the normal one. Add a
new wrapper in x86 ops for load_mmu_pgd() that either directs the write to
the existing vmx implementation or a TDX one.

For the TDX operation, EPT will always be used, so it can simpy write to
the SHARED_EPT_POINTER field.
Huang, Kai April 6, 2024, 12:58 a.m. UTC | #4
On Sat, 2024-04-06 at 00:09 +0000, Edgecombe, Rick P wrote:
> On Wed, 2024-04-03 at 10:33 -0700, Isaku Yamahata wrote:
> > On Mon, Apr 01, 2024 at 11:49:43PM +0800,
> > Binbin Wu <binbin.wu@linux.intel.com> wrote:
> > 
> > > 
> > > 
> > > On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> > > > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > > > 
> > > > For virtual IO, the guest TD shares guest pages with VMM without
> > > > encryption.
> > > 
> > > Virtual IO is a use case of shared memory, it's better to use it
> > > as a example instead of putting it at the beginning of the sentence.
> > > 
> > > 
> > > >    Shared EPT is used to map guest pages in unprotected way.
> > > > 
> > > > Add the VMCS field encoding for the shared EPTP, which will be used by
> > > > TDX to have separate EPT walks for private GPAs (existing EPTP) versus
> > > > shared GPAs (new shared EPTP).
> > > > 
> > > > Set shared EPT pointer value for the TDX guest to initialize TDX MMU.
> > > May have a mention that the EPTP for priavet GPAs is set by TDX module.
> > 
> > Sure, let me update the commit message.
> 
> How about this?

Looks good.  Some nits though:

> 
> KVM: TDX: Add load_mmu_pgd method for TDX
> 
> TDX has uses two EPT pointers, one for the private half of the GPA

"TDX uses"

> space and one for the shared half. The private half used the normal

"used" -> "uses"

> EPT_POINTER vmcs field and is managed in a special way by the TDX module.

Perhaps add:

KVM is not allowed to operate on the EPT_POINTER directly.

> The shared half uses a new SHARED_EPT_POINTER field and will be managed by
> the conventional MMU management operations that operate directly on the
> EPT tables. 
> 

I would like to explicitly call out that KVM can update SHARED_EPT_POINTER directly:

The shared half uses a new SHARED_EPT_POINTER field.  KVM is allowed to set it
directly by the interface provided by the TDX module, and KVM is expected to
manage the shared half just like it manages the existing EPT page table today.


> This means for TDX the .load_mmu_pgd() operation will need to
> know to use the SHARED_EPT_POINTER field instead of the normal one. Add a
> new wrapper in x86 ops for load_mmu_pgd() that either directs the write to
> the existing vmx implementation or a TDX one.
> 
> For the TDX operation, EPT will always be used, so it can simpy write to
> the SHARED_EPT_POINTER field.
Binbin Wu April 7, 2024, 1:32 a.m. UTC | #5
On 4/6/2024 8:58 AM, Huang, Kai wrote:
> On Sat, 2024-04-06 at 00:09 +0000, Edgecombe, Rick P wrote:
>> On Wed, 2024-04-03 at 10:33 -0700, Isaku Yamahata wrote:
>>> On Mon, Apr 01, 2024 at 11:49:43PM +0800,
>>> Binbin Wu <binbin.wu@linux.intel.com> wrote:
>>>
>>>>
>>>> On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
>>>>> From: Sean Christopherson <sean.j.christopherson@intel.com>
>>>>>
>>>>> For virtual IO, the guest TD shares guest pages with VMM without
>>>>> encryption.
>>>> Virtual IO is a use case of shared memory, it's better to use it
>>>> as a example instead of putting it at the beginning of the sentence.
>>>>
>>>>
>>>>>     Shared EPT is used to map guest pages in unprotected way.
>>>>>
>>>>> Add the VMCS field encoding for the shared EPTP, which will be used by
>>>>> TDX to have separate EPT walks for private GPAs (existing EPTP) versus
>>>>> shared GPAs (new shared EPTP).
>>>>>
>>>>> Set shared EPT pointer value for the TDX guest to initialize TDX MMU.
>>>> May have a mention that the EPTP for priavet GPAs is set by TDX module.
>>> Sure, let me update the commit message.
>> How about this?
> Looks good.  Some nits though:
>
>> KVM: TDX: Add load_mmu_pgd method for TDX
>>
>> TDX has uses two EPT pointers, one for the private half of the GPA
> "TDX uses"
>
>> space and one for the shared half. The private half used the normal
> "used" -> "uses"
>
>> EPT_POINTER vmcs field and is managed in a special way by the TDX module.
> Perhaps add:
>
> KVM is not allowed to operate on the EPT_POINTER directly.
>
>> The shared half uses a new SHARED_EPT_POINTER field and will be managed by
>> the conventional MMU management operations that operate directly on the
>> EPT tables.
>>
> I would like to explicitly call out KVM can update SHARED_EPT_POINTER directly:
>
> The shared half uses a new SHARED_EPT_POINTER field.  KVM is allowed to set it
> directly by the interface provided by the TDX module, and KVM is expected to
> manage the shared half just like it manages the existing EPT page table today.
>
>
>> This means for TDX the .load_mmu_pgd() operation will need to
>> know to use the SHARED_EPT_POINTER field instead of the normal one. Add a
>> new wrapper in x86 ops for load_mmu_pgd() that either directs the write to
>> the existing vmx implementation or a TDX one.
>>
>> For the TDX operation, EPT will always be used, so it can simpy write to


Maybe remove "so"?  IMO, there is no causal relationship between the 
first and second half of the sentence.

Typo, "simpy" -> "simply"

>> the SHARED_EPT_POINTER field.
>
Edgecombe, Rick P April 8, 2024, 3:32 p.m. UTC | #6
On Sun, 2024-04-07 at 09:32 +0800, Binbin Wu wrote:
> > Looks good.  Some nits though:
> > 
> > > KVM: TDX: Add load_mmu_pgd method for TDX
> > > 
> > > TDX has uses two EPT pointers, one for the private half of the GPA
> > "TDX uses"
> > 
> > > space and one for the shared half. The private half used the normal
> > "used" -> "uses"
> > 
> > > EPT_POINTER vmcs field and is managed in a special way by the TDX module.
> > Perhaps add:
> > 
> > KVM is not allowed to operate on the EPT_POINTER directly.
> > 
> > > The shared half uses a new SHARED_EPT_POINTER field and will be managed by
> > > the conventional MMU management operations that operate directly on the
> > > EPT tables.
> > > 
> > I would like to explicitly call out KVM can update SHARED_EPT_POINTER directly:
> > 
> > The shared half uses a new SHARED_EPT_POINTER field.  KVM is allowed to set it
> > directly by the interface provided by the TDX module, and KVM is expected to
> > manage the shared half just like it manages the existing EPT page table today.
> > 
> > 
> > > This means for TDX the .load_mmu_pgd() operation will need to
> > > know to use the SHARED_EPT_POINTER field instead of the normal one. Add a
> > > new wrapper in x86 ops for load_mmu_pgd() that either directs the write to
> > > the existing vmx implementation or a TDX one.
> > > 
> > > For the TDX operation, EPT will always be used, so it can simpy write to
> 
> 
> Maybe remove "so"?  IMO, there is no causal relationship between the 
> first and second half of the sentence.
> 

I was trying to nod at why tdx_load_mmu_pgd() is so much simpler than vmx_load_mmu_pgd(). Here is a
new version with all the feedback:

KVM: TDX: Add load_mmu_pgd method for TDX

TDX uses two EPT pointers, one for the private half of the GPA space and one for the shared half.
The private half uses the normal EPT_POINTER vmcs field, which is managed in a special way by the
TDX module. For TDX, KVM is not allowed to operate on it directly. The shared half uses a new
SHARED_EPT_POINTER field and will be managed by the conventional MMU management operations that
operate directly on the EPT root. This means for TDX the .load_mmu_pgd() operation will need to know
to use the SHARED_EPT_POINTER field instead of the normal one. Add a new wrapper in x86 ops for
load_mmu_pgd() that either directs the write to the existing vmx implementation or a TDX one.

For the TDX mode of operation, EPT will always be used and KVM does not need to be involved in
virtualization of CR3 behavior. So tdx_load_mmu_pgd() can simply write to SHARED_EPT_POINTER.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index f703bae0c4ac..9deb663a42e3 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -236,6 +236,7 @@  enum vmcs_field {
 	TSC_MULTIPLIER_HIGH             = 0x00002033,
 	TERTIARY_VM_EXEC_CONTROL	= 0x00002034,
 	TERTIARY_VM_EXEC_CONTROL_HIGH	= 0x00002035,
+	SHARED_EPT_POINTER		= 0x0000203C,
 	PID_POINTER_TABLE		= 0x00002042,
 	PID_POINTER_TABLE_HIGH		= 0x00002043,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index d0f75020579f..076a471d9aea 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -123,6 +123,17 @@  static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_vcpu_reset(vcpu, init_event);
 }
 
+static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+			int pgd_level)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
+		return;
+	}
+
+	vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	if (!is_td(kvm))
@@ -256,7 +267,7 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.write_tsc_multiplier = vmx_write_tsc_multiplier,
 
-	.load_mmu_pgd = vmx_load_mmu_pgd,
+	.load_mmu_pgd = vt_load_mmu_pgd,
 
 	.check_intercept = vmx_check_intercept,
 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 54e0d4efa2bd..143a3c2a16bc 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -453,6 +453,12 @@  void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	 */
 }
 
+void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
+{
+	WARN_ON_ONCE(root_hpa & ~PAGE_MASK);
+	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	struct kvm_tdx_capabilities __user *user_caps;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index f5820f617b2e..24161fa404aa 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -152,6 +152,8 @@  void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
+
+void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 #else
 static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
 static inline void tdx_hardware_unsetup(void) {}
@@ -173,6 +175,8 @@  static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 static inline void tdx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
+
+static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
 #endif
 
 #endif /* __KVM_X86_VMX_X86_OPS_H */