
[02/21] KVM: x86/tdp_mmu: Add a helper function to walk down the TDP MMU

Message ID 20240904030751.117579-3-rick.p.edgecombe@intel.com (mailing list archive)
State New, archived
Series TDX MMU Part 2

Commit Message

Rick Edgecombe Sept. 4, 2024, 3:07 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Export a function to walk down the TDP MMU without modifying it and simply
check whether a GPA is mapped.

Future changes will support pre-populating TDX private memory. In order to
implement this, KVM will need to check if a given GFN is already
pre-populated in the mirrored EPT. [1]

There is already a TDP MMU walker, kvm_tdp_mmu_get_walk(), for use within
the KVM MMU that almost does what is required. However, to make sense of
the results, MMU internal PTE helpers are needed. Refactor the code to
provide a helper that can be used outside of the KVM MMU code.

Refactoring the KVM page fault handler to support this lookup usage was
also considered, but it was an awkward fit.

kvm_tdp_mmu_gpa_is_mapped() is based on a diff by Paolo Bonzini.
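
For illustration only (not part of this patch), a rough sketch of how a
pre-population path might combine the two newly exported helpers; the
wrapper name, the error-code bits, and the error handling below are
assumptions:

/*
 * Hypothetical example: map a private GPA, then verify it is actually
 * present in the mirrored EPT.  Not part of this patch; names and
 * error-code bits are assumptions for illustration.
 */
static int tdx_prepopulate_gpa_example(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	u8 level;
	int r;

	/* Fault the mapping in through the newly exported mapper. */
	r = kvm_tdp_map_page(vcpu, gpa, PFERR_WRITE_MASK, &level);
	if (r)
		return r;

	/* kvm_tdp_mmu_gpa_is_mapped() asserts that mmu_lock is held. */
	read_lock(&vcpu->kvm->mmu_lock);
	if (!kvm_tdp_mmu_gpa_is_mapped(vcpu, gpa))
		r = -EIO;
	read_unlock(&vcpu->kvm->mmu_lock);

	return r;
}

The real callers arrive in later patches of the series; this only
illustrates the intended call shape.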

Link: https://lore.kernel.org/kvm/ZfBkle1eZFfjPI8l@google.com/ [1]
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
---
TDX MMU part 2 v1:
 - Change exported function to just return whether the GPA is mapped because "You
   are executing with the filemap_invalidate_lock() taken, and therefore
   cannot race with kvm_gmem_punch_hole()" (Paolo)
   https://lore.kernel.org/kvm/CABgObfbpNN842noAe77WYvgi5MzK2SAA_FYw-=fGa+PcT_Z22w@mail.gmail.com/
 - Take root hpa instead of enum (Paolo)

TDX MMU Prep v2:
 - Rename function with "mirror" and use root enum

TDX MMU Prep:
 - New patch
---
 arch/x86/kvm/mmu.h         |  3 +++
 arch/x86/kvm/mmu/mmu.c     |  3 +--
 arch/x86/kvm/mmu/tdp_mmu.c | 37 ++++++++++++++++++++++++++++++++-----
 3 files changed, 36 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini Sept. 9, 2024, 1:51 p.m. UTC | #1
On 9/4/24 05:07, Rick Edgecombe wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Export a function to walk down the TDP MMU without modifying it and simply
> check whether a GPA is mapped.
> 
> Future changes will support pre-populating TDX private memory. In order to
> implement this, KVM will need to check if a given GFN is already
> pre-populated in the mirrored EPT. [1]
> 
> There is already a TDP MMU walker, kvm_tdp_mmu_get_walk(), for use within
> the KVM MMU that almost does what is required. However, to make sense of
> the results, MMU internal PTE helpers are needed. Refactor the code to
> provide a helper that can be used outside of the KVM MMU code.
> 
> Refactoring the KVM page fault handler to support this lookup usage was
> also considered, but it was an awkward fit.
> 
> kvm_tdp_mmu_gpa_is_mapped() is based on a diff by Paolo Bonzini.
> 
> Link: https://lore.kernel.org/kvm/ZfBkle1eZFfjPI8l@google.com/ [1]
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> ---
> TDX MMU part 2 v1:
>   - Change exported function to just return whether the GPA is mapped because "You
>     are executing with the filemap_invalidate_lock() taken, and therefore
>     cannot race with kvm_gmem_punch_hole()" (Paolo)
>     https://lore.kernel.org/kvm/CABgObfbpNN842noAe77WYvgi5MzK2SAA_FYw-=fGa+PcT_Z22w@mail.gmail.com/
>   - Take root hpa instead of enum (Paolo)
> 
> TDX MMU Prep v2:
>   - Rename function with "mirror" and use root enum
> 
> TDX MMU Prep:
>   - New patch
> ---
>   arch/x86/kvm/mmu.h         |  3 +++
>   arch/x86/kvm/mmu/mmu.c     |  3 +--
>   arch/x86/kvm/mmu/tdp_mmu.c | 37 ++++++++++++++++++++++++++++++++-----
>   3 files changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 8f289222b353..5faa416ac874 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -254,6 +254,9 @@ extern bool tdp_mmu_enabled;
>   #define tdp_mmu_enabled false
>   #endif
>   
> +bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa);
> +int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level);
> +
>   static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
>   {
>   	return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 7e66d7c426c1..01808cdf8627 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4713,8 +4713,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
>   	return direct_page_fault(vcpu, fault);
>   }
>   
> -static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
> -			    u8 *level)
> +int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level)
>   {
>   	int r;
>   
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 37b3769a5d32..019b43723d90 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -1911,16 +1911,13 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>    *
>    * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
>    */
> -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> -			 int *root_level)
> +static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +				  struct kvm_mmu_page *root)
>   {
> -	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
>   	struct tdp_iter iter;
>   	gfn_t gfn = addr >> PAGE_SHIFT;
>   	int leaf = -1;
>   
> -	*root_level = vcpu->arch.mmu->root_role.level;
> -
>   	tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
>   		leaf = iter.level;
>   		sptes[leaf] = iter.old_spte;
> @@ -1929,6 +1926,36 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
>   	return leaf;
>   }
>   
> +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
> +			 int *root_level)
> +{
> +	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
> +	*root_level = vcpu->arch.mmu->root_role.level;
> +
> +	return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root);
> +}
> +
> +bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	bool is_direct = kvm_is_addr_direct(kvm, gpa);
> +	hpa_t root = is_direct ? vcpu->arch.mmu->root.hpa :
> +				 vcpu->arch.mmu->mirror_root_hpa;
> +	u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
> +	int leaf;
> +
> +	lockdep_assert_held(&kvm->mmu_lock);
> +	rcu_read_lock();
> +	leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, root_to_sp(root));
> +	rcu_read_unlock();
> +	if (leaf < 0)
> +		return false;
> +
> +	spte = sptes[leaf];
> +	return is_shadow_present_pte(spte) && is_last_spte(spte, leaf);
> +}
> +EXPORT_SYMBOL_GPL(kvm_tdp_mmu_gpa_is_mapped);
> +
>   /*
>    * Returns the last level spte pointer of the shadow page walk for the given
>    * gpa, and sets *spte to the spte value. This spte may be non-preset. If no

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

I will take another look at the locking after I see some callers.

Paolo

Patch

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 8f289222b353..5faa416ac874 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -254,6 +254,9 @@  extern bool tdp_mmu_enabled;
 #define tdp_mmu_enabled false
 #endif
 
+bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa);
+int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level);
+
 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
 	return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7e66d7c426c1..01808cdf8627 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4713,8 +4713,7 @@  int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	return direct_page_fault(vcpu, fault);
 }
 
-static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
-			    u8 *level)
+int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code, u8 *level)
 {
 	int r;
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 37b3769a5d32..019b43723d90 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1911,16 +1911,13 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  *
  * Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
-			 int *root_level)
+static int __kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+				  struct kvm_mmu_page *root)
 {
-	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
 	struct tdp_iter iter;
 	gfn_t gfn = addr >> PAGE_SHIFT;
 	int leaf = -1;
 
-	*root_level = vcpu->arch.mmu->root_role.level;
-
 	tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf] = iter.old_spte;
@@ -1929,6 +1926,36 @@  int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 	return leaf;
 }
 
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level)
+{
+	struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa);
+	*root_level = vcpu->arch.mmu->root_role.level;
+
+	return __kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root);
+}
+
+bool kvm_tdp_mmu_gpa_is_mapped(struct kvm_vcpu *vcpu, u64 gpa)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool is_direct = kvm_is_addr_direct(kvm, gpa);
+	hpa_t root = is_direct ? vcpu->arch.mmu->root.hpa :
+				 vcpu->arch.mmu->mirror_root_hpa;
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1], spte;
+	int leaf;
+
+	lockdep_assert_held(&kvm->mmu_lock);
+	rcu_read_lock();
+	leaf = __kvm_tdp_mmu_get_walk(vcpu, gpa, sptes, root_to_sp(root));
+	rcu_read_unlock();
+	if (leaf < 0)
+		return false;
+
+	spte = sptes[leaf];
+	return is_shadow_present_pte(spte) && is_last_spte(spte, leaf);
+}
+EXPORT_SYMBOL_GPL(kvm_tdp_mmu_gpa_is_mapped);
+
 /*
  * Returns the last level spte pointer of the shadow page walk for the given
  * gpa, and sets *spte to the spte value. This spte may be non-preset. If no