
[v3,3/9] KVM: x86/mmu: Shrink split_shadow_page_cache via KVM MMU shrinker

Message ID 20221222023457.1764-4-vipinsh@google.com (mailing list archive)
State New, archived
Series NUMA aware page table's pages allocation

Commit Message

Vipin Sharma Dec. 22, 2022, 2:34 a.m. UTC
split_shadow_page_cache is not used after dirty logging is disabled, which
makes it a good candidate for freeing memory when mmu_shrink_scan kicks in.

Account for split_shadow_page_cache pages in kvm_total_unused_mmu_pages and
free the cache in mmu_shrink_scan.
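
As an aside (not part of the patch), the accounting scheme can be summarized
with a minimal userspace model: every resize of a shadow page cache is
published to a global unused-pages counter under that cache's lock, and the
shrinker drains whole caches under the same lock. All names below (sp_cache,
total_unused_pages, cache_topup, cache_shrink) are simplified stand-ins, and
a pthread spinlock substitutes for the kernel spinlock; this is a sketch of
the idea, not the kernel implementation:

#include <pthread.h>
#include <stdio.h>

#define CACHE_CAPACITY 40

/* Simplified stand-in for struct kvm_mmu_memory_cache plus its lock. */
struct sp_cache {
	pthread_spinlock_t lock;
	int nobjs;
	void *objects[CACHE_CAPACITY];
};

/* Stand-in for the kvm_total_unused_mmu_pages percpu counter. */
static long total_unused_pages;

/* Refill the cache to "min" objects under its lock and publish the size
 * delta to the global counter (loosely what the topup path does). */
static void cache_topup(struct sp_cache *c, int min)
{
	int orig;

	pthread_spin_lock(&c->lock);
	orig = c->nobjs;
	while (c->nobjs < min && c->nobjs < CACHE_CAPACITY)
		c->objects[c->nobjs++] = (void *)0x1;	/* fake page */
	total_unused_pages += c->nobjs - orig;
	pthread_spin_unlock(&c->lock);
}

/* Empty the cache under its lock and report how much was freed (loosely
 * what the shrinker's per-cache helper does). */
static long cache_shrink(struct sp_cache *c)
{
	long freed;

	pthread_spin_lock(&c->lock);
	freed = c->nobjs;
	c->nobjs = 0;
	total_unused_pages -= freed;
	pthread_spin_unlock(&c->lock);
	return freed;
}

int main(void)
{
	struct sp_cache split_cache = { .nobjs = 0 };
	long freed;

	pthread_spin_init(&split_cache.lock, PTHREAD_PROCESS_PRIVATE);

	cache_topup(&split_cache, 1);
	printf("unused pages after topup: %ld\n", total_unused_pages);

	freed = cache_shrink(&split_cache);
	printf("shrinker freed %ld, unused pages left: %ld\n",
	       freed, total_unused_pages);

	pthread_spin_destroy(&split_cache.lock);
	return 0;
}

Build with "gcc -pthread" if you want to poke at it; the kernel code uses a
percpu counter and the kvm_mmu_memory_cache helpers instead of the plain
long and fixed array above.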

Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++
 arch/x86/kvm/mmu/mmu.c          | 63 +++++++++++++++++++--------------
 2 files changed, 42 insertions(+), 26 deletions(-)

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f89f02e18080..293994fabae3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1413,6 +1413,11 @@  struct kvm_arch {
 	struct kvm_mmu_memory_cache split_shadow_page_cache;
 	struct kvm_mmu_memory_cache split_page_header_cache;
 
+	/*
+	 * Protects changes to the size of split_shadow_page_cache.
+	 */
+	spinlock_t split_shadow_page_cache_lock;
+
 	/*
 	 * Memory cache used to allocate pte_list_desc structs while splitting
 	 * huge pages. In the worst case, to split one huge page, 512
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3364760a1695..6f6a10d7a871 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -659,14 +659,15 @@  static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
 }
 
 static int mmu_topup_sp_memory_cache(struct kvm_mmu_memory_cache *cache,
-				     spinlock_t *cache_lock)
+				     spinlock_t *cache_lock,
+				     int min)
 {
 	int orig_nobjs;
 	int r;
 
 	spin_lock(cache_lock);
 	orig_nobjs = cache->nobjs;
-	r = kvm_mmu_topup_memory_cache(cache, PT64_ROOT_MAX_LEVEL);
+	r = kvm_mmu_topup_memory_cache(cache, min);
 	if (orig_nobjs != cache->nobjs)
 		percpu_counter_add(&kvm_total_unused_mmu_pages,
 				   (cache->nobjs - orig_nobjs));
@@ -684,7 +685,8 @@  static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
 	if (r)
 		return r;
 	r = mmu_topup_sp_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
-				      &vcpu->arch.mmu_shadow_page_cache_lock);
+				      &vcpu->arch.mmu_shadow_page_cache_lock,
+				      PT64_ROOT_MAX_LEVEL);
 	if (r)
 		return r;
 	if (maybe_indirect) {
@@ -2184,16 +2186,12 @@  void *kvm_mmu_sp_memory_cache_alloc(struct kvm_mmu_memory_cache *shadow_page_cac
 	int orig_nobjs;
 	void *page;
 
-	if (!cache_lock) {
-		spin_lock(cache_lock);
-		orig_nobjs = shadow_page_cache->nobjs;
-	}
+	spin_lock(cache_lock);
+	orig_nobjs = shadow_page_cache->nobjs;
 	page = kvm_mmu_memory_cache_alloc(shadow_page_cache);
-	if (!cache_lock) {
-		if (orig_nobjs)
-			percpu_counter_dec(&kvm_total_unused_mmu_pages);
-		spin_unlock(cache_lock);
-	}
+	if (orig_nobjs)
+		percpu_counter_dec(&kvm_total_unused_mmu_pages);
+	spin_unlock(cache_lock);
 	return page;
 }
 
@@ -6130,6 +6128,7 @@  int kvm_mmu_init_vm(struct kvm *kvm)
 	kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
 
 	kvm->arch.split_shadow_page_cache.gfp_zero = __GFP_ZERO;
+	spin_lock_init(&kvm->arch.split_shadow_page_cache_lock);
 
 	kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
 	kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
@@ -6141,7 +6140,8 @@  static void mmu_free_vm_memory_caches(struct kvm *kvm)
 {
 	kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
 	kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
-	kvm_mmu_free_memory_cache(&kvm->arch.split_shadow_page_cache);
+	mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
+				 &kvm->arch.split_shadow_page_cache_lock);
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -6295,7 +6295,9 @@  static int topup_split_caches(struct kvm *kvm)
 	if (r)
 		return r;
 
-	return kvm_mmu_topup_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
+	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
+					 &kvm->arch.split_shadow_page_cache_lock,
+					 1);
 }
 
 static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
@@ -6320,7 +6322,7 @@  static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
 	/* Direct SPs do not require a shadowed_info_cache. */
 	caches.page_header_cache = &kvm->arch.split_page_header_cache;
 	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
-	caches.shadow_page_cache_lock = NULL;
+	caches.shadow_page_cache_lock = &kvm->arch.split_shadow_page_cache_lock;
 
 	/* Safe to pass NULL for vCPU since requesting a direct SP. */
 	return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role);
@@ -6687,14 +6689,23 @@  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 	}
 }
 
+static unsigned long mmu_shrink_cache(struct kvm_mmu_memory_cache *cache,
+				      spinlock_t *cache_lock)
+{
+	unsigned long freed = 0;
+
+	spin_lock(cache_lock);
+	if (cache->nobjs)
+		freed = kvm_mmu_empty_memory_cache(cache);
+	spin_unlock(cache_lock);
+	return freed;
+}
+
 static unsigned long
 mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
-	struct kvm_mmu_memory_cache *cache;
 	struct kvm *kvm, *first_kvm = NULL;
 	unsigned long freed = 0;
-	/* spinlock for memory cache */
-	spinlock_t *cache_lock;
 	struct kvm_vcpu *vcpu;
 	unsigned long i;
 
@@ -6707,15 +6718,15 @@  mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 			first_kvm = kvm;
 		list_move_tail(&kvm->vm_list, &vm_list);
 
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			cache = &vcpu->arch.mmu_shadow_page_cache;
-			cache_lock = &vcpu->arch.mmu_shadow_page_cache_lock;
-			if (READ_ONCE(cache->nobjs)) {
-				spin_lock(cache_lock);
-				freed += kvm_mmu_empty_memory_cache(cache);
-				spin_unlock(cache_lock);
-			}
+		freed += mmu_shrink_cache(&kvm->arch.split_shadow_page_cache,
+					  &kvm->arch.split_shadow_page_cache_lock);
 
+		if (freed >= sc->nr_to_scan)
+			break;
+
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			freed += mmu_shrink_cache(&vcpu->arch.mmu_shadow_page_cache,
+						  &vcpu->arch.mmu_shadow_page_cache_lock);
 		}
 
 		if (freed >= sc->nr_to_scan)