diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1453,7 +1453,7 @@ struct kvm_arch {
*
* Protected by kvm->slots_lock.
*/
- struct kvm_mmu_memory_cache split_shadow_page_cache;
+ struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
struct kvm_mmu_memory_cache split_page_header_cache;
/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6140,7 +6140,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
int kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
- int r;
+ int r, nid;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
@@ -6159,7 +6159,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache);
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
- INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache);
+ for_each_node(nid)
+ INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid]);
+
INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache);
kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
@@ -6169,10 +6171,13 @@ int kvm_mmu_init_vm(struct kvm *kvm)
static void mmu_free_vm_memory_caches(struct kvm *kvm)
{
+ int nid;
+
kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
mutex_lock(&kvm->slots_lock);
- mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache);
+ for_each_node(nid)
+ mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid]);
mutex_unlock(&kvm->slots_lock);
}
@@ -6282,7 +6287,7 @@ static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
}
-static bool need_topup_split_caches_or_resched(struct kvm *kvm)
+static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
{
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
return true;
@@ -6294,10 +6299,10 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm)
*/
return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
need_topup(&kvm->arch.split_page_header_cache, 1) ||
- need_topup(&kvm->arch.split_shadow_page_cache, 1);
+ need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
}
-static int topup_split_caches(struct kvm *kvm)
+static int topup_split_caches(struct kvm *kvm, int nid)
{
/*
* Allocating rmap list entries when splitting huge pages for nested
@@ -6327,10 +6332,11 @@ static int topup_split_caches(struct kvm *kvm)
if (r)
return r;
- return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache, 1);
+ return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid], 1);
}
-static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
+static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep,
+ int nid)
{
struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
struct shadow_page_caches caches = {};
@@ -6351,7 +6357,7 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
/* Direct SPs do not require a shadowed_info_cache. */
caches.page_header_cache = &kvm->arch.split_page_header_cache;
- caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
+ caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
/* Safe to pass NULL for vCPU since requesting a direct SP. */
return __kvm_mmu_get_shadow_page(kvm, NULL, &caches, gfn, role);
@@ -6359,7 +6365,7 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
static void shadow_mmu_split_huge_page(struct kvm *kvm,
const struct kvm_memory_slot *slot,
- u64 *huge_sptep)
+ u64 *huge_sptep, int nid)
{
struct kvm_mmu_memory_cache *cache = &kvm->arch.split_desc_cache;
@@ -6370,7 +6376,7 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
gfn_t gfn;
int index;
- sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
+ sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep, nid);
for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
sptep = &sp->spt[index];
@@ -6408,7 +6414,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
u64 *huge_sptep)
{
struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
- int level, r = 0;
+ int level, r = 0, nid;
gfn_t gfn;
u64 spte;
@@ -6422,7 +6428,9 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
goto out;
}
- if (need_topup_split_caches_or_resched(kvm)) {
+ nid = kvm_pfn_to_mmu_cache_nid(kvm, spte_to_pfn(spte));
+
+ if (need_topup_split_caches_or_resched(kvm, nid)) {
write_unlock(&kvm->mmu_lock);
cond_resched();
/*
@@ -6430,12 +6438,12 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
* rmap iterator should be restarted because the MMU lock was
* dropped.
*/
- r = topup_split_caches(kvm) ?: -EAGAIN;
+ r = topup_split_caches(kvm, nid) ?: -EAGAIN;
write_lock(&kvm->mmu_lock);
goto out;
}
- shadow_mmu_split_huge_page(kvm, slot, huge_sptep);
+ shadow_mmu_split_huge_page(kvm, slot, huge_sptep, nid);
out:
trace_kvm_mmu_split_huge_page(gfn, spte, level, r);
@@ -6761,8 +6769,8 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
if (freed >= sc->nr_to_scan)
goto out;
}
- freed += mmu_memory_cache_try_empty(&kvm->arch.split_shadow_page_cache,
- 1, &kvm->slots_lock);
+ freed += mmu_memory_cache_try_empty(kvm->arch.split_shadow_page_cache,
+ MAX_NUMNODES, &kvm->slots_lock);
if (freed >= sc->nr_to_scan)
goto out;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6176,7 +6176,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
- int r;
+ int r, nid;
if (cap->flags)
return -EINVAL;
@@ -6397,6 +6397,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
kvm->arch.numa_aware_page_table = true;
+
+ mutex_lock(&kvm->slots_lock);
+ for_each_node(nid) {
+ kvm->arch.split_shadow_page_cache[nid].node = nid;
+ }
+ mutex_unlock(&kvm->slots_lock);
r = 0;
}
mutex_unlock(&kvm->lock);
When splitting a huge page and the NUMA-aware page split option is enabled,
try to allocate the new lower level page tables on the same NUMA node as the
huge page. If NUMA-aware page split is disabled, fall back to the default
policy of using the current thread's mempolicy.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 42 ++++++++++++++++++++-------------
 arch/x86/kvm/x86.c              |  8 ++++++-
 3 files changed, 33 insertions(+), 19 deletions(-)
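For reference, kvm_pfn_to_mmu_cache_nid() used in shadow_mmu_try_split_huge_page()
above is introduced earlier in this series and is not part of these hunks. A
minimal sketch of the intended node selection, assuming the huge page's pfn is
backed by a struct page and that kvm->arch.numa_aware_page_table carries the
capability state toggled in kvm_vm_ioctl_enable_cap(), might look like this
(illustrative only, not the series' exact definition):

/*
 * Illustrative sketch: pick the per-node split cache for a huge page.
 * With NUMA-aware page tables enabled, use the node backing the huge
 * page so the new lower level page table is allocated on the same node;
 * otherwise use the current CPU's node, which matches the existing
 * behavior of allocating according to the current thread's policy.
 */
static int kvm_pfn_to_mmu_cache_nid(struct kvm *kvm, kvm_pfn_t pfn)
{
	if (kvm->arch.numa_aware_page_table && pfn_valid(pfn))
		return page_to_nid(pfn_to_page(pfn));

	return numa_mem_id();
}

The returned id doubles as the index into split_shadow_page_cache[], which is
why the array is sized MAX_NUMNODES and why mmu_shrink_scan() now passes the
whole array together with MAX_NUMNODES to mmu_memory_cache_try_empty().

Similarly, the .node = nid assignment in kvm_vm_ioctl_enable_cap() only has an
effect if the generic cache refill honors it. A hedged sketch of such an
allocation helper (hypothetical name, assuming a node value of NUMA_NO_NODE
means "no preference") could be:

/* Hypothetical helper: allocate one backing page on the cache's node. */
static void *mmu_memory_cache_alloc_page(struct kvm_mmu_memory_cache *mc,
					 gfp_t gfp_flags)
{
	struct page *page;

	/* NUMA_NO_NODE falls back to the local node, i.e. the old behavior. */
	page = alloc_pages_node(mc->node, gfp_flags | __GFP_ZERO, 0);

	return page ? page_address(page) : NULL;
}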