@@ -3,8 +3,7 @@
#include <linux/page_ref.h>
#include <linux/kvm_host.h>
#include <linux/sort.h>
-
-#include <trace/events/kvm.h>
+#include <linux/kernel.h>
#define MAX_FGPT_ENTRIES 1000
#define HYPERLIST_THRESHOLD 500
@@ -33,14 +32,256 @@ struct hypervisor_pages {
unsigned int pages;
};
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
DEFINE_PER_CPU(int, kvm_pt_idx);
struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
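+
+/* Clear every entry in the shared hypervisor_pagelist. */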
+static void empty_hyperlist(void)
+{
+ int i = 0;
+
+ while (i < MAX_FGPT_ENTRIES) {
+ hypervisor_pagelist[i].pfn = 0;
+ hypervisor_pagelist[i].pages = 0;
+ i++;
+ }
+}
+
+static void make_hypercall(void)
+{
+	/*
+	 * Dummy function: to be filled in later with the actual hypercall.
+	 */
+ empty_hyperlist();
+}
+
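+/* sort() comparison callback: order hypervisor_pagelist entries by pfn. */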
+static int sort_pfn(const void *a1, const void *b1)
+{
+ const struct hypervisor_pages *a = a1;
+ const struct hypervisor_pages *b = b1;
+
+ if (a->pfn > b->pfn)
+ return 1;
+
+ if (a->pfn < b->pfn)
+ return -1;
+
+ return 0;
+}
+
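+/*
+ * Move all non-empty entries to the front of hypervisor_pagelist, zero out
+ * the remainder and return the number of valid entries.
+ */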
+static int pack_hyperlist(void)
+{
+ int i = 0, j = 0;
+
+ while (i < MAX_FGPT_ENTRIES) {
+ if (hypervisor_pagelist[i].pfn != 0) {
+ if (i != j) {
+ hypervisor_pagelist[j].pfn =
+ hypervisor_pagelist[i].pfn;
+ hypervisor_pagelist[j].pages =
+ hypervisor_pagelist[i].pages;
+ }
+ j++;
+ }
+ i++;
+ }
+ i = j;
+ while (j < MAX_FGPT_ENTRIES) {
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ }
+ return i;
+}
+
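+/*
+ * Sort hypervisor_pagelist by pfn and merge entries whose pfn ranges overlap
+ * or are contiguous. Returns the index of the last used entry once the list
+ * has been compacted.
+ */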
+int compress_hyperlist(void)
+{
+ int i = 0, j = 1, merge_counter = 0, ret = 0;
+
+ sort(hypervisor_pagelist, MAX_FGPT_ENTRIES,
+ sizeof(struct hypervisor_pages), sort_pfn, NULL);
+ while (i < MAX_FGPT_ENTRIES && j < MAX_FGPT_ENTRIES) {
+ unsigned long pfni = hypervisor_pagelist[i].pfn;
+ unsigned int pagesi = hypervisor_pagelist[i].pages;
+ unsigned long pfnj = hypervisor_pagelist[j].pfn;
+ unsigned int pagesj = hypervisor_pagelist[j].pages;
+
+ if (pfnj <= pfni) {
+ if (((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) &&
+ ((pfnj + pagesj - 1) >= (pfni - 1))) {
+ hypervisor_pagelist[i].pfn = pfnj;
+ hypervisor_pagelist[i].pages += pfni - pfnj;
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ } else if ((pfnj + pagesj - 1) > (pfni + pagesi - 1)) {
+ hypervisor_pagelist[i].pfn = pfnj;
+ hypervisor_pagelist[i].pages = pagesj;
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ }
+ } else if (pfnj > pfni) {
+ if ((pfnj + pagesj - 1) > (pfni + pagesi - 1) &&
+ (pfnj <= pfni + pagesi)) {
+ hypervisor_pagelist[i].pages +=
+ (pfnj + pagesj - 1) -
+ (pfni + pagesi - 1);
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ } else if ((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) {
+ hypervisor_pagelist[j].pfn = 0;
+ hypervisor_pagelist[j].pages = 0;
+ j++;
+ merge_counter++;
+ continue;
+ }
+ }
+ i = j;
+ j++;
+ }
+ if (merge_counter != 0)
+ ret = pack_hyperlist() - 1;
+ else
+ ret = MAX_FGPT_ENTRIES - 1;
+ return ret;
+}
+
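+/*
+ * Copy the first hyper_idx entries of hypervisor_pagelist back into the
+ * current CPU's kvm_pt list and clear hypervisor_pagelist.
+ */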
+void copy_hyperlist(int hyper_idx)
+{
+ int *idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj;
+ int i = 0;
+
+ free_page_obj = &get_cpu_var(kvm_pt)[0];
+ while (i < hyper_idx) {
+ free_page_obj[*idx].pfn = hypervisor_pagelist[i].pfn;
+ free_page_obj[*idx].pages = hypervisor_pagelist[i].pages;
+ *idx += 1;
+ i++;
+ }
+ empty_hyperlist();
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+}
+
+/*
+ * arch_free_page_slowpath() - Walk the per-CPU kvm_free_pages list, verify
+ * that each captured page is still free and add the resulting ranges of free
+ * pages to hypervisor_pagelist. Adjacent free pages are coalesced into a
+ * single entry; when the list fills up it is compressed and, if compression
+ * does not bring it below HYPERLIST_THRESHOLD, a hypercall is made.
+ */
+void arch_free_page_slowpath(void)
+{
+ int idx = 0;
+ int hyper_idx = -1;
+ int *kvm_idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj = &get_cpu_var(kvm_pt)[0];
+
+ write_seqlock(&guest_page_lock);
+ while (idx < MAX_FGPT_ENTRIES) {
+ unsigned long pfn = free_page_obj[idx].pfn;
+ unsigned long pfn_end = free_page_obj[idx].pfn +
+ free_page_obj[idx].pages - 1;
+ bool prev_free = false;
+
+ while (pfn <= pfn_end) {
+ struct page *p = pfn_to_page(pfn);
+
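+			/*
+			 * Skip compound (huge) pages as a whole: jump past
+			 * the entire allocation and break the current run of
+			 * free pages.
+			 */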
+ if (PageCompound(p)) {
+ struct page *head_page = compound_head(p);
+ unsigned long head_pfn = page_to_pfn(head_page);
+ unsigned int alloc_pages =
+ 1 << compound_order(head_page);
+
+ pfn = head_pfn + alloc_pages;
+ prev_free = false;
+ continue;
+ }
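+			/*
+			 * A non-zero reference count means the page has been
+			 * allocated since it was captured; skip it.
+			 */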
+ if (page_ref_count(p)) {
+ pfn++;
+ prev_free = false;
+ continue;
+ }
+			/*
+			 * The page is free, so add it to the list. If
+			 * hypervisor_pagelist fills up, compress it and make
+			 * the hypercall if required.
+			 */
+ if (!prev_free) {
+ hyper_idx++;
+ hypervisor_pagelist[hyper_idx].pfn = pfn;
+ hypervisor_pagelist[hyper_idx].pages = 1;
+ if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
+ hyper_idx = compress_hyperlist();
+ if (hyper_idx >=
+ HYPERLIST_THRESHOLD) {
+ make_hypercall();
+ hyper_idx = 0;
+ }
+ }
+ /*
+ * If the next contiguous page is free, it can
+ * be added to this same entry.
+ */
+ prev_free = true;
+ } else {
+				/*
+				 * The previous page was free as well, so
+				 * extend the current entry instead of
+				 * starting a new one.
+				 */
+ hypervisor_pagelist[hyper_idx].pages++;
+ }
+ pfn++;
+ }
+ free_page_obj[idx].pfn = 0;
+ free_page_obj[idx].pages = 0;
+ idx++;
+ }
+ *kvm_idx = 0;
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+ write_sequnlock(&guest_page_lock);
+}
+
void arch_alloc_page(struct page *page, int order)
{
+ unsigned int seq;
+
+	/*
+	 * arch_free_page() acquires guest_page_lock (via the slowpath) once
+	 * the list carrying guest free pages is full and a hypercall is about
+	 * to be made. No further allocation is allowed until the complete
+	 * free page list has been traversed.
+	 */
+ do {
+ seq = read_seqbegin(&guest_page_lock);
+ } while (read_seqretry(&guest_page_lock, seq));
}
void arch_free_page(struct page *page, int order)
{
+ int *free_page_idx = &get_cpu_var(kvm_pt_idx);
+ struct kvm_free_pages *free_page_obj;
+ unsigned long flags;
+
+	/*
+	 * Use of shared state may trigger a race between IRQ and process
+	 * context, causing unwanted overwrites. Disabling local interrupts is
+	 * a temporary workaround until a better solution is in place.
+	 */
+ local_irq_save(flags);
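+	/*
+	 * Record the freed range in the per-CPU list and drain it through
+	 * the slowpath once the list is full.
+	 */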
+ free_page_obj = &get_cpu_var(kvm_pt)[0];
+ free_page_obj[*free_page_idx].pfn = page_to_pfn(page);
+ free_page_obj[*free_page_idx].pages = 1 << order;
+ *free_page_idx += 1;
+ if (*free_page_idx == MAX_FGPT_ENTRIES)
+ arch_free_page_slowpath();
+ put_cpu_var(kvm_pt);
+ put_cpu_var(kvm_pt_idx);
+ local_irq_restore(flags);
}