Message ID | 20171103203013.9521-3-nilal@redhat.com (mailing list archive)
---|---
State | New, archived
On Fri, Nov 03, 2017 at 04:30:09PM -0400, nilal@redhat.com wrote:
> From: Nitesh Narayan Lal <niteshnarayanlalleo@gmail.com>
> 
> This patch adds the guest implementation in order to maintain the list of
> pages which are freed by the guest and are not reused. To avoid any
> reallocation it includes seqlock once the list is completely filled.
> Though it doesn't carries the hypercall related changes.
> 
> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
> ---
>  virt/kvm/page_hinting.c | 245 +++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 243 insertions(+), 2 deletions(-)
> 
> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> index 39d2b1d..658856d 100644
> --- a/virt/kvm/page_hinting.c
> +++ b/virt/kvm/page_hinting.c
> @@ -3,8 +3,7 @@
>  #include <linux/page_ref.h>
>  #include <linux/kvm_host.h>
>  #include <linux/sort.h>
> -
> -#include <trace/events/kvm.h>
> +#include <linux/kernel.h>
>  
>  #define MAX_FGPT_ENTRIES	1000
>  #define HYPERLIST_THRESHOLD	500
> @@ -33,14 +32,256 @@ struct hypervisor_pages {
>  	unsigned int pages;
>  };
>  
> +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
>  DEFINE_PER_CPU(int, kvm_pt_idx);
>  struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
>  
> +static void empty_hyperlist(void)
> +{
> +	int i = 0;
> +
> +	while (i < MAX_FGPT_ENTRIES) {
> +		hypervisor_pagelist[i].pfn = 0;
> +		hypervisor_pagelist[i].pages = 0;
> +		i++;
> +	}
> +}
> +
> +static void make_hypercall(void)
> +{
> +	/*
> +	 * Dummy function: Tobe filled later.
> +	 */
> +	empty_hyperlist();
> +}
> +
> +static int sort_pfn(const void *a1, const void *b1)
> +{
> +	const struct hypervisor_pages *a = a1;
> +	const struct hypervisor_pages *b = b1;
> +
> +	if (a->pfn > b->pfn)
> +		return 1;
> +
> +	if (a->pfn < b->pfn)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static int pack_hyperlist(void)
> +{
> +	int i = 0, j = 0;
> +
> +	while (i < MAX_FGPT_ENTRIES) {
> +		if (hypervisor_pagelist[i].pfn != 0) {
> +			if (i != j) {
> +				hypervisor_pagelist[j].pfn =
> +						hypervisor_pagelist[i].pfn;
> +				hypervisor_pagelist[j].pages =
> +						hypervisor_pagelist[i].pages;
> +			}
> +			j++;
> +		}
> +		i++;
> +	}
> +	i = j;
> +	while (j < MAX_FGPT_ENTRIES) {
> +		hypervisor_pagelist[j].pfn = 0;
> +		hypervisor_pagelist[j].pages = 0;
> +		j++;
> +	}
> +	return i;
> +}
> +
> +int compress_hyperlist(void)
> +{
> +	int i = 0, j = 1, merge_counter = 0, ret = 0;
> +
> +	sort(hypervisor_pagelist, MAX_FGPT_ENTRIES,
> +	     sizeof(struct hypervisor_pages), sort_pfn, NULL);
> +	while (i < MAX_FGPT_ENTRIES && j < MAX_FGPT_ENTRIES) {
> +		unsigned long pfni = hypervisor_pagelist[i].pfn;
> +		unsigned int pagesi = hypervisor_pagelist[i].pages;
> +		unsigned long pfnj = hypervisor_pagelist[j].pfn;
> +		unsigned int pagesj = hypervisor_pagelist[j].pages;
> +
> +		if (pfnj <= pfni) {
> +			if (((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) &&
> +			    ((pfnj + pagesj - 1) >= (pfni - 1))) {
> +				hypervisor_pagelist[i].pfn = pfnj;
> +				hypervisor_pagelist[i].pages += pfni - pfnj;
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			} else if ((pfnj + pagesj - 1) > (pfni + pagesi - 1)) {
> +				hypervisor_pagelist[i].pfn = pfnj;
> +				hypervisor_pagelist[i].pages = pagesj;
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			}
> +		} else if (pfnj > pfni) {
> +			if ((pfnj + pagesj - 1) > (pfni + pagesi - 1) &&
> +			    (pfnj <= pfni + pagesi)) {
> +				hypervisor_pagelist[i].pages +=
> +						(pfnj + pagesj - 1) -
> +						(pfni + pagesi - 1);
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			} else if ((pfnj + pagesj - 1) <= (pfni + pagesi - 1)) {
> +				hypervisor_pagelist[j].pfn = 0;
> +				hypervisor_pagelist[j].pages = 0;
> +				j++;
> +				merge_counter++;
> +				continue;
> +			}
> +		}
> +		i = j;
> +		j++;
> +	}
> +	if (merge_counter != 0)
> +		ret = pack_hyperlist() - 1;
> +	else
> +		ret = MAX_FGPT_ENTRIES - 1;
> +	return ret;
> +}
> +
> +void copy_hyperlist(int hyper_idx)
> +{
> +	int *idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj;
> +	int i = 0;
> +
> +	free_page_obj = &get_cpu_var(kvm_pt)[0];
> +	while (i < hyper_idx) {
> +		free_page_obj[*idx].pfn = hypervisor_pagelist[i].pfn;
> +		free_page_obj[*idx].pages = hypervisor_pagelist[i].pages;
> +		*idx += 1;
> +		i++;
> +	}
> +	empty_hyperlist();
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +}
> +
> +/*
> + * arch_free_page_slowpath() - This function adds the guest free page entries
> + * to hypervisor_pages list and also ensures defragmentation prior to addition
> + * if it is present with any entry of the kvm_free_pages list.
> + */
> +void arch_free_page_slowpath(void)
> +{
> +	int idx = 0;
> +	int hyper_idx = -1;
> +	int *kvm_idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj = &get_cpu_var(kvm_pt)[0];
> +
> +	write_seqlock(&guest_page_lock);
> +	while (idx < MAX_FGPT_ENTRIES) {
> +		unsigned long pfn = free_page_obj[idx].pfn;
> +		unsigned long pfn_end = free_page_obj[idx].pfn +
> +					free_page_obj[idx].pages - 1;
> +		bool prev_free = false;
> +
> +		while (pfn <= pfn_end) {
> +			struct page *p = pfn_to_page(pfn);
> +
> +			if (PageCompound(p)) {
> +				struct page *head_page = compound_head(p);
> +				unsigned long head_pfn = page_to_pfn(head_page);
> +				unsigned int alloc_pages =
> +					1 << compound_order(head_page);
> +
> +				pfn = head_pfn + alloc_pages;
> +				prev_free = false;
> +				continue;
> +			}
> +			if (page_ref_count(p)) {
> +				pfn++;
> +				prev_free = false;
> +				continue;
> +			}
> +			/*
> +			 * The page is free so add it to the list and free the
> +			 * hypervisor_pagelist if required.
> +			 */
> +			if (!prev_free) {
> +				hyper_idx++;
> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
> +				hypervisor_pagelist[hyper_idx].pages = 1;
> +				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
> +					hyper_idx = compress_hyperlist();
> +					if (hyper_idx >=
> +					    HYPERLIST_THRESHOLD) {
> +						make_hypercall();
> +						hyper_idx = 0;
> +					}
> +				}
> +				/*
> +				 * If the next contiguous page is free, it can
> +				 * be added to this same entry.
> +				 */
> +				prev_free = true;
> +			} else {
> +				/*
> +				 * Multiple adjacent free pages
> +				 */
> +				hypervisor_pagelist[hyper_idx].pages++;
> +			}
> +			pfn++;
> +		}
> +		free_page_obj[idx].pfn = 0;
> +		free_page_obj[idx].pages = 0;
> +		idx++;
> +	}
> +	*kvm_idx = 0;
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +	write_sequnlock(&guest_page_lock);
> +}
> +
>  void arch_alloc_page(struct page *page, int order)
>  {
> +	unsigned int seq;
> +
> +	/*
> +	 * arch_free_page will acquire the lock once the list carrying guest
> +	 * free pages is full and a hypercall will be made. Until complete free
> +	 * page list is traversed no further allocaiton will be allowed.
> +	 */
> +	do {
> +		seq = read_seqbegin(&guest_page_lock);
> +	} while (read_seqretry(&guest_page_lock, seq));
>  }
>  
>  void arch_free_page(struct page *page, int order)
>  {
> +	int *free_page_idx = &get_cpu_var(kvm_pt_idx);
> +	struct kvm_free_pages *free_page_obj;
> +	unsigned long flags;
> +
> +	/*
> +	 * use of global variables may trigger a race condition between irq and
> +	 * process context causing unwanted overwrites. This will be replaced
> +	 * with a better solution to prevent such race conditions.

When do you plan to replace this?
If you don't want this to be merged yet pls include RFC in subject.

> +	 */
> +	local_irq_save(flags);
> +	free_page_obj = &get_cpu_var(kvm_pt)[0];
> +	free_page_obj[*free_page_idx].pfn = page_to_pfn(page);
> +	free_page_obj[*free_page_idx].pages = 1 << order;
> +	*free_page_idx += 1;
> +	if (*free_page_idx == MAX_FGPT_ENTRIES)
> +		arch_free_page_slowpath();
> +	put_cpu_var(kvm_pt);
> +	put_cpu_var(kvm_pt_idx);
> +	local_irq_restore(flags);
>  }
> -- 
> 2.9.4
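For readers following the thread: the defragmentation that compress_hyperlist() performs above amounts to sorting the (pfn, pages) entries by pfn and merging runs that overlap or touch. Below is a minimal, standalone userspace sketch of that sort-and-merge idea; struct range, cmp_pfn() and merge_ranges() are illustrative names for this sketch only, not code from the patch.

```c
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for the patch's hypervisor_pages entries. */
struct range {
	unsigned long pfn;	/* first page frame number of the run */
	unsigned int pages;	/* number of contiguous free pages */
};

static int cmp_pfn(const void *a, const void *b)
{
	const struct range *ra = a, *rb = b;

	if (ra->pfn > rb->pfn)
		return 1;
	if (ra->pfn < rb->pfn)
		return -1;
	return 0;
}

/*
 * Sort by pfn, then merge overlapping or adjacent runs in place.
 * Returns the number of entries left after merging.
 */
static int merge_ranges(struct range *r, int n)
{
	int i, j = 0;

	if (n == 0)
		return 0;
	qsort(r, n, sizeof(*r), cmp_pfn);
	for (i = 1; i < n; i++) {
		unsigned long end = r[j].pfn + r[j].pages;

		if (r[i].pfn <= end) {
			/* Overlaps or touches the current run: extend it. */
			unsigned long new_end = r[i].pfn + r[i].pages;

			if (new_end > end)
				r[j].pages += new_end - end;
		} else {
			/* Gap found: start a new run. */
			r[++j] = r[i];
		}
	}
	return j + 1;
}

int main(void)
{
	struct range r[] = {
		{ 100, 4 }, { 104, 2 }, { 50, 1 }, { 103, 8 },
	};
	int i, n = merge_ranges(r, 4);

	for (i = 0; i < n; i++)
		printf("pfn %lu, pages %u\n", r[i].pfn, r[i].pages);
	return 0;
}
```

With this example input the three runs around pfn 100 collapse into a single { pfn = 100, pages = 11 } entry, which mirrors the compaction the patch performs on hypervisor_pagelist before checking HYPERLIST_THRESHOLD and deciding whether to issue the hypercall.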