@@ -32,6 +32,7 @@
#include <linux/mm.h>
#include <linux/mount.h>
#include <linux/magic.h>
+#include <linux/page_hinting.h>
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -53,7 +54,7 @@ static struct vfsmount *balloon_mnt;
struct virtio_balloon {
struct virtio_device *vdev;
- struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+ struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
/* The balloon servicing is delegated to a freezable workqueue. */
struct work_struct update_balloon_stats_work;
@@ -95,6 +96,39 @@ static struct virtio_device_id id_table[] = {
{ 0 },
};
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+/*
+ * tell_host_one_page - queue a single page on @vq for the host.
+ * @vb: balloon device; not referenced by this helper.
+ * @vq: virtqueue the page descriptor is added to.
+ * @pfn: frame number; shifted left by VIRTIO_BALLOON_PFN_SHIFT to form the
+ *       address placed in the descriptor.
+ *
+ * Adds one PAGE_SIZE descriptor for the page with
+ * virtqueue_add_chain_desc() and then passes the resulting chain id and the
+ * address to virtqueue_add_chain().  The return values of both calls are
+ * ignored.
+ * NOTE(review): the queue is not kicked here - confirm the caller or the
+ * chain API takes care of notifying the host.
+ */
+static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
+			       unsigned long pfn)
+{
+	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
+	/* Widen first so a 32-bit unsigned long cannot lose high bits. */
+	u64 addr = (u64)pfn << VIRTIO_BALLOON_PFN_SHIFT;
+
+	virtqueue_add_chain_desc(vq, addr, PAGE_SIZE, &id, &id, 0);
+	virtqueue_add_chain(vq, id, 0, NULL, (void *)addr, NULL);
+}
+
+/*
+ * virtballoon_page_hinting - report the queued free-page ranges to the host.
+ * @vb: balloon device whose hinting_vq receives the pages.
+ * @hyper_entries: number of leading hypervisor_pagelist[] entries to walk.
+ *
+ * Each entry describes a run of .pages page frames starting at .pfn; every
+ * frame of the run is handed to tell_host_one_page() individually.  An entry
+ * is reset to { 0, 0 } once it has been walked.
+ */
+void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
+{
+	int i;
+
+	for (i = 0; i < hyper_entries; i++) {
+		unsigned long pfn = hypervisor_pagelist[i].pfn;
+		unsigned int left = hypervisor_pagelist[i].pages;
+
+		/*
+		 * Count pages down instead of comparing against
+		 * "pfn + pages - 1": that bound wraps to ULONG_MAX for an
+		 * empty { 0, 0 } entry and the walk would never terminate.
+		 */
+		for (; left; left--, pfn++) {
+			vb->pfns[0] = cpu_to_virtio32(vb->vdev, pfn);
+			vb->num_pfns = 1;
+			/*
+			 * Pass the native pfn: vb->pfns[0] holds a 32-bit
+			 * value in virtio byte order and must not be fed
+			 * into address arithmetic.
+			 */
+			tell_host_one_page(vb, vb->hinting_vq, pfn);
+		}
+		hypervisor_pagelist[i].pfn = 0;
+		hypervisor_pagelist[i].pages = 0;
+	}
+}
+#endif
+
static u32 page_to_balloon_pfn(struct page *page)
{
unsigned long pfn = page_to_pfn(page);
@@ -111,6 +145,12 @@ static void balloon_ack(struct virtqueue *vq)
wake_up(&vb->acked);
}
+/*
+ * Callback registered for the "hinting" virtqueue in init_vqs(): wakes up
+ * whoever is waiting on the balloon's acked wait queue.
+ */
+static void hinting_ack(struct virtqueue *vq)
+{
+	struct virtio_device *vdev = vq->vdev;
+	struct virtio_balloon *vb = vdev->priv;
+
+	wake_up(&vb->acked);
+}
static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
{
struct scatterlist sg;
@@ -404,22 +444,25 @@ static void update_balloon_size_func(struct work_struct *work)
static int init_vqs(struct virtio_balloon *vb)
{
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
- static const char * const names[] = { "inflate", "deflate", "stats" };
+ struct virtqueue *vqs[4];
+ vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, hinting_ack,
+ stats_request };
+ static const char * const names[] = { "inflate", "deflate", "hinting",
+ "stats" };
int err, nvqs;
/*
* We expect two virtqueues: inflate and deflate, and
* optionally stat.
*/
- nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+ nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 4 : 3;
err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
if (err)
return err;
vb->inflate_vq = vqs[0];
vb->deflate_vq = vqs[1];
+ vb->hinting_vq = vqs[3];
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
struct scatterlist sg;
unsigned int num_stats;
@@ -581,6 +624,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
virtio_device_ready(vdev);
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+ request_hypercall = (void *)&virtballoon_page_hinting;
+ balloon_ptr = vb;
+#endif
+
if (towards_target(vb))
virtballoon_changed(vdev);
return 0;
new file mode 100644
@@ -0,0 +1,16 @@
+#ifndef _LINUX_PAGE_HINTING_H
+#define _LINUX_PAGE_HINTING_H
+
+/* Number of slots in hypervisor_pagelist[]. */
+#define MAX_FGPT_ENTRIES	1000
+
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+	unsigned long pfn;
+	unsigned int pages;
+};
+
+extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+/*
+ * Hook used to hand the page list over, and the opaque pointer passed as
+ * its first argument; the int argument is the number of list entries.
+ */
+extern void (*request_hypercall)(void *, int);
+extern void *balloon_ptr;
+
+#endif /* _LINUX_PAGE_HINTING_H */
@@ -5,8 +5,8 @@
#include <linux/sort.h>
#include <linux/kernel.h>
#include <trace/events/kvm.h>
+#include <linux/page_hinting.h>
-#define MAX_FGPT_ENTRIES 1000
#define HYPERLIST_THRESHOLD 500
/*
* struct kvm_free_pages - Tracks the pages which are freed by the guest.
@@ -21,22 +21,12 @@ struct kvm_free_pages {
unsigned int pages;
};
-/*
- * hypervisor_pages - It is a dummy structure passed with the hypercall.
- * @pfn - page frame number for the page which is to be freed.
- * @pages - number of pages which are supposed to be freed.
- * A global array object is used to to hold the list of pfn and pages and is
- * passed as part of the hypercall.
- */
-struct hypervisor_pages {
- unsigned long pfn;
- unsigned int pages;
-};
-
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
DEFINE_PER_CPU(int, kvm_pt_idx);
struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+/*
+ * Hook called by hyperlist_ready() to pass the page list on.  It is assigned
+ * in virtballoon_probe() (under CONFIG_KVM_FREE_PAGE_HINTING) and is NULL
+ * until that has run.
+ */
+void (*request_hypercall)(void *, int);
+/* First argument handed to request_hypercall; set to the probed balloon. */
+void *balloon_ptr;
static void empty_hyperlist(void)
{
@@ -49,13 +39,11 @@ static void empty_hyperlist(void)
}
}
-void make_hypercall(void)
+/*
+ * hyperlist_ready - pass the collected page list to the registered hook.
+ * @entries: count forwarded to request_hypercall as its second argument.
+ *
+ * empty_hyperlist() runs afterwards whether or not a hook was registered.
+ */
+void hyperlist_ready(int entries)
{
- /*
- * Dummy function: Tobe filled later.
- */
- empty_hyperlist();
trace_guest_str_dump("Hypercall to host...:");
+	/* The hook is still NULL if no balloon device has been probed. */
+	if (request_hypercall)
+		request_hypercall(balloon_ptr, entries);
+	empty_hyperlist();
}
static int sort_pfn(const void *a1, const void *b1)
@@ -156,7 +144,7 @@ int compress_hyperlist(void)
if (merge_counter != 0)
ret = pack_hyperlist() - 1;
else
- ret = MAX_FGPT_ENTRIES - 1;
+ ret = MAX_FGPT_ENTRIES;
return ret;
}
@@ -227,16 +215,16 @@ void arch_free_page_slowpath(void)
*/
if (!prev_free) {
hyper_idx++;
- hypervisor_pagelist[hyper_idx].pfn = pfn;
- hypervisor_pagelist[hyper_idx].pages = 1;
trace_guest_free_page_slowpath(
hypervisor_pagelist[hyper_idx].pfn,
hypervisor_pagelist[hyper_idx].pages);
+ hypervisor_pagelist[hyper_idx].pfn = pfn;
+ hypervisor_pagelist[hyper_idx].pages = 1;
if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
hyper_idx = compress_hyperlist();
if (hyper_idx >=
HYPERLIST_THRESHOLD) {
- make_hypercall();
+ hyperlist_ready(hyper_idx);
hyper_idx = 0;
}
}
@@ -272,6 +260,7 @@ void arch_alloc_page(struct page *page, int order)
* free pages is full and a hypercall will be made. Until complete free
* page list is traversed no further allocaiton will be allowed.
*/
+
do {
seq = read_seqbegin(&guest_page_lock);
} while (read_seqretry(&guest_page_lock, seq));