diff mbox

[RFC,V7,5/7] KVM: Sending hyperlist to the host via hinting_vq

Message ID 20180611151902.14383-6-nilal@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nitesh Lal June 11, 2018, 3:19 p.m. UTC
From: Nitesh Narayan Lal <nilal@redhat.com>

This patch creates a new vq (hinting_vq) to be used for page hinting
and adds support in the existing virtio balloon infrastructure so
that the hyper list carrying pages which are supposed to be freed
could be sent to the host (QEMU) for processing by using hinting_vq.

Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
---
 drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
 include/linux/page_hinting.h        | 16 ++++++
 include/uapi/linux/virtio_balloon.h |  1 +
 virt/kvm/page_hinting.c             | 36 ++++++--------
 4 files changed, 129 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/page_hinting.h

Comments

Luiz Capitulino June 11, 2018, 8:26 p.m. UTC | #1
On Mon, 11 Jun 2018 11:19:00 -0400
nilal@redhat.com wrote:

> From: Nitesh Narayan Lal <nilal@redhat.com>
> 
> This patch creates a new vq (hinting_vq) to be used for page hinting
> and adds support in the existing virtio balloon infrastructure so
> that the hyper list carrying pages which are supposed to be freed
> could be sent to the host (QEMU) for processing by using hinting_vq.
> 
> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
> ---
>  drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
>  include/linux/page_hinting.h        | 16 ++++++
>  include/uapi/linux/virtio_balloon.h |  1 +
>  virt/kvm/page_hinting.c             | 36 ++++++--------
>  4 files changed, 129 insertions(+), 23 deletions(-)
>  create mode 100644 include/linux/page_hinting.h
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 6b237e3..217523f 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -32,6 +32,7 @@
>  #include <linux/mm.h>
>  #include <linux/mount.h>
>  #include <linux/magic.h>
> +#include <linux/page_hinting.h>
>  
>  /*
>   * Balloon device works in 4K page units.  So each page is pointed to by
> @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt;
>  
>  struct virtio_balloon {
>  	struct virtio_device *vdev;
> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
> +#else
>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> -
> +#endif

This is duplicating the exiting virtqueue pointers. It's only hinting_vq
that has to be protected by CONFIG_KVM_FREE_PAGE_HINTING.

>  	/* The balloon servicing is delegated to a freezable workqueue. */
>  	struct work_struct update_balloon_stats_work;
>  	struct work_struct update_balloon_size_work;
> @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = {
>  	{ 0 },
>  };
>  
> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
> +			       u64 gvaddr, int len)
> +{

I think this function actually tells the host the whole array at once,
right (vs. a single page). In this case, maybe it should be renamed to
tell_host_free_pfns?

> +	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
> +	u64 gpaddr = virt_to_phys((void *)gvaddr);
> +
> +	virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
> +	virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
> +}
> +
> +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
> +{
> +	u64 gvaddr = (u64)hypervisor_pagelist;
> +
> +	vb->num_pfns = hyper_entries;
> +	tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
> +}
> +
> +static void hinting_ack(struct virtqueue *vq)
> +{
> +	struct virtio_balloon *vb = vq->vdev->priv;
> +
> +	wake_up(&vb->acked);

Honest question: is there a process to be woken up? If I understood
correctly, the guest busy-waits for the host ACK, so the calling process is
never put to sleep. Or did I misunderstand?

> +}
> +#endif
> +
>  static u32 page_to_balloon_pfn(struct page *page)
>  {
>  	unsigned long pfn = page_to_pfn(page);
> @@ -425,6 +456,62 @@ static void update_balloon_size_func(struct work_struct *work)
>  		queue_work(system_freezable_wq, work);
>  }
>  
> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> +static int init_vqs(struct virtio_balloon *vb)
> +{

I don't think you can duplicate init_vqs().

> +	struct virtqueue *vqs[4];
> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
> +				       hinting_ack };
> +	static const char * const names[] = { "inflate", "deflate", "stats",
> +					      "hinting" };
> +	int err, nvqs;
> +	bool stats_vq_support, page_hinting_support;
> +
> +	/*
> +	 * We expect two virtqueues: inflate and deflate, and
> +	 * optionally stat and hinting.
> +	 */
> +	stats_vq_support = virtio_has_feature(vb->vdev,
> +					      VIRTIO_BALLOON_F_STATS_VQ);
> +	page_hinting_support = virtio_has_feature(vb->vdev,
> +						  VIRTIO_GUEST_PAGE_HINTING_VQ
> +						  );
> +	if (stats_vq_support && page_hinting_support)
> +		nvqs = 4;
> +	else if (stats_vq_support || page_hinting_support)
> +		nvqs = 3;
> +	else
> +		nvqs = 2;
> +
> +	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> +	if (err)
> +		return err;
> +
> +	vb->inflate_vq = vqs[0];
> +	vb->deflate_vq = vqs[1];
> +	if (page_hinting_support)
> +		vb->hinting_vq = vqs[3];
> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> +		struct scatterlist sg;
> +		unsigned int num_stats;
> +
> +		vb->stats_vq = vqs[2];
> +
> +		/*
> +		 * Prime this virtqueue with one buffer so the hypervisor can
> +		 * use it to signal us later (it can't be broken yet!).
> +		 */
> +		num_stats = update_balloon_stats(vb);
> +
> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
> +		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
> +		    < 0)
> +			BUG();
> +		virtqueue_kick(vb->stats_vq);
> +	}
> +	return 0;
> +}
> +#else
>  static int init_vqs(struct virtio_balloon *vb)
>  {
>  	struct virtqueue *vqs[3];
> @@ -463,6 +550,8 @@ static int init_vqs(struct virtio_balloon *vb)
>  	return 0;
>  }
>  
> +#endif
> +
>  #ifdef CONFIG_BALLOON_COMPACTION
>  /*
>   * virtballoon_migratepage - perform the balloon page migration on behalf of
> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  
>  	virtio_device_ready(vdev);
>  
> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> +	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
> +		request_hypercall = (void *)&virtballoon_page_hinting;
> +		balloon_ptr = vb;
> +	}
> +#endif

Very minor, but I think it would be nicer if you used a function from
virt/kvm/page_hinting.c instead of assigning global variables. Say:

page_hinting_enable(vb, virtballoon_page_hinting);

> +
>  	if (towards_target(vb))
>  		virtballoon_changed(vdev);
>  	return 0;
> @@ -692,6 +788,7 @@ static unsigned int features[] = {
>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
>  	VIRTIO_BALLOON_F_STATS_VQ,
>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> +	VIRTIO_GUEST_PAGE_HINTING_VQ,
>  };
>  
>  static struct virtio_driver virtio_balloon_driver = {
> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
> new file mode 100644
> index 0000000..0bfb646
> --- /dev/null
> +++ b/include/linux/page_hinting.h
> @@ -0,0 +1,16 @@
> +#define MAX_FGPT_ENTRIES	1000
> +/*
> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
> + * @pfn - page frame number for the page which is to be freed.
> + * @pages - number of pages which are supposed to be freed.
> + * A global array object is used to to hold the list of pfn and pages and is
> + * passed as part of the hypercall.
> + */
> +struct hypervisor_pages {
> +	unsigned long pfn;
> +	unsigned int pages;
> +};
> +
> +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> +extern void (*request_hypercall)(void *, int);
> +extern void *balloon_ptr;
> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> index 13b8cb5..16d299f 100644
> --- a/include/uapi/linux/virtio_balloon.h
> +++ b/include/uapi/linux/virtio_balloon.h
> @@ -34,6 +34,7 @@
>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> +#define VIRTIO_GUEST_PAGE_HINTING_VQ	3 /* Page hinting virtqueue */
>  
>  /* Size of a PFN in the balloon interface. */
>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> index 97500bb..417582a 100644
> --- a/virt/kvm/page_hinting.c
> +++ b/virt/kvm/page_hinting.c
> @@ -5,8 +5,8 @@
>  #include <linux/sort.h>
>  #include <linux/kernel.h>
>  #include <trace/events/kmem.h>
> +#include <linux/page_hinting.h>
>  
> -#define MAX_FGPT_ENTRIES	1000
>  #define HYPERLIST_THRESHOLD	1	/* FIXME: find a good threshold */
>  /*
>   * struct kvm_free_pages - Tracks the pages which are freed by the guest.
> @@ -21,22 +21,15 @@ struct kvm_free_pages {
>  	unsigned int pages;
>  };
>  
> -/*
> - * hypervisor_pages - It is a dummy structure passed with the hypercall.
> - * @pfn - page frame number for the page which is to be freed.
> - * @pages - number of pages which are supposed to be freed.
> - * A global array object is used to to hold the list of pfn and pages and is
> - * passed as part of the hypercall.
> - */
> -struct hypervisor_pages {
> -	unsigned long pfn;
> -	unsigned int pages;
> -};
> -
>  static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
>  DEFINE_PER_CPU(int, kvm_pt_idx);
>  struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> +EXPORT_SYMBOL(hypervisor_pagelist);
> +void (*request_hypercall)(void *, int);
> +EXPORT_SYMBOL(request_hypercall);
> +void *balloon_ptr;
> +EXPORT_SYMBOL(balloon_ptr);
>  
>  static void empty_hyperlist(void)
>  {
> @@ -49,13 +42,11 @@ static void empty_hyperlist(void)
>  	}
>  }
>  
> -void make_hypercall(void)
> +void hyperlist_ready(int entries)
>  {
> -	/*
> -	 * Dummy function: Tobe filled later.
> -	 */
> -	empty_hyperlist();
>  	trace_guest_str_dump("Hypercall to host...:");
> +	request_hypercall(balloon_ptr, entries);
> +	empty_hyperlist();
>  }
>  
>  static int sort_pfn(const void *a1, const void *b1)
> @@ -156,7 +147,7 @@ int compress_hyperlist(void)
>  	if (merge_counter != 0)
>  		ret = pack_hyperlist() - 1;
>  	else
> -		ret = MAX_FGPT_ENTRIES - 1;
> +		ret = MAX_FGPT_ENTRIES;
>  	return ret;
>  }
>  
> @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
>  			 */
>  			if (!prev_free) {
>  				hyper_idx++;
> -				hypervisor_pagelist[hyper_idx].pfn = pfn;
> -				hypervisor_pagelist[hyper_idx].pages = 1;
>  				trace_guest_free_page_slowpath(
>  				hypervisor_pagelist[hyper_idx].pfn,
>  				hypervisor_pagelist[hyper_idx].pages);
> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
> +				hypervisor_pagelist[hyper_idx].pages = 1;
>  				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
>  					hyper_idx =  compress_hyperlist();
>  					if (hyper_idx >=
>  					    HYPERLIST_THRESHOLD) {
> -						make_hypercall();
> +						hyperlist_ready(hyper_idx);
>  						hyper_idx = 0;
>  					}
>  				}
> @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
>  	 * free pages is full and a hypercall will be made. Until complete free
>  	 * page list is traversed no further allocaiton will be allowed.
>  	 */
> +
>  	do {
>  		seq = read_seqbegin(&guest_page_lock);
>  	} while (read_seqretry(&guest_page_lock, seq));
Nitesh Lal June 12, 2018, 3:02 p.m. UTC | #2
On 06/11/2018 04:26 PM, Luiz Capitulino wrote:
> On Mon, 11 Jun 2018 11:19:00 -0400
> nilal@redhat.com wrote:
>
>> From: Nitesh Narayan Lal <nilal@redhat.com>
>>
>> This patch creates a new vq (hinting_vq) to be used for page hinting
>> and adds support in the existing virtio balloon infrastructure so
>> that the hyper list carrying pages which are supposed to be freed
>> could be sent to the host (QEMU) for processing by using hinting_vq.
>>
>> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
>> ---
>>  drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
>>  include/linux/page_hinting.h        | 16 ++++++
>>  include/uapi/linux/virtio_balloon.h |  1 +
>>  virt/kvm/page_hinting.c             | 36 ++++++--------
>>  4 files changed, 129 insertions(+), 23 deletions(-)
>>  create mode 100644 include/linux/page_hinting.h
>>
>> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
>> index 6b237e3..217523f 100644
>> --- a/drivers/virtio/virtio_balloon.c
>> +++ b/drivers/virtio/virtio_balloon.c
>> @@ -32,6 +32,7 @@
>>  #include <linux/mm.h>
>>  #include <linux/mount.h>
>>  #include <linux/magic.h>
>> +#include <linux/page_hinting.h>
>>  
>>  /*
>>   * Balloon device works in 4K page units.  So each page is pointed to by
>> @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt;
>>  
>>  struct virtio_balloon {
>>  	struct virtio_device *vdev;
>> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
>> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
>> +#else
>>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
>> -
>> +#endif
> This is duplicating the exiting virtqueue pointers. It's only hinting_vq
> that has to be protected by CONFIG_KVM_FREE_PAGE_HINTING.
I will move hinting_vq into a separate statement, so that other
virtqueue pointers are not duplicated.
>
>>  	/* The balloon servicing is delegated to a freezable workqueue. */
>>  	struct work_struct update_balloon_stats_work;
>>  	struct work_struct update_balloon_size_work;
>> @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = {
>>  	{ 0 },
>>  };
>>  
>> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
>> +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
>> +			       u64 gvaddr, int len)
>> +{
> I think this function actually tells the host the whole array at once,
> right (vs. a single page). In this case, maybe it should be renamed to
> tell_host_free_pfns?
It communicates the address where the first entry of hypervisor_pagelist
is stored. Will it make sense if I rename it to tell_host_free_list?
>
>> +	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
>> +	u64 gpaddr = virt_to_phys((void *)gvaddr);
>> +
>> +	virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
>> +	virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
>> +}
>> +
>> +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
>> +{
>> +	u64 gvaddr = (u64)hypervisor_pagelist;
>> +
>> +	vb->num_pfns = hyper_entries;
>> +	tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
>> +}
>> +
>> +static void hinting_ack(struct virtqueue *vq)
>> +{
>> +	struct virtio_balloon *vb = vq->vdev->priv;
>> +
>> +	wake_up(&vb->acked);
> Honest question: is there a process to be woken up? If I understood
> correctly, the guest busy-waits for the host ACK, so the calling process is
> never put to sleep. Or did I misunderstand?
Yes, It is not required. I will remove this.
>
>> +}
>> +#endif
>> +
>>  static u32 page_to_balloon_pfn(struct page *page)
>>  {
>>  	unsigned long pfn = page_to_pfn(page);
>> @@ -425,6 +456,62 @@ static void update_balloon_size_func(struct work_struct *work)
>>  		queue_work(system_freezable_wq, work);
>>  }
>>  
>> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
>> +static int init_vqs(struct virtio_balloon *vb)
>> +{
> I don't think you can duplicate init_vqs().
I will use a single init_vqs with ifdefs to add the code corresponding
to page hinting in my next patch-series. Thank you for pointing this out.
>
>> +	struct virtqueue *vqs[4];
>> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
>> +				       hinting_ack };
>> +	static const char * const names[] = { "inflate", "deflate", "stats",
>> +					      "hinting" };
>> +	int err, nvqs;
>> +	bool stats_vq_support, page_hinting_support;
>> +
>> +	/*
>> +	 * We expect two virtqueues: inflate and deflate, and
>> +	 * optionally stat and hinting.
>> +	 */
>> +	stats_vq_support = virtio_has_feature(vb->vdev,
>> +					      VIRTIO_BALLOON_F_STATS_VQ);
>> +	page_hinting_support = virtio_has_feature(vb->vdev,
>> +						  VIRTIO_GUEST_PAGE_HINTING_VQ
>> +						  );
>> +	if (stats_vq_support && page_hinting_support)
>> +		nvqs = 4;
>> +	else if (stats_vq_support || page_hinting_support)
>> +		nvqs = 3;
>> +	else
>> +		nvqs = 2;
>> +
>> +	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
>> +	if (err)
>> +		return err;
>> +
>> +	vb->inflate_vq = vqs[0];
>> +	vb->deflate_vq = vqs[1];
>> +	if (page_hinting_support)
>> +		vb->hinting_vq = vqs[3];
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
>> +		struct scatterlist sg;
>> +		unsigned int num_stats;
>> +
>> +		vb->stats_vq = vqs[2];
>> +
>> +		/*
>> +		 * Prime this virtqueue with one buffer so the hypervisor can
>> +		 * use it to signal us later (it can't be broken yet!).
>> +		 */
>> +		num_stats = update_balloon_stats(vb);
>> +
>> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
>> +		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
>> +		    < 0)
>> +			BUG();
>> +		virtqueue_kick(vb->stats_vq);
>> +	}
>> +	return 0;
>> +}
>> +#else
>>  static int init_vqs(struct virtio_balloon *vb)
>>  {
>>  	struct virtqueue *vqs[3];
>> @@ -463,6 +550,8 @@ static int init_vqs(struct virtio_balloon *vb)
>>  	return 0;
>>  }
>>  
>> +#endif
>> +
>>  #ifdef CONFIG_BALLOON_COMPACTION
>>  /*
>>   * virtballoon_migratepage - perform the balloon page migration on behalf of
>> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
>>  
>>  	virtio_device_ready(vdev);
>>  
>> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
>> +	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
>> +		request_hypercall = (void *)&virtballoon_page_hinting;
>> +		balloon_ptr = vb;
>> +	}
>> +#endif
> Very minor, but I think it would be nicer if you used a function from
> virt/kvm/page_hinting.c instead of assigning global variables. Say:
>
> page_hinting_enable(vb, virtballoon_page_hinting);
I will make this change.
>
>> +
>>  	if (towards_target(vb))
>>  		virtballoon_changed(vdev);
>>  	return 0;
>> @@ -692,6 +788,7 @@ static unsigned int features[] = {
>>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
>>  	VIRTIO_BALLOON_F_STATS_VQ,
>>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
>> +	VIRTIO_GUEST_PAGE_HINTING_VQ,
>>  };
>>  
>>  static struct virtio_driver virtio_balloon_driver = {
>> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
>> new file mode 100644
>> index 0000000..0bfb646
>> --- /dev/null
>> +++ b/include/linux/page_hinting.h
>> @@ -0,0 +1,16 @@
>> +#define MAX_FGPT_ENTRIES	1000
>> +/*
>> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
>> + * @pfn - page frame number for the page which is to be freed.
>> + * @pages - number of pages which are supposed to be freed.
>> + * A global array object is used to to hold the list of pfn and pages and is
>> + * passed as part of the hypercall.
>> + */
>> +struct hypervisor_pages {
>> +	unsigned long pfn;
>> +	unsigned int pages;
>> +};
>> +
>> +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
>> +extern void (*request_hypercall)(void *, int);
>> +extern void *balloon_ptr;
>> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
>> index 13b8cb5..16d299f 100644
>> --- a/include/uapi/linux/virtio_balloon.h
>> +++ b/include/uapi/linux/virtio_balloon.h
>> @@ -34,6 +34,7 @@
>>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
>>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
>>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
>> +#define VIRTIO_GUEST_PAGE_HINTING_VQ	3 /* Page hinting virtqueue */
>>  
>>  /* Size of a PFN in the balloon interface. */
>>  #define VIRTIO_BALLOON_PFN_SHIFT 12
>> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
>> index 97500bb..417582a 100644
>> --- a/virt/kvm/page_hinting.c
>> +++ b/virt/kvm/page_hinting.c
>> @@ -5,8 +5,8 @@
>>  #include <linux/sort.h>
>>  #include <linux/kernel.h>
>>  #include <trace/events/kmem.h>
>> +#include <linux/page_hinting.h>
>>  
>> -#define MAX_FGPT_ENTRIES	1000
>>  #define HYPERLIST_THRESHOLD	1	/* FIXME: find a good threshold */
>>  /*
>>   * struct kvm_free_pages - Tracks the pages which are freed by the guest.
>> @@ -21,22 +21,15 @@ struct kvm_free_pages {
>>  	unsigned int pages;
>>  };
>>  
>> -/*
>> - * hypervisor_pages - It is a dummy structure passed with the hypercall.
>> - * @pfn - page frame number for the page which is to be freed.
>> - * @pages - number of pages which are supposed to be freed.
>> - * A global array object is used to to hold the list of pfn and pages and is
>> - * passed as part of the hypercall.
>> - */
>> -struct hypervisor_pages {
>> -	unsigned long pfn;
>> -	unsigned int pages;
>> -};
>> -
>>  static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
>>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
>>  DEFINE_PER_CPU(int, kvm_pt_idx);
>>  struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
>> +EXPORT_SYMBOL(hypervisor_pagelist);
>> +void (*request_hypercall)(void *, int);
>> +EXPORT_SYMBOL(request_hypercall);
>> +void *balloon_ptr;
>> +EXPORT_SYMBOL(balloon_ptr);
>>  
>>  static void empty_hyperlist(void)
>>  {
>> @@ -49,13 +42,11 @@ static void empty_hyperlist(void)
>>  	}
>>  }
>>  
>> -void make_hypercall(void)
>> +void hyperlist_ready(int entries)
>>  {
>> -	/*
>> -	 * Dummy function: Tobe filled later.
>> -	 */
>> -	empty_hyperlist();
>>  	trace_guest_str_dump("Hypercall to host...:");
>> +	request_hypercall(balloon_ptr, entries);
>> +	empty_hyperlist();
>>  }
>>  
>>  static int sort_pfn(const void *a1, const void *b1)
>> @@ -156,7 +147,7 @@ int compress_hyperlist(void)
>>  	if (merge_counter != 0)
>>  		ret = pack_hyperlist() - 1;
>>  	else
>> -		ret = MAX_FGPT_ENTRIES - 1;
>> +		ret = MAX_FGPT_ENTRIES;
>>  	return ret;
>>  }
>>  
>> @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
>>  			 */
>>  			if (!prev_free) {
>>  				hyper_idx++;
>> -				hypervisor_pagelist[hyper_idx].pfn = pfn;
>> -				hypervisor_pagelist[hyper_idx].pages = 1;
>>  				trace_guest_free_page_slowpath(
>>  				hypervisor_pagelist[hyper_idx].pfn,
>>  				hypervisor_pagelist[hyper_idx].pages);
>> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
>> +				hypervisor_pagelist[hyper_idx].pages = 1;
>>  				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
>>  					hyper_idx =  compress_hyperlist();
>>  					if (hyper_idx >=
>>  					    HYPERLIST_THRESHOLD) {
>> -						make_hypercall();
>> +						hyperlist_ready(hyper_idx);
>>  						hyper_idx = 0;
>>  					}
>>  				}
>> @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
>>  	 * free pages is full and a hypercall will be made. Until complete free
>>  	 * page list is traversed no further allocaiton will be allowed.
>>  	 */
>> +
>>  	do {
>>  		seq = read_seqbegin(&guest_page_lock);
>>  	} while (read_seqretry(&guest_page_lock, seq));
Luiz Capitulino June 12, 2018, 3:30 p.m. UTC | #3
On Tue, 12 Jun 2018 11:02:27 -0400
Nitesh Narayan Lal <nilal@redhat.com> wrote:

> On 06/11/2018 04:26 PM, Luiz Capitulino wrote:
> > On Mon, 11 Jun 2018 11:19:00 -0400
> > nilal@redhat.com wrote:
> >  
> >> From: Nitesh Narayan Lal <nilal@redhat.com>
> >>
> >> This patch creates a new vq (hinting_vq) to be used for page hinting
> >> and adds support in the existing virtio balloon infrastructure so
> >> that the hyper list carrying pages which are supposed to be freed
> >> could be sent to the host (QEMU) for processing by using hinting_vq.
> >>
> >> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
> >> ---
> >>  drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
> >>  include/linux/page_hinting.h        | 16 ++++++
> >>  include/uapi/linux/virtio_balloon.h |  1 +
> >>  virt/kvm/page_hinting.c             | 36 ++++++--------
> >>  4 files changed, 129 insertions(+), 23 deletions(-)
> >>  create mode 100644 include/linux/page_hinting.h
> >>
> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> >> index 6b237e3..217523f 100644
> >> --- a/drivers/virtio/virtio_balloon.c
> >> +++ b/drivers/virtio/virtio_balloon.c
> >> @@ -32,6 +32,7 @@
> >>  #include <linux/mm.h>
> >>  #include <linux/mount.h>
> >>  #include <linux/magic.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >>  /*
> >>   * Balloon device works in 4K page units.  So each page is pointed to by
> >> @@ -53,8 +54,11 @@ static struct vfsmount *balloon_mnt;
> >>  
> >>  struct virtio_balloon {
> >>  	struct virtio_device *vdev;
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
> >> +#else
> >>  	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
> >> -
> >> +#endif  
> > This is duplicating the exiting virtqueue pointers. It's only hinting_vq
> > that has to be protected by CONFIG_KVM_FREE_PAGE_HINTING.  
> I will move hinting_vq into a separate statement, so that other
> virtqueue pointers are not duplicated.
> >  
> >>  	/* The balloon servicing is delegated to a freezable workqueue. */
> >>  	struct work_struct update_balloon_stats_work;
> >>  	struct work_struct update_balloon_size_work;
> >> @@ -95,6 +99,33 @@ static struct virtio_device_id id_table[] = {
> >>  	{ 0 },
> >>  };
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
> >> +			       u64 gvaddr, int len)
> >> +{  
> > I think this function actually tells the host the whole array at once,
> > right (vs. a single page). In this case, maybe it should be renamed to
> > tell_host_free_pfns?  
> It communicates the address where the first entry of hypervisor_pagelist
> is stored. Will it make sense if I rename it to tell_host_free_list?

Yes, that's good.

> >  
> >> +	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
> >> +	u64 gpaddr = virt_to_phys((void *)gvaddr);
> >> +
> >> +	virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
> >> +	virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
> >> +}
> >> +
> >> +void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
> >> +{
> >> +	u64 gvaddr = (u64)hypervisor_pagelist;
> >> +
> >> +	vb->num_pfns = hyper_entries;
> >> +	tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
> >> +}
> >> +
> >> +static void hinting_ack(struct virtqueue *vq)
> >> +{
> >> +	struct virtio_balloon *vb = vq->vdev->priv;
> >> +
> >> +	wake_up(&vb->acked);  
> > Honest question: is there a process to be woken up? If I understood
> > correctly, the guest busy-waits for the host ACK, so the calling process is
> > never put to sleep. Or did I misunderstand?  
> Yes, It is not required. I will remove this.
> >  
> >> +}
> >> +#endif
> >> +
> >>  static u32 page_to_balloon_pfn(struct page *page)
> >>  {
> >>  	unsigned long pfn = page_to_pfn(page);
> >> @@ -425,6 +456,62 @@ static void update_balloon_size_func(struct work_struct *work)
> >>  		queue_work(system_freezable_wq, work);
> >>  }
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +static int init_vqs(struct virtio_balloon *vb)
> >> +{  
> > I don't think you can duplicate init_vqs().  
> I will use a single init_vqs with ifdefs to add the code corresponding
> to page hinting in my next patch-series. Thank you for pointing this out.
> >  
> >> +	struct virtqueue *vqs[4];
> >> +	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
> >> +				       hinting_ack };
> >> +	static const char * const names[] = { "inflate", "deflate", "stats",
> >> +					      "hinting" };
> >> +	int err, nvqs;
> >> +	bool stats_vq_support, page_hinting_support;
> >> +
> >> +	/*
> >> +	 * We expect two virtqueues: inflate and deflate, and
> >> +	 * optionally stat and hinting.
> >> +	 */
> >> +	stats_vq_support = virtio_has_feature(vb->vdev,
> >> +					      VIRTIO_BALLOON_F_STATS_VQ);
> >> +	page_hinting_support = virtio_has_feature(vb->vdev,
> >> +						  VIRTIO_GUEST_PAGE_HINTING_VQ
> >> +						  );
> >> +	if (stats_vq_support && page_hinting_support)
> >> +		nvqs = 4;
> >> +	else if (stats_vq_support || page_hinting_support)
> >> +		nvqs = 3;
> >> +	else
> >> +		nvqs = 2;
> >> +
> >> +	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
> >> +	if (err)
> >> +		return err;
> >> +
> >> +	vb->inflate_vq = vqs[0];
> >> +	vb->deflate_vq = vqs[1];
> >> +	if (page_hinting_support)
> >> +		vb->hinting_vq = vqs[3];
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
> >> +		struct scatterlist sg;
> >> +		unsigned int num_stats;
> >> +
> >> +		vb->stats_vq = vqs[2];
> >> +
> >> +		/*
> >> +		 * Prime this virtqueue with one buffer so the hypervisor can
> >> +		 * use it to signal us later (it can't be broken yet!).
> >> +		 */
> >> +		num_stats = update_balloon_stats(vb);
> >> +
> >> +		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
> >> +		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
> >> +		    < 0)
> >> +			BUG();
> >> +		virtqueue_kick(vb->stats_vq);
> >> +	}
> >> +	return 0;
> >> +}
> >> +#else
> >>  static int init_vqs(struct virtio_balloon *vb)
> >>  {
> >>  	struct virtqueue *vqs[3];
> >> @@ -463,6 +550,8 @@ static int init_vqs(struct virtio_balloon *vb)
> >>  	return 0;
> >>  }
> >>  
> >> +#endif
> >> +
> >>  #ifdef CONFIG_BALLOON_COMPACTION
> >>  /*
> >>   * virtballoon_migratepage - perform the balloon page migration on behalf of
> >> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
> >>  
> >>  	virtio_device_ready(vdev);
> >>  
> >> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> >> +	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
> >> +		request_hypercall = (void *)&virtballoon_page_hinting;
> >> +		balloon_ptr = vb;
> >> +	}
> >> +#endif  
> > Very minor, but I think it would be nicer if you used a function from
> > virt/kvm/page_hinting.c instead of assigning global variables. Say:
> >
> > page_hinting_enable(vb, virtballoon_page_hinting);  
> I will make this change.
> >  
> >> +
> >>  	if (towards_target(vb))
> >>  		virtballoon_changed(vdev);
> >>  	return 0;
> >> @@ -692,6 +788,7 @@ static unsigned int features[] = {
> >>  	VIRTIO_BALLOON_F_MUST_TELL_HOST,
> >>  	VIRTIO_BALLOON_F_STATS_VQ,
> >>  	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
> >> +	VIRTIO_GUEST_PAGE_HINTING_VQ,
> >>  };
> >>  
> >>  static struct virtio_driver virtio_balloon_driver = {
> >> diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
> >> new file mode 100644
> >> index 0000000..0bfb646
> >> --- /dev/null
> >> +++ b/include/linux/page_hinting.h
> >> @@ -0,0 +1,16 @@
> >> +#define MAX_FGPT_ENTRIES	1000
> >> +/*
> >> + * hypervisor_pages - It is a dummy structure passed with the hypercall.
> >> + * @pfn - page frame number for the page which is to be freed.
> >> + * @pages - number of pages which are supposed to be freed.
> >> + * A global array object is used to to hold the list of pfn and pages and is
> >> + * passed as part of the hypercall.
> >> + */
> >> +struct hypervisor_pages {
> >> +	unsigned long pfn;
> >> +	unsigned int pages;
> >> +};
> >> +
> >> +extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> >> +extern void (*request_hypercall)(void *, int);
> >> +extern void *balloon_ptr;
> >> diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
> >> index 13b8cb5..16d299f 100644
> >> --- a/include/uapi/linux/virtio_balloon.h
> >> +++ b/include/uapi/linux/virtio_balloon.h
> >> @@ -34,6 +34,7 @@
> >>  #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
> >>  #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
> >>  #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
> >> +#define VIRTIO_GUEST_PAGE_HINTING_VQ	3 /* Page hinting virtqueue */
> >>  
> >>  /* Size of a PFN in the balloon interface. */
> >>  #define VIRTIO_BALLOON_PFN_SHIFT 12
> >> diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
> >> index 97500bb..417582a 100644
> >> --- a/virt/kvm/page_hinting.c
> >> +++ b/virt/kvm/page_hinting.c
> >> @@ -5,8 +5,8 @@
> >>  #include <linux/sort.h>
> >>  #include <linux/kernel.h>
> >>  #include <trace/events/kmem.h>
> >> +#include <linux/page_hinting.h>
> >>  
> >> -#define MAX_FGPT_ENTRIES	1000
> >>  #define HYPERLIST_THRESHOLD	1	/* FIXME: find a good threshold */
> >>  /*
> >>   * struct kvm_free_pages - Tracks the pages which are freed by the guest.
> >> @@ -21,22 +21,15 @@ struct kvm_free_pages {
> >>  	unsigned int pages;
> >>  };
> >>  
> >> -/*
> >> - * hypervisor_pages - It is a dummy structure passed with the hypercall.
> >> - * @pfn - page frame number for the page which is to be freed.
> >> - * @pages - number of pages which are supposed to be freed.
> >> - * A global array object is used to to hold the list of pfn and pages and is
> >> - * passed as part of the hypercall.
> >> - */
> >> -struct hypervisor_pages {
> >> -	unsigned long pfn;
> >> -	unsigned int pages;
> >> -};
> >> -
> >>  static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
> >>  DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
> >>  DEFINE_PER_CPU(int, kvm_pt_idx);
> >>  struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
> >> +EXPORT_SYMBOL(hypervisor_pagelist);
> >> +void (*request_hypercall)(void *, int);
> >> +EXPORT_SYMBOL(request_hypercall);
> >> +void *balloon_ptr;
> >> +EXPORT_SYMBOL(balloon_ptr);
> >>  
> >>  static void empty_hyperlist(void)
> >>  {
> >> @@ -49,13 +42,11 @@ static void empty_hyperlist(void)
> >>  	}
> >>  }
> >>  
> >> -void make_hypercall(void)
> >> +void hyperlist_ready(int entries)
> >>  {
> >> -	/*
> >> -	 * Dummy function: Tobe filled later.
> >> -	 */
> >> -	empty_hyperlist();
> >>  	trace_guest_str_dump("Hypercall to host...:");
> >> +	request_hypercall(balloon_ptr, entries);
> >> +	empty_hyperlist();
> >>  }
> >>  
> >>  static int sort_pfn(const void *a1, const void *b1)
> >> @@ -156,7 +147,7 @@ int compress_hyperlist(void)
> >>  	if (merge_counter != 0)
> >>  		ret = pack_hyperlist() - 1;
> >>  	else
> >> -		ret = MAX_FGPT_ENTRIES - 1;
> >> +		ret = MAX_FGPT_ENTRIES;
> >>  	return ret;
> >>  }
> >>  
> >> @@ -227,16 +218,16 @@ void arch_free_page_slowpath(void)
> >>  			 */
> >>  			if (!prev_free) {
> >>  				hyper_idx++;
> >> -				hypervisor_pagelist[hyper_idx].pfn = pfn;
> >> -				hypervisor_pagelist[hyper_idx].pages = 1;
> >>  				trace_guest_free_page_slowpath(
> >>  				hypervisor_pagelist[hyper_idx].pfn,
> >>  				hypervisor_pagelist[hyper_idx].pages);
> >> +				hypervisor_pagelist[hyper_idx].pfn = pfn;
> >> +				hypervisor_pagelist[hyper_idx].pages = 1;
> >>  				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
> >>  					hyper_idx =  compress_hyperlist();
> >>  					if (hyper_idx >=
> >>  					    HYPERLIST_THRESHOLD) {
> >> -						make_hypercall();
> >> +						hyperlist_ready(hyper_idx);
> >>  						hyper_idx = 0;
> >>  					}
> >>  				}
> >> @@ -272,6 +263,7 @@ void arch_alloc_page(struct page *page, int order)
> >>  	 * free pages is full and a hypercall will be made. Until complete free
> >>  	 * page list is traversed no further allocaiton will be allowed.
> >>  	 */
> >> +
> >>  	do {
> >>  		seq = read_seqbegin(&guest_page_lock);
> >>  	} while (read_seqretry(&guest_page_lock, seq));  
>
Yi Sun July 10, 2018, 8:42 a.m. UTC | #4
On 18-06-11 11:19:00, nilal@redhat.com wrote:
> From: Nitesh Narayan Lal <nilal@redhat.com>
> 
> This patch creates a new vq (hinting_vq) to be used for page hinting
> and adds support in the existing virtio balloon infrastructure so
> that the hyper list carrying pages which are supposed to be freed
> could be sent to the host (QEMU) for processing by using hinting_vq.
> 
> Signed-off-by: Nitesh Narayan Lal <nilal@redhat.com>
> ---
>  drivers/virtio/virtio_balloon.c     | 99 ++++++++++++++++++++++++++++++++++++-
>  include/linux/page_hinting.h        | 16 ++++++
>  include/uapi/linux/virtio_balloon.h |  1 +
>  virt/kvm/page_hinting.c             | 36 ++++++--------
>  4 files changed, 129 insertions(+), 23 deletions(-)
>  create mode 100644 include/linux/page_hinting.h
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 6b237e3..217523f 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -32,6 +32,7 @@
[...]

>  #ifdef CONFIG_BALLOON_COMPACTION
>  /*
>   * virtballoon_migratepage - perform the balloon page migration on behalf of
> @@ -604,6 +693,13 @@ static int virtballoon_probe(struct virtio_device *vdev)
>  
>  	virtio_device_ready(vdev);
>  
> +#ifdef CONFIG_KVM_FREE_PAGE_HINTING
> +	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
> +		request_hypercall = (void *)&virtballoon_page_hinting;
> +		balloon_ptr = vb;
> +	}
> +#endif

balloon_ptr is valid only when VIRTIO_GUEST_PAGE_HINTING_VQ is set.
So ....

> +void hyperlist_ready(int entries)
>  {
> -	/*
> -	 * Dummy function: Tobe filled later.
> -	 */
> -	empty_hyperlist();
>  	trace_guest_str_dump("Hypercall to host...:");
> +	request_hypercall(balloon_ptr, entries);

Should check if balloon_ptr is valid here. Otherwise, there may be
"kernel NULL pointer dereference" bug.

> +	empty_hyperlist();
>  }
diff mbox

Patch

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 6b237e3..217523f 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,6 +32,7 @@ 
 #include <linux/mm.h>
 #include <linux/mount.h>
 #include <linux/magic.h>
+#include <linux/page_hinting.h>
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -53,8 +54,11 @@  static struct vfsmount *balloon_mnt;
 
 struct virtio_balloon {
 	struct virtio_device *vdev;
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *hinting_vq;
+#else
 	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
-
+#endif
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
 	struct work_struct update_balloon_size_work;
@@ -95,6 +99,33 @@  static struct virtio_device_id id_table[] = {
 	{ 0 },
 };
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+static void tell_host_one_page(struct virtio_balloon *vb, struct virtqueue *vq,
+			       u64 gvaddr, int len)
+{
+	unsigned int id = VIRTQUEUE_DESC_ID_INIT;
+	u64 gpaddr = virt_to_phys((void *)gvaddr);
+
+	virtqueue_add_chain_desc(vq, gpaddr, len, &id, &id, 0);
+	virtqueue_add_chain(vq, id, 0, NULL, (void *)gpaddr, NULL);
+}
+
+void virtballoon_page_hinting(struct virtio_balloon *vb, int hyper_entries)
+{
+	u64 gvaddr = (u64)hypervisor_pagelist;
+
+	vb->num_pfns = hyper_entries;
+	tell_host_one_page(vb, vb->hinting_vq, gvaddr, hyper_entries);
+}
+
+static void hinting_ack(struct virtqueue *vq)
+{
+	struct virtio_balloon *vb = vq->vdev->priv;
+
+	wake_up(&vb->acked);
+}
+#endif
+
 static u32 page_to_balloon_pfn(struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
@@ -425,6 +456,62 @@  static void update_balloon_size_func(struct work_struct *work)
 		queue_work(system_freezable_wq, work);
 }
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+static int init_vqs(struct virtio_balloon *vb)
+{
+	struct virtqueue *vqs[4];
+	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request,
+				       hinting_ack };
+	static const char * const names[] = { "inflate", "deflate", "stats",
+					      "hinting" };
+	int err, nvqs;
+	bool stats_vq_support, page_hinting_support;
+
+	/*
+	 * We expect two virtqueues: inflate and deflate, and
+	 * optionally stat and hinting.
+	 */
+	stats_vq_support = virtio_has_feature(vb->vdev,
+					      VIRTIO_BALLOON_F_STATS_VQ);
+	page_hinting_support = virtio_has_feature(vb->vdev,
+						  VIRTIO_GUEST_PAGE_HINTING_VQ
+						  );
+	if (stats_vq_support && page_hinting_support)
+		nvqs = 4;
+	else if (stats_vq_support || page_hinting_support)
+		nvqs = 3;
+	else
+		nvqs = 2;
+
+	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+	if (err)
+		return err;
+
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
+	if (page_hinting_support)
+		vb->hinting_vq = vqs[3];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		unsigned int num_stats;
+
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later (it can't be broken yet!).
+		 */
+		num_stats = update_balloon_stats(vb);
+
+		sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats);
+		if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
+		    < 0)
+			BUG();
+		virtqueue_kick(vb->stats_vq);
+	}
+	return 0;
+}
+#else
 static int init_vqs(struct virtio_balloon *vb)
 {
 	struct virtqueue *vqs[3];
@@ -463,6 +550,8 @@  static int init_vqs(struct virtio_balloon *vb)
 	return 0;
 }
 
+#endif
+
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -604,6 +693,13 @@  static int virtballoon_probe(struct virtio_device *vdev)
 
 	virtio_device_ready(vdev);
 
+#ifdef CONFIG_KVM_FREE_PAGE_HINTING
+	if (virtio_has_feature(vb->vdev, VIRTIO_GUEST_PAGE_HINTING_VQ)) {
+		request_hypercall = (void *)&virtballoon_page_hinting;
+		balloon_ptr = vb;
+	}
+#endif
+
 	if (towards_target(vb))
 		virtballoon_changed(vdev);
 	return 0;
@@ -692,6 +788,7 @@  static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_GUEST_PAGE_HINTING_VQ,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/linux/page_hinting.h b/include/linux/page_hinting.h
new file mode 100644
index 0000000..0bfb646
--- /dev/null
+++ b/include/linux/page_hinting.h
@@ -0,0 +1,16 @@ 
+#define MAX_FGPT_ENTRIES	1000
+/*
+ * hypervisor_pages - It is a dummy structure passed with the hypercall.
+ * @pfn - page frame number for the page which is to be freed.
+ * @pages - number of pages which are supposed to be freed.
+ * A global array object is used to to hold the list of pfn and pages and is
+ * passed as part of the hypercall.
+ */
+struct hypervisor_pages {
+	unsigned long pfn;
+	unsigned int pages;
+};
+
+extern struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+extern void (*request_hypercall)(void *, int);
+extern void *balloon_ptr;
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb5..16d299f 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,6 +34,7 @@ 
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_GUEST_PAGE_HINTING_VQ	3 /* Page hinting virtqueue */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
diff --git a/virt/kvm/page_hinting.c b/virt/kvm/page_hinting.c
index 97500bb..417582a 100644
--- a/virt/kvm/page_hinting.c
+++ b/virt/kvm/page_hinting.c
@@ -5,8 +5,8 @@ 
 #include <linux/sort.h>
 #include <linux/kernel.h>
 #include <trace/events/kmem.h>
+#include <linux/page_hinting.h>
 
-#define MAX_FGPT_ENTRIES	1000
 #define HYPERLIST_THRESHOLD	1	/* FIXME: find a good threshold */
 /*
  * struct kvm_free_pages - Tracks the pages which are freed by the guest.
@@ -21,22 +21,15 @@  struct kvm_free_pages {
 	unsigned int pages;
 };
 
-/*
- * hypervisor_pages - It is a dummy structure passed with the hypercall.
- * @pfn - page frame number for the page which is to be freed.
- * @pages - number of pages which are supposed to be freed.
- * A global array object is used to to hold the list of pfn and pages and is
- * passed as part of the hypercall.
- */
-struct hypervisor_pages {
-	unsigned long pfn;
-	unsigned int pages;
-};
-
 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(guest_page_lock);
 DEFINE_PER_CPU(struct kvm_free_pages [MAX_FGPT_ENTRIES], kvm_pt);
 DEFINE_PER_CPU(int, kvm_pt_idx);
 struct hypervisor_pages hypervisor_pagelist[MAX_FGPT_ENTRIES];
+EXPORT_SYMBOL(hypervisor_pagelist);
+void (*request_hypercall)(void *, int);
+EXPORT_SYMBOL(request_hypercall);
+void *balloon_ptr;
+EXPORT_SYMBOL(balloon_ptr);
 
 static void empty_hyperlist(void)
 {
@@ -49,13 +42,11 @@  static void empty_hyperlist(void)
 	}
 }
 
-void make_hypercall(void)
+void hyperlist_ready(int entries)
 {
-	/*
-	 * Dummy function: Tobe filled later.
-	 */
-	empty_hyperlist();
 	trace_guest_str_dump("Hypercall to host...:");
+	request_hypercall(balloon_ptr, entries);
+	empty_hyperlist();
 }
 
 static int sort_pfn(const void *a1, const void *b1)
@@ -156,7 +147,7 @@  int compress_hyperlist(void)
 	if (merge_counter != 0)
 		ret = pack_hyperlist() - 1;
 	else
-		ret = MAX_FGPT_ENTRIES - 1;
+		ret = MAX_FGPT_ENTRIES;
 	return ret;
 }
 
@@ -227,16 +218,16 @@  void arch_free_page_slowpath(void)
 			 */
 			if (!prev_free) {
 				hyper_idx++;
-				hypervisor_pagelist[hyper_idx].pfn = pfn;
-				hypervisor_pagelist[hyper_idx].pages = 1;
 				trace_guest_free_page_slowpath(
 				hypervisor_pagelist[hyper_idx].pfn,
 				hypervisor_pagelist[hyper_idx].pages);
+				hypervisor_pagelist[hyper_idx].pfn = pfn;
+				hypervisor_pagelist[hyper_idx].pages = 1;
 				if (hyper_idx == MAX_FGPT_ENTRIES - 1) {
 					hyper_idx =  compress_hyperlist();
 					if (hyper_idx >=
 					    HYPERLIST_THRESHOLD) {
-						make_hypercall();
+						hyperlist_ready(hyper_idx);
 						hyper_idx = 0;
 					}
 				}
@@ -272,6 +263,7 @@  void arch_alloc_page(struct page *page, int order)
 	 * free pages is full and a hypercall will be made. Until complete free
 	 * page list is traversed no further allocaiton will be allowed.
 	 */
+
 	do {
 		seq = read_seqbegin(&guest_page_lock);
 	} while (read_seqretry(&guest_page_lock, seq));