Message ID | 1523017045-18315-3-git-send-email-wei.w.wang@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Apr 06, 2018 at 08:17:23PM +0800, Wei Wang wrote: > Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the > support of reporting hints of guest free pages to host via virtio-balloon. > > Host requests the guest to report free page hints by sending a new cmd > id to the guest via the free_page_report_cmd_id configuration register. > > When the guest starts to report, the first element added to the free page > vq is the cmd id given by host. When the guest finishes the reporting > of all the free pages, VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID is added > to the vq to tell host that the reporting is done. Host polls the free > page vq after sending the starting cmd id, so the guest doesn't need to > kick after filling an element to the vq. > > Host may also requests the guest to stop the reporting in advance by > sending the stop cmd id to the guest via the configuration register. > > Signed-off-by: Wei Wang <wei.w.wang@intel.com> > Signed-off-by: Liang Li <liang.z.li@intel.com> > Cc: Michael S. Tsirkin <mst@redhat.com> > Cc: Michal Hocko <mhocko@kernel.org> Pretty good by now, Minor comments below. > --- > drivers/virtio/virtio_balloon.c | 272 +++++++++++++++++++++++++++++++----- > include/uapi/linux/virtio_balloon.h | 4 + > 2 files changed, 240 insertions(+), 36 deletions(-) > > diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c > index dfe5684..aef73ee 100644 > --- a/drivers/virtio/virtio_balloon.c > +++ b/drivers/virtio/virtio_balloon.c > @@ -51,9 +51,22 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); > static struct vfsmount *balloon_mnt; > #endif > > +enum virtio_balloon_vq { > + VIRTIO_BALLOON_VQ_INFLATE, > + VIRTIO_BALLOON_VQ_DEFLATE, > + VIRTIO_BALLOON_VQ_STATS, > + VIRTIO_BALLOON_VQ_FREE_PAGE, > + VIRTIO_BALLOON_VQ_MAX > +}; > + > struct virtio_balloon { > struct virtio_device *vdev; > - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; > + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; > + > + /* Balloon's own wq for cpu-intensive work items */ > + struct workqueue_struct *balloon_wq; > + /* The free page reporting work item submitted to the balloon wq */ > + struct work_struct report_free_page_work; > > /* The balloon servicing is delegated to a freezable workqueue. */ > struct work_struct update_balloon_stats_work; > @@ -63,6 +76,13 @@ struct virtio_balloon { > spinlock_t stop_update_lock; > bool stop_update; > > + /* The new cmd id received from host */ > + uint32_t cmd_id_received; > + /* The cmd id that is in use */ > + __virtio32 cmd_id_use; I'd prefer cmd_id_active but it's not critical. > + /* Buffer to store the stop sign */ > + __virtio32 stop_cmd_id; > + > /* Waiting for host to ack the pages we released. */ > wait_queue_head_t acked; > > @@ -320,17 +340,6 @@ static void stats_handle_request(struct virtio_balloon *vb) > virtqueue_kick(vq); > } > > -static void virtballoon_changed(struct virtio_device *vdev) > -{ > - struct virtio_balloon *vb = vdev->priv; > - unsigned long flags; > - > - spin_lock_irqsave(&vb->stop_update_lock, flags); > - if (!vb->stop_update) > - queue_work(system_freezable_wq, &vb->update_balloon_size_work); > - spin_unlock_irqrestore(&vb->stop_update_lock, flags); > -} > - > static inline s64 towards_target(struct virtio_balloon *vb) > { > s64 target; > @@ -347,6 +356,34 @@ static inline s64 towards_target(struct virtio_balloon *vb) > return target - vb->num_pages; > } > > +static void virtballoon_changed(struct virtio_device *vdev) > +{ > + struct virtio_balloon *vb = vdev->priv; > + unsigned long flags; > + s64 diff = towards_target(vb); > + > + if (diff) { > + spin_lock_irqsave(&vb->stop_update_lock, flags); > + if (!vb->stop_update) > + queue_work(system_freezable_wq, > + &vb->update_balloon_size_work); > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > + } > + > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > + virtio_cread(vdev, struct virtio_balloon_config, > + free_page_report_cmd_id, &vb->cmd_id_received); > + if (vb->cmd_id_received != > + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID) { > + spin_lock_irqsave(&vb->stop_update_lock, flags); > + if (!vb->stop_update) > + queue_work(vb->balloon_wq, > + &vb->report_free_page_work); > + spin_unlock_irqrestore(&vb->stop_update_lock, flags); > + } > + } > +} > + > static void update_balloon_size(struct virtio_balloon *vb) > { > u32 actual = vb->num_pages; > @@ -421,42 +458,178 @@ static void update_balloon_size_func(struct work_struct *work) > > static int init_vqs(struct virtio_balloon *vb) > { > - struct virtqueue *vqs[3]; > - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; > - static const char * const names[] = { "inflate", "deflate", "stats" }; > - int err, nvqs; > + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; > + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; > + const char *names[VIRTIO_BALLOON_VQ_MAX]; > + struct scatterlist sg; > + int ret; > > /* > - * We expect two virtqueues: inflate and deflate, and > - * optionally stat. > + * Inflateq and deflateq are used unconditionally. The names[] > + * will be NULL if the related feature is not enabled, which will > + * cause no allocation for the corresponding virtqueue in find_vqs. > */ > - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; > - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); > - if (err) > - return err; > + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; > + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; > + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; > + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; > + names[VIRTIO_BALLOON_VQ_STATS] = NULL; > + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; > > - vb->inflate_vq = vqs[0]; > - vb->deflate_vq = vqs[1]; > if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > - struct scatterlist sg; > - unsigned int num_stats; > - vb->stats_vq = vqs[2]; > + names[VIRTIO_BALLOON_VQ_STATS] = "stats"; > + callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; > + } > > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; > + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; > + } > + > + ret = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, > + vqs, callbacks, names, NULL, NULL); > + if (ret) > + return ret; > + > + vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; > + vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { > + vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; > /* > * Prime this virtqueue with one buffer so the hypervisor can > * use it to signal us later (it can't be broken yet!). > */ > - num_stats = update_balloon_stats(vb); > - > - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); > - if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) > - < 0) > - BUG(); > + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); > + ret = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, > + GFP_KERNEL); > + if (ret) { > + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", > + __func__); > + return ret; > + } > virtqueue_kick(vb->stats_vq); > } > + > + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) > + vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; > + > return 0; > } > > +static int add_one_sg(struct virtqueue *vq, unsigned long pfn, uint32_t len) > +{ > + struct scatterlist sg; > + unsigned int unused; > + > + sg_init_table(&sg, 1); > + sg_set_page(&sg, pfn_to_page(pfn), len, 0); > + > + /* Detach all the used buffers from the vq */ > + while (virtqueue_get_buf(vq, &unused)) > + ; > + > + /* > + * Since this is an optimization feature, losing a couple of free > + * pages to report isn't important. We simply return without adding > + * this page hint if the vq is full. > + * We are adding one entry each time, which essentially results in no > + * memory allocation, so the GFP_KERNEL flag below can be ignored. > + * Host works by polling the free page vq for hints after sending the > + * starting cmd id, so the driver doesn't need to kick after filling > + * the vq. > + * Lastly, there is always one entry reserved for the cmd id to use. > + * > + * TODO: The current implementation could be further improved by > + * stopping the reporting when the vq is full and continuing the > + * reporting when host notifies the driver that entries have been > + * used. > + */ > + if (vq->num_free > 1) > + return virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL); > + > + return 0; > +} > + > +static int virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, > + unsigned long nr_pages) > +{ > + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; > + uint32_t len = nr_pages << PAGE_SHIFT; > + > + /* > + * If a stop id or a new cmd id was just received from host, stop > + * the reporting, and return -EINTR to indicate an active stop. > + * > + * Ideally, we could have cmd_id_received accessed under locks, which > + * ensures that no more entries are added to the vq once a stop cmd > + * id is received from host. This requires host to wait for the > + * driver's ACK about finishing the update of cmd_id_received. But > + * this is not how host side works, because host doesn't work on an > + * assumption that the driver would always be responsive. So > + * theorically, there are possibilities that some entries may stay in > + * the vq when host has exited the optimization. This isn't an issue, > + * because entries simply contain guest physical addresses. There is no > + * allocated memory that needs to be freed or dma mapped pages that > + * need to be unmapped since virtio-balloon works with > + * VIRTIO_F_IOMMU_PLATFORM disabled. > + */ > + if (virtio32_to_cpu(vb->vdev, vb->cmd_id_use) != vb->cmd_id_received) > + return -EINTR; > + > + return add_one_sg(vb->free_page_vq, pfn, len); > +} > + > +static int send_start_cmd_id(struct virtio_balloon *vb, uint32_t cmd_id) > +{ > + struct scatterlist sg; > + struct virtqueue *vq = vb->free_page_vq; > + > + vb->cmd_id_use = cpu_to_virtio32(vb->vdev, cmd_id); > + sg_init_one(&sg, &vb->cmd_id_use, sizeof(vb->cmd_id_use)); > + return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); > +} > + > +static int send_stop_cmd_id(struct virtio_balloon *vb) > +{ > + struct scatterlist sg; > + struct virtqueue *vq = vb->free_page_vq; > + > + sg_init_one(&sg, &vb->stop_cmd_id, sizeof(vb->stop_cmd_id)); > + return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); > +} > + > +static void report_free_page_func(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + struct virtqueue *vq; > + unsigned int unused; > + int ret; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + vq = vb->free_page_vq; > + > + /* Start by sending the received cmd id to host with an outbuf. */ > + ret = send_start_cmd_id(vb, vb->cmd_id_received); > + if (unlikely(ret)) > + goto err; > + > + ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + if (unlikely(ret == -EIO)) > + goto err; why is EIO special? I think you should special-case EINTR maybe. > + > + /* End by sending a stop id to host with an outbuf. */ > + ret = send_stop_cmd_id(vb); > + if (likely(!ret)) { What happens on failure? Don't we need to detach anyway? > + /* Ending: detach all the used buffers from the vq. */ > + while (vq->num_free != virtqueue_get_vring_size(vq)) > + virtqueue_get_buf(vq, &unused); This isn't all that happens here. It also waits for buffers to be consumed. Is this by design? And why is it good idea to busy poll while doing it? > + return; Better move this return outside. Fall through to error is confusing. > + } > +err: > + dev_err(&vb->vdev->dev, "%s: free page vq failure, ret=%d\n", > + __func__, ret); > +} > + > #ifdef CONFIG_BALLOON_COMPACTION > /* > * virtballoon_migratepage - perform the balloon page migration on behalf of > @@ -570,18 +743,36 @@ static int virtballoon_probe(struct virtio_device *vdev) > if (err) > goto out_free_vb; > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > + /* > + * There is always one entry reserved for cmd id, so the ring > + * size needs to be at least two to report free page hints. > + */ > + if (virtqueue_get_vring_size(vb->free_page_vq) < 2) > + goto out_free_vb; > + vb->balloon_wq = alloc_workqueue("balloon-wq", > + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); > + if (!vb->balloon_wq) { > + err = -ENOMEM; > + goto out_del_vqs; > + } > + vb->stop_cmd_id = cpu_to_virtio32(vb->vdev, > + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID); > + INIT_WORK(&vb->report_free_page_work, report_free_page_func); > + } > + > vb->nb.notifier_call = virtballoon_oom_notify; > vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; > err = register_oom_notifier(&vb->nb); > if (err < 0) > - goto out_del_vqs; > + goto out_del_balloon_wq; > > #ifdef CONFIG_BALLOON_COMPACTION > balloon_mnt = kern_mount(&balloon_fs); > if (IS_ERR(balloon_mnt)) { > err = PTR_ERR(balloon_mnt); > unregister_oom_notifier(&vb->nb); > - goto out_del_vqs; > + goto out_del_balloon_wq; > } > > vb->vb_dev_info.migratepage = virtballoon_migratepage; > @@ -591,7 +782,7 @@ static int virtballoon_probe(struct virtio_device *vdev) > kern_unmount(balloon_mnt); > unregister_oom_notifier(&vb->nb); > vb->vb_dev_info.inode = NULL; > - goto out_del_vqs; > + goto out_del_balloon_wq; > } > vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; > #endif > @@ -602,6 +793,9 @@ static int virtballoon_probe(struct virtio_device *vdev) > virtballoon_changed(vdev); > return 0; > > +out_del_balloon_wq: > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) > + destroy_workqueue(vb->balloon_wq); > out_del_vqs: > vdev->config->del_vqs(vdev); > out_free_vb: > @@ -635,6 +829,11 @@ static void virtballoon_remove(struct virtio_device *vdev) > cancel_work_sync(&vb->update_balloon_size_work); > cancel_work_sync(&vb->update_balloon_stats_work); > > + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > + cancel_work_sync(&vb->report_free_page_work); > + destroy_workqueue(vb->balloon_wq); > + } > + > remove_common(vb); > #ifdef CONFIG_BALLOON_COMPACTION > if (vb->vb_dev_info.inode) > @@ -686,6 +885,7 @@ static unsigned int features[] = { > VIRTIO_BALLOON_F_MUST_TELL_HOST, > VIRTIO_BALLOON_F_STATS_VQ, > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, > + VIRTIO_BALLOON_F_FREE_PAGE_HINT, > }; > > static struct virtio_driver virtio_balloon_driver = { > diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h > index 4e8b830..b2d86c2 100644 > --- a/include/uapi/linux/virtio_balloon.h > +++ b/include/uapi/linux/virtio_balloon.h > @@ -34,15 +34,19 @@ > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ > #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ > #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ > +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ > > /* Size of a PFN in the balloon interface. */ > #define VIRTIO_BALLOON_PFN_SHIFT 12 > > +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0 > struct virtio_balloon_config { > /* Number of pages host wants Guest to give up. */ > __u32 num_pages; > /* Number of pages we've actually got in balloon. */ > __u32 actual; > + /* Free page report command id, readonly by guest */ > + __u32 free_page_report_cmd_id; > }; > > #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */ > -- > 2.7.4
On 04/09/2018 02:03 PM, Michael S. Tsirkin wrote: > On Fri, Apr 06, 2018 at 08:17:23PM +0800, Wei Wang wrote: >> Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the >> support of reporting hints of guest free pages to host via virtio-balloon. >> >> Host requests the guest to report free page hints by sending a new cmd >> id to the guest via the free_page_report_cmd_id configuration register. >> >> When the guest starts to report, the first element added to the free page >> vq is the cmd id given by host. When the guest finishes the reporting >> of all the free pages, VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID is added >> to the vq to tell host that the reporting is done. Host polls the free >> page vq after sending the starting cmd id, so the guest doesn't need to >> kick after filling an element to the vq. >> >> Host may also requests the guest to stop the reporting in advance by >> sending the stop cmd id to the guest via the configuration register. >> >> Signed-off-by: Wei Wang <wei.w.wang@intel.com> >> Signed-off-by: Liang Li <liang.z.li@intel.com> >> Cc: Michael S. Tsirkin <mst@redhat.com> >> Cc: Michal Hocko <mhocko@kernel.org> > > Pretty good by now, Minor comments below. Thanks for the comments. > >> --- >> drivers/virtio/virtio_balloon.c | 272 +++++++++++++++++++++++++++++++----- >> include/uapi/linux/virtio_balloon.h | 4 + >> 2 files changed, 240 insertions(+), 36 deletions(-) >> >> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c >> index dfe5684..aef73ee 100644 >> --- a/drivers/virtio/virtio_balloon.c >> +++ b/drivers/virtio/virtio_balloon.c >> @@ -51,9 +51,22 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); >> static struct vfsmount *balloon_mnt; >> #endif >> >> +enum virtio_balloon_vq { >> + VIRTIO_BALLOON_VQ_INFLATE, >> + VIRTIO_BALLOON_VQ_DEFLATE, >> + VIRTIO_BALLOON_VQ_STATS, >> + VIRTIO_BALLOON_VQ_FREE_PAGE, >> + VIRTIO_BALLOON_VQ_MAX >> +}; >> + >> struct virtio_balloon { >> struct virtio_device *vdev; >> - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; >> + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; >> + >> + /* Balloon's own wq for cpu-intensive work items */ >> + struct workqueue_struct *balloon_wq; >> + /* The free page reporting work item submitted to the balloon wq */ >> + struct work_struct report_free_page_work; >> >> /* The balloon servicing is delegated to a freezable workqueue. */ >> struct work_struct update_balloon_stats_work; >> @@ -63,6 +76,13 @@ struct virtio_balloon { >> spinlock_t stop_update_lock; >> bool stop_update; >> >> + /* The new cmd id received from host */ >> + uint32_t cmd_id_received; >> + /* The cmd id that is in use */ >> + __virtio32 cmd_id_use; > I'd prefer cmd_id_active but it's not critical. OK, will change. > > + > +static void report_free_page_func(struct work_struct *work) > +{ > + struct virtio_balloon *vb; > + struct virtqueue *vq; > + unsigned int unused; > + int ret; > + > + vb = container_of(work, struct virtio_balloon, report_free_page_work); > + vq = vb->free_page_vq; > + > + /* Start by sending the received cmd id to host with an outbuf. */ > + ret = send_start_cmd_id(vb, vb->cmd_id_received); > + if (unlikely(ret)) > + goto err; > + > + ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); > + if (unlikely(ret == -EIO)) > + goto err; > why is EIO special? I think you should special-case EINTR maybe. Actually EINTR isn't an error which needs to bail out. That's just the case that the vq is full, that hint isn't added. Maybe it is not necessary to treat the "vq full" case as an error. How about just returning "0" when the vq is full, instead of returning "EINTR"? (The next hint will continue to be added) > >> + >> + /* End by sending a stop id to host with an outbuf. */ >> + ret = send_stop_cmd_id(vb); >> + if (likely(!ret)) { > What happens on failure? Don't we need to detach anyway? Yes. Please see below, we could make some more change. > >> + /* Ending: detach all the used buffers from the vq. */ >> + while (vq->num_free != virtqueue_get_vring_size(vq)) >> + virtqueue_get_buf(vq, &unused); > This isn't all that happens here. It also waits for buffers to > be consumed. Is this by design? And why is it good idea to > busy poll while doing it? Because host and guest operations happen asynchronously. When the guest reaches here, host may have not put anything to the vq yet. How about doing this via the free page vq handler? Host will send a free page vq interrupt before exiting the optimization. I think this would be nicer. Best, Wei
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index dfe5684..aef73ee 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -51,9 +51,22 @@ MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); static struct vfsmount *balloon_mnt; #endif +enum virtio_balloon_vq { + VIRTIO_BALLOON_VQ_INFLATE, + VIRTIO_BALLOON_VQ_DEFLATE, + VIRTIO_BALLOON_VQ_STATS, + VIRTIO_BALLOON_VQ_FREE_PAGE, + VIRTIO_BALLOON_VQ_MAX +}; + struct virtio_balloon { struct virtio_device *vdev; - struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; + struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq; + + /* Balloon's own wq for cpu-intensive work items */ + struct workqueue_struct *balloon_wq; + /* The free page reporting work item submitted to the balloon wq */ + struct work_struct report_free_page_work; /* The balloon servicing is delegated to a freezable workqueue. */ struct work_struct update_balloon_stats_work; @@ -63,6 +76,13 @@ struct virtio_balloon { spinlock_t stop_update_lock; bool stop_update; + /* The new cmd id received from host */ + uint32_t cmd_id_received; + /* The cmd id that is in use */ + __virtio32 cmd_id_use; + /* Buffer to store the stop sign */ + __virtio32 stop_cmd_id; + /* Waiting for host to ack the pages we released. */ wait_queue_head_t acked; @@ -320,17 +340,6 @@ static void stats_handle_request(struct virtio_balloon *vb) virtqueue_kick(vq); } -static void virtballoon_changed(struct virtio_device *vdev) -{ - struct virtio_balloon *vb = vdev->priv; - unsigned long flags; - - spin_lock_irqsave(&vb->stop_update_lock, flags); - if (!vb->stop_update) - queue_work(system_freezable_wq, &vb->update_balloon_size_work); - spin_unlock_irqrestore(&vb->stop_update_lock, flags); -} - static inline s64 towards_target(struct virtio_balloon *vb) { s64 target; @@ -347,6 +356,34 @@ static inline s64 towards_target(struct virtio_balloon *vb) return target - vb->num_pages; } +static void virtballoon_changed(struct virtio_device *vdev) +{ + struct virtio_balloon *vb = vdev->priv; + unsigned long flags; + s64 diff = towards_target(vb); + + if (diff) { + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) + queue_work(system_freezable_wq, + &vb->update_balloon_size_work); + spin_unlock_irqrestore(&vb->stop_update_lock, flags); + } + + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + virtio_cread(vdev, struct virtio_balloon_config, + free_page_report_cmd_id, &vb->cmd_id_received); + if (vb->cmd_id_received != + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID) { + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) + queue_work(vb->balloon_wq, + &vb->report_free_page_work); + spin_unlock_irqrestore(&vb->stop_update_lock, flags); + } + } +} + static void update_balloon_size(struct virtio_balloon *vb) { u32 actual = vb->num_pages; @@ -421,42 +458,178 @@ static void update_balloon_size_func(struct work_struct *work) static int init_vqs(struct virtio_balloon *vb) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; - static const char * const names[] = { "inflate", "deflate", "stats" }; - int err, nvqs; + struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX]; + vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX]; + const char *names[VIRTIO_BALLOON_VQ_MAX]; + struct scatterlist sg; + int ret; /* - * We expect two virtqueues: inflate and deflate, and - * optionally stat. + * Inflateq and deflateq are used unconditionally. The names[] + * will be NULL if the related feature is not enabled, which will + * cause no allocation for the corresponding virtqueue in find_vqs. */ - nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; - err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL); - if (err) - return err; + callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack; + names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; + callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; + names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; + names[VIRTIO_BALLOON_VQ_STATS] = NULL; + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; - vb->inflate_vq = vqs[0]; - vb->deflate_vq = vqs[1]; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { - struct scatterlist sg; - unsigned int num_stats; - vb->stats_vq = vqs[2]; + names[VIRTIO_BALLOON_VQ_STATS] = "stats"; + callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request; + } + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq"; + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; + } + + ret = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, + vqs, callbacks, names, NULL, NULL); + if (ret) + return ret; + + vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE]; + vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE]; + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { + vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS]; /* * Prime this virtqueue with one buffer so the hypervisor can * use it to signal us later (it can't be broken yet!). */ - num_stats = update_balloon_stats(vb); - - sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); - if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) - < 0) - BUG(); + sg_init_one(&sg, vb->stats, sizeof(vb->stats)); + ret = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, + GFP_KERNEL); + if (ret) { + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", + __func__); + return ret; + } virtqueue_kick(vb->stats_vq); } + + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE]; + return 0; } +static int add_one_sg(struct virtqueue *vq, unsigned long pfn, uint32_t len) +{ + struct scatterlist sg; + unsigned int unused; + + sg_init_table(&sg, 1); + sg_set_page(&sg, pfn_to_page(pfn), len, 0); + + /* Detach all the used buffers from the vq */ + while (virtqueue_get_buf(vq, &unused)) + ; + + /* + * Since this is an optimization feature, losing a couple of free + * pages to report isn't important. We simply return without adding + * this page hint if the vq is full. + * We are adding one entry each time, which essentially results in no + * memory allocation, so the GFP_KERNEL flag below can be ignored. + * Host works by polling the free page vq for hints after sending the + * starting cmd id, so the driver doesn't need to kick after filling + * the vq. + * Lastly, there is always one entry reserved for the cmd id to use. + * + * TODO: The current implementation could be further improved by + * stopping the reporting when the vq is full and continuing the + * reporting when host notifies the driver that entries have been + * used. + */ + if (vq->num_free > 1) + return virtqueue_add_inbuf(vq, &sg, 1, vq, GFP_KERNEL); + + return 0; +} + +static int virtio_balloon_send_free_pages(void *opaque, unsigned long pfn, + unsigned long nr_pages) +{ + struct virtio_balloon *vb = (struct virtio_balloon *)opaque; + uint32_t len = nr_pages << PAGE_SHIFT; + + /* + * If a stop id or a new cmd id was just received from host, stop + * the reporting, and return -EINTR to indicate an active stop. + * + * Ideally, we could have cmd_id_received accessed under locks, which + * ensures that no more entries are added to the vq once a stop cmd + * id is received from host. This requires host to wait for the + * driver's ACK about finishing the update of cmd_id_received. But + * this is not how host side works, because host doesn't work on an + * assumption that the driver would always be responsive. So + * theorically, there are possibilities that some entries may stay in + * the vq when host has exited the optimization. This isn't an issue, + * because entries simply contain guest physical addresses. There is no + * allocated memory that needs to be freed or dma mapped pages that + * need to be unmapped since virtio-balloon works with + * VIRTIO_F_IOMMU_PLATFORM disabled. + */ + if (virtio32_to_cpu(vb->vdev, vb->cmd_id_use) != vb->cmd_id_received) + return -EINTR; + + return add_one_sg(vb->free_page_vq, pfn, len); +} + +static int send_start_cmd_id(struct virtio_balloon *vb, uint32_t cmd_id) +{ + struct scatterlist sg; + struct virtqueue *vq = vb->free_page_vq; + + vb->cmd_id_use = cpu_to_virtio32(vb->vdev, cmd_id); + sg_init_one(&sg, &vb->cmd_id_use, sizeof(vb->cmd_id_use)); + return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); +} + +static int send_stop_cmd_id(struct virtio_balloon *vb) +{ + struct scatterlist sg; + struct virtqueue *vq = vb->free_page_vq; + + sg_init_one(&sg, &vb->stop_cmd_id, sizeof(vb->stop_cmd_id)); + return virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL); +} + +static void report_free_page_func(struct work_struct *work) +{ + struct virtio_balloon *vb; + struct virtqueue *vq; + unsigned int unused; + int ret; + + vb = container_of(work, struct virtio_balloon, report_free_page_work); + vq = vb->free_page_vq; + + /* Start by sending the received cmd id to host with an outbuf. */ + ret = send_start_cmd_id(vb, vb->cmd_id_received); + if (unlikely(ret)) + goto err; + + ret = walk_free_mem_block(vb, 0, &virtio_balloon_send_free_pages); + if (unlikely(ret == -EIO)) + goto err; + + /* End by sending a stop id to host with an outbuf. */ + ret = send_stop_cmd_id(vb); + if (likely(!ret)) { + /* Ending: detach all the used buffers from the vq. */ + while (vq->num_free != virtqueue_get_vring_size(vq)) + virtqueue_get_buf(vq, &unused); + return; + } +err: + dev_err(&vb->vdev->dev, "%s: free page vq failure, ret=%d\n", + __func__, ret); +} + #ifdef CONFIG_BALLOON_COMPACTION /* * virtballoon_migratepage - perform the balloon page migration on behalf of @@ -570,18 +743,36 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + /* + * There is always one entry reserved for cmd id, so the ring + * size needs to be at least two to report free page hints. + */ + if (virtqueue_get_vring_size(vb->free_page_vq) < 2) + goto out_free_vb; + vb->balloon_wq = alloc_workqueue("balloon-wq", + WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); + if (!vb->balloon_wq) { + err = -ENOMEM; + goto out_del_vqs; + } + vb->stop_cmd_id = cpu_to_virtio32(vb->vdev, + VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID); + INIT_WORK(&vb->report_free_page_work, report_free_page_func); + } + vb->nb.notifier_call = virtballoon_oom_notify; vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; err = register_oom_notifier(&vb->nb); if (err < 0) - goto out_del_vqs; + goto out_del_balloon_wq; #ifdef CONFIG_BALLOON_COMPACTION balloon_mnt = kern_mount(&balloon_fs); if (IS_ERR(balloon_mnt)) { err = PTR_ERR(balloon_mnt); unregister_oom_notifier(&vb->nb); - goto out_del_vqs; + goto out_del_balloon_wq; } vb->vb_dev_info.migratepage = virtballoon_migratepage; @@ -591,7 +782,7 @@ static int virtballoon_probe(struct virtio_device *vdev) kern_unmount(balloon_mnt); unregister_oom_notifier(&vb->nb); vb->vb_dev_info.inode = NULL; - goto out_del_vqs; + goto out_del_balloon_wq; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif @@ -602,6 +793,9 @@ static int virtballoon_probe(struct virtio_device *vdev) virtballoon_changed(vdev); return 0; +out_del_balloon_wq: + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + destroy_workqueue(vb->balloon_wq); out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: @@ -635,6 +829,11 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_size_work); cancel_work_sync(&vb->update_balloon_stats_work); + if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + cancel_work_sync(&vb->report_free_page_work); + destroy_workqueue(vb->balloon_wq); + } + remove_common(vb); #ifdef CONFIG_BALLOON_COMPACTION if (vb->vb_dev_info.inode) @@ -686,6 +885,7 @@ static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, + VIRTIO_BALLOON_F_FREE_PAGE_HINT, }; static struct virtio_driver virtio_balloon_driver = { diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index 4e8b830..b2d86c2 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -34,15 +34,19 @@ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 1 /* Memory Stats virtqueue */ #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 2 /* Deflate balloon on OOM */ +#define VIRTIO_BALLOON_F_FREE_PAGE_HINT 3 /* VQ to report free pages */ /* Size of a PFN in the balloon interface. */ #define VIRTIO_BALLOON_PFN_SHIFT 12 +#define VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID 0 struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ __u32 num_pages; /* Number of pages we've actually got in balloon. */ __u32 actual; + /* Free page report command id, readonly by guest */ + __u32 free_page_report_cmd_id; }; #define VIRTIO_BALLOON_S_SWAP_IN 0 /* Amount of memory swapped in */