diff mbox

[v2,3/3] virtio-balloon: add a timer to limit the free page report waiting time

Message ID 1517915299-15349-4-git-send-email-wei.w.wang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wang, Wei W Feb. 6, 2018, 11:08 a.m. UTC
This patch adds a timer to limit the time that host waits for the free
page hints reported by the guest. Users can specify the time in ms via
"free-page-wait-time" command line option. If a user doesn't specify a
time, host waits till the guest finishes reporting all the free page
hints. The policy (wait for all the free page hints to be reported or
use a time limit) is determined by the orchestration layer.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/virtio-balloon.c         | 84 +++++++++++++++++++++++++++++++++++++-
 hw/virtio/virtio-pci.c             |  3 ++
 include/hw/virtio/virtio-balloon.h |  4 ++
 3 files changed, 90 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Feb. 6, 2018, 11:43 p.m. UTC | #1
On Tue, Feb 06, 2018 at 07:08:19PM +0800, Wei Wang wrote:
> This patch adds a timer to limit the time that host waits for the free
> page hints reported by the guest. Users can specify the time in ms via
> "free-page-wait-time" command line option. If a user doesn't specify a
> time, host waits till the guest finishes reporting all the free page
> hints. The policy (wait for all the free page hints to be reported or
> use a time limit) is determined by the orchestration layer.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>

Looks like an option the migration command should get,
as opposed to a device feature.

> ---
>  hw/virtio/virtio-balloon.c         | 84 +++++++++++++++++++++++++++++++++++++-
>  hw/virtio/virtio-pci.c             |  3 ++
>  include/hw/virtio/virtio-balloon.h |  4 ++
>  3 files changed, 90 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index b424d4e..9ee0de4 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -207,6 +207,65 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
>      balloon_stats_change_timer(s, 0);
>  }
>  
> +static void balloon_free_page_change_timer(VirtIOBalloon *s, int64_t ms)
> +{
> +    timer_mod(s->free_page_timer,
> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ms);
> +}
> +
> +static void balloon_stop_free_page_report(void *opaque)
> +{
> +    VirtIOBalloon *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +
> +    timer_del(dev->free_page_timer);
> +    timer_free(dev->free_page_timer);
> +    dev->free_page_timer = NULL;
> +
> +    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +        dev->host_stop_free_page = true;
> +        virtio_notify_config(vdev);
> +    }
> +}
> +
> +static void balloon_free_page_get_wait_time(Object *obj, Visitor *v,
> +                                            const char *name, void *opaque,
> +                                            Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    visit_type_int(v, name, &s->free_page_wait_time, errp);
> +}
> +
> +static void balloon_free_page_set_wait_time(Object *obj, Visitor *v,
> +                                            const char *name, void *opaque,
> +                                            Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +    Error *local_err = NULL;
> +    int64_t value;
> +
> +    visit_type_int(v, name, &value, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +    if (value < 0) {
> +        error_setg(errp, "free page wait time must be greater than zero");
> +        return;
> +    }
> +
> +    if (value > UINT32_MAX) {
> +        error_setg(errp, "free page wait time value is too big");
> +        return;
> +    }
> +
> +    s->free_page_wait_time = value;
> +    g_assert(s->free_page_timer == NULL);
> +    s->free_page_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
> +                                      balloon_stop_free_page_report, s);
> +}
> +
>  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> @@ -330,6 +389,7 @@ static void virtio_balloon_poll_free_page_hints(VirtIOBalloon *dev)
>              if (id == dev->free_page_report_cmd_id) {
>                  dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
>              } else {
> +                dev->host_stop_free_page = false;
>                  dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
>                  break;
>              }
> @@ -385,6 +445,10 @@ static void virtio_balloon_free_page_poll(void *opaque)
>      virtio_notify_config(vdev);
>      s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
>  
> +    if (s->free_page_wait_time) {
> +        balloon_free_page_change_timer(s, s->free_page_wait_time);
> +    }
> +
>      virtio_balloon_poll_free_page_hints(s);
>  }
>  
> @@ -395,7 +459,19 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>  
>      config.num_pages = cpu_to_le32(dev->num_pages);
>      config.actual = cpu_to_le32(dev->actual);
> -    config.free_page_report_cmd_id = cpu_to_le32(dev->free_page_report_cmd_id);
> +    if (dev->host_stop_free_page) {
> +        /*
> +         * Host is actively requesting to stop the free page reporting, send
> +         * the stop sign to the guest. This happens when the migration thread
> +         * has reached the phase to send pages to the destination while the
> +         * guest hasn't done the reporting.
> +         */
> +        config.free_page_report_cmd_id =
> +                                    VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
> +    } else {
> +        config.free_page_report_cmd_id =
> +                                    cpu_to_le32(dev->free_page_report_cmd_id);
> +    }
>  
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
>      memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
> @@ -539,6 +615,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>          s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, NULL);
>          s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
>          s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
> +        s->host_stop_free_page = false;
>      }
>      reset_stats(s);
>  }
> @@ -602,6 +679,11 @@ static void virtio_balloon_instance_init(Object *obj)
>                          balloon_stats_get_poll_interval,
>                          balloon_stats_set_poll_interval,
>                          NULL, s, NULL);
> +
> +    object_property_add(obj, "free-page-wait-time", "int",
> +                        balloon_free_page_get_wait_time,
> +                        balloon_free_page_set_wait_time,
> +                        NULL, s, NULL);
>  }
>  
>  static const VMStateDescription vmstate_virtio_balloon = {
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 6c75cca..3345104 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -2250,6 +2250,9 @@ static void virtio_balloon_pci_instance_init(Object *obj)
>      object_property_add_alias(obj, "guest-stats-polling-interval",
>                                OBJECT(&dev->vdev),
>                                "guest-stats-polling-interval", &error_abort);
> +    object_property_add_alias(obj, "free-page-wait-time",
> +                              OBJECT(&dev->vdev),
> +                              "free-page-wait-time", &error_abort);
>  }
>  
>  static const TypeInfo virtio_balloon_pci_info = {
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 11b4e01..c16855b 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -40,6 +40,8 @@ enum virtio_balloon_free_page_report_status {
>  typedef struct VirtIOBalloon {
>      VirtIODevice parent_obj;
>      VirtQueue *ivq, *dvq, *svq, *free_page_vq;
> +    /* Host is requesting the guest to stop free page reporting */
> +    bool host_stop_free_page;
>      uint32_t free_page_report_status;
>      uint32_t num_pages;
>      uint32_t actual;
> @@ -49,8 +51,10 @@ typedef struct VirtIOBalloon {
>      VirtQueueElement *stats_vq_elem;
>      size_t stats_vq_offset;
>      QEMUTimer *stats_timer;
> +    QEMUTimer *free_page_timer;
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
> +    int64_t free_page_wait_time;
>      uint32_t host_features;
>  } VirtIOBalloon;
>  
> -- 
> 1.8.3.1
Dr. David Alan Gilbert Feb. 9, 2018, 12:15 p.m. UTC | #2
* Wei Wang (wei.w.wang@intel.com) wrote:
> This patch adds a timer to limit the time that host waits for the free
> page hints reported by the guest. Users can specify the time in ms via
> "free-page-wait-time" command line option. If a user doesn't specify a
> time, host waits till the guest finishes reporting all the free page
> hints. The policy (wait for all the free page hints to be reported or
> use a time limit) is determined by the orchestration layer.

That's kind of a get-out; but there's at least two problems:
   a) With a timeout of 0 (the default) we might hang forever waiting
      for the guest; broken guests are just too common, we can't do
      that.
   b) Even if we were going to do that, you'd have to make sure that
      migrate_cancel provided a way out.
   c) How does that work during a savevm snapshot or when the guest is
      stopped?
   d) OK, the timer gives us some safety (except c); but how does the
      orchestration layer ever come up with a 'safe' value for it?
      Unless we can suggest a safe value that the orchestration layer
      can use, or a way they can work it out, then they just won't use
      it.

Dave


> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> CC: Michael S. Tsirkin <mst@redhat.com>
> ---
>  hw/virtio/virtio-balloon.c         | 84 +++++++++++++++++++++++++++++++++++++-
>  hw/virtio/virtio-pci.c             |  3 ++
>  include/hw/virtio/virtio-balloon.h |  4 ++
>  3 files changed, 90 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
> index b424d4e..9ee0de4 100644
> --- a/hw/virtio/virtio-balloon.c
> +++ b/hw/virtio/virtio-balloon.c
> @@ -207,6 +207,65 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
>      balloon_stats_change_timer(s, 0);
>  }
>  
> +static void balloon_free_page_change_timer(VirtIOBalloon *s, int64_t ms)
> +{
> +    timer_mod(s->free_page_timer,
> +              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ms);
> +}
> +
> +static void balloon_stop_free_page_report(void *opaque)
> +{
> +    VirtIOBalloon *dev = opaque;
> +    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> +
> +    timer_del(dev->free_page_timer);
> +    timer_free(dev->free_page_timer);
> +    dev->free_page_timer = NULL;
> +
> +    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
> +        dev->host_stop_free_page = true;
> +        virtio_notify_config(vdev);
> +    }
> +}
> +
> +static void balloon_free_page_get_wait_time(Object *obj, Visitor *v,
> +                                            const char *name, void *opaque,
> +                                            Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +
> +    visit_type_int(v, name, &s->free_page_wait_time, errp);
> +}
> +
> +static void balloon_free_page_set_wait_time(Object *obj, Visitor *v,
> +                                            const char *name, void *opaque,
> +                                            Error **errp)
> +{
> +    VirtIOBalloon *s = opaque;
> +    Error *local_err = NULL;
> +    int64_t value;
> +
> +    visit_type_int(v, name, &value, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +    if (value < 0) {
> +        error_setg(errp, "free page wait time must be greater than zero");
> +        return;
> +    }
> +
> +    if (value > UINT32_MAX) {
> +        error_setg(errp, "free page wait time value is too big");
> +        return;
> +    }
> +
> +    s->free_page_wait_time = value;
> +    g_assert(s->free_page_timer == NULL);
> +    s->free_page_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
> +                                      balloon_stop_free_page_report, s);
> +}
> +
>  static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
> @@ -330,6 +389,7 @@ static void virtio_balloon_poll_free_page_hints(VirtIOBalloon *dev)
>              if (id == dev->free_page_report_cmd_id) {
>                  dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
>              } else {
> +                dev->host_stop_free_page = false;
>                  dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
>                  break;
>              }
> @@ -385,6 +445,10 @@ static void virtio_balloon_free_page_poll(void *opaque)
>      virtio_notify_config(vdev);
>      s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
>  
> +    if (s->free_page_wait_time) {
> +        balloon_free_page_change_timer(s, s->free_page_wait_time);
> +    }
> +
>      virtio_balloon_poll_free_page_hints(s);
>  }
>  
> @@ -395,7 +459,19 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
>  
>      config.num_pages = cpu_to_le32(dev->num_pages);
>      config.actual = cpu_to_le32(dev->actual);
> -    config.free_page_report_cmd_id = cpu_to_le32(dev->free_page_report_cmd_id);
> +    if (dev->host_stop_free_page) {
> +        /*
> +         * Host is actively requesting to stop the free page reporting, send
> +         * the stop sign to the guest. This happens when the migration thread
> +         * has reached the phase to send pages to the destination while the
> +         * guest hasn't done the reporting.
> +         */
> +        config.free_page_report_cmd_id =
> +                                    VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
> +    } else {
> +        config.free_page_report_cmd_id =
> +                                    cpu_to_le32(dev->free_page_report_cmd_id);
> +    }
>  
>      trace_virtio_balloon_get_config(config.num_pages, config.actual);
>      memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
> @@ -539,6 +615,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
>          s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, NULL);
>          s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
>          s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
> +        s->host_stop_free_page = false;
>      }
>      reset_stats(s);
>  }
> @@ -602,6 +679,11 @@ static void virtio_balloon_instance_init(Object *obj)
>                          balloon_stats_get_poll_interval,
>                          balloon_stats_set_poll_interval,
>                          NULL, s, NULL);
> +
> +    object_property_add(obj, "free-page-wait-time", "int",
> +                        balloon_free_page_get_wait_time,
> +                        balloon_free_page_set_wait_time,
> +                        NULL, s, NULL);
>  }
>  
>  static const VMStateDescription vmstate_virtio_balloon = {
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 6c75cca..3345104 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -2250,6 +2250,9 @@ static void virtio_balloon_pci_instance_init(Object *obj)
>      object_property_add_alias(obj, "guest-stats-polling-interval",
>                                OBJECT(&dev->vdev),
>                                "guest-stats-polling-interval", &error_abort);
> +    object_property_add_alias(obj, "free-page-wait-time",
> +                              OBJECT(&dev->vdev),
> +                              "free-page-wait-time", &error_abort);
>  }
>  
>  static const TypeInfo virtio_balloon_pci_info = {
> diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
> index 11b4e01..c16855b 100644
> --- a/include/hw/virtio/virtio-balloon.h
> +++ b/include/hw/virtio/virtio-balloon.h
> @@ -40,6 +40,8 @@ enum virtio_balloon_free_page_report_status {
>  typedef struct VirtIOBalloon {
>      VirtIODevice parent_obj;
>      VirtQueue *ivq, *dvq, *svq, *free_page_vq;
> +    /* Host is requesting the guest to stop free page reporting */
> +    bool host_stop_free_page;
>      uint32_t free_page_report_status;
>      uint32_t num_pages;
>      uint32_t actual;
> @@ -49,8 +51,10 @@ typedef struct VirtIOBalloon {
>      VirtQueueElement *stats_vq_elem;
>      size_t stats_vq_offset;
>      QEMUTimer *stats_timer;
> +    QEMUTimer *free_page_timer;
>      int64_t stats_last_update;
>      int64_t stats_poll_interval;
> +    int64_t free_page_wait_time;
>      uint32_t host_features;
>  } VirtIOBalloon;
>  
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Wang, Wei W Feb. 26, 2018, 4:35 a.m. UTC | #3
On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> This patch adds a timer to limit the time that host waits for the free
>> page hints reported by the guest. Users can specify the time in ms via
>> "free-page-wait-time" command line option. If a user doesn't specify a
>> time, host waits till the guest finishes reporting all the free page
>> hints. The policy (wait for all the free page hints to be reported or
>> use a time limit) is determined by the orchestration layer.
> That's kind of a get-out; but there's at least two problems:
>     a) With a timeout of 0 (the default) we might hang forever waiting
>        for the guest; broken guests are just too common, we can't do
>        that.
>     b) Even if we were going to do that, you'd have to make sure that
>        migrate_cancel provided a way out.
>     c) How does that work during a savevm snapshot or when the guest is
>        stopped?
>     d) OK, the timer gives us some safety (except c); but how does the
>        orchestration layer ever come up with a 'safe' value for it?
>        Unless we can suggest a safe value that the orchestration layer
>        can use, or a way they can work it out, then they just wont use
>        it.
>

Hi Dave,

Sorry for my late response. Please see below:

a) I think people would just kill the guest if it is broken. We can also 
change the default timeout value, for example 1 second, which is enough 
for the free page reporting.

b) How about changing it this way: if timeout happens, host sends a stop 
command to the guest, and makes virtio_balloon_poll_free_page_hints() 
"return" immediately (without getting the guest's acknowledgement). The 
"return" basically goes back to the migration_thread function:
while (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
...
}

migration_cancel sets the state to MIGRATION_CANCELLING, so it will stop 
the migration process.

c) This optimization needs the guest to report. If the guest is stopped, 
it wouldn't work. How about adding a check of "RUN_STATE" before going 
into the optimization?

d) Yes. Normally it is faster to wait for the guest to report all the 
free pages. Probably, we can just hardcode a value (e.g. 1s) for now 
(instead of making it configurable by users), this is used to handle the 
case that the guest is broken. What would you think?

Best,
Wei
Michael S. Tsirkin Feb. 27, 2018, 12:50 a.m. UTC | #4
On Mon, Feb 26, 2018 at 12:35:31PM +0800, Wei Wang wrote:
> On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
> > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > This patch adds a timer to limit the time that host waits for the free
> > > page hints reported by the guest. Users can specify the time in ms via
> > > "free-page-wait-time" command line option. If a user doesn't specify a
> > > time, host waits till the guest finishes reporting all the free page
> > > hints. The policy (wait for all the free page hints to be reported or
> > > use a time limit) is determined by the orchestration layer.
> > That's kind of a get-out; but there's at least two problems:
> >     a) With a timeout of 0 (the default) we might hang forever waiting
> >        for the guest; broken guests are just too common, we can't do
> >        that.
> >     b) Even if we were going to do that, you'd have to make sure that
> >        migrate_cancel provided a way out.
> >     c) How does that work during a savevm snapshot or when the guest is
> >        stopped?
> >     d) OK, the timer gives us some safety (except c); but how does the
> >        orchestration layer ever come up with a 'safe' value for it?
> >        Unless we can suggest a safe value that the orchestration layer
> >        can use, or a way they can work it out, then they just wont use
> >        it.
> > 
> 
> Hi Dave,
> 
> Sorry for my late response. Please see below:
> 
> a) I think people would just kill the guest if it is broken.

There's no way to know whether it's broken.

> We can also
> change the default timeout value, for example 1 second, which is enough for
> the free page reporting.

There's no way to know whether it's enough.

> b) How about changing it this way: if timeout happens, host sends a stop
> command to the guest, and makes virtio_balloon_poll_free_page_hints()
> "return" immediately (without getting the guest's acknowledge). The "return"
> basically goes back to the migration_thread function:
> while (s->state == MIGRATION_STATUS_ACTIVE ||
>            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> ...
> }
> 
> migration_cancel sets the state to MIGRATION_CANCELLING, so it will stop the
> migration process.
> 
> c) This optimization needs the guest to report. If the guest is stopped, it
> wouldn't work. How about adding a check of "RUN_STATE" before going into the
> optimization?
> 
> d) Yes. Normally it is faster to wait for the guest to report all the free
> pages. Probably, we can just hardcode a value (e.g. 1s) for now (instead of
> making it configurable by users), this is used to handle the case that the
> guest is broken. What would you think?
> 
> Best,
> Wei

I think all this is premature optimization. It is not at all clear that
anything is gained by delaying migration. Just ask for hints and start
sending pages immediately.  If guest tells us a page is free before it's
sent, we can skip sending it.  OTOH if migration is taking less time to
complete than it takes for guest to respond, then we are better off just
ignoring the hint.
Wang, Wei W Feb. 27, 2018, 10:10 a.m. UTC | #5
On 02/27/2018 08:50 AM, Michael S. Tsirkin wrote:
> On Mon, Feb 26, 2018 at 12:35:31PM +0800, Wei Wang wrote:
>> On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>
> I think all this is premature optimization. It is not at all clear that
> anything is gained by delaying migration. Just ask for hints and start
> sending pages immediately.  If guest tells us a page is free before it's
> sent, we can skip sending it.  OTOH if migration is taking less time to
> complete than it takes for guest to respond, then we are better off just
> ignoring the hint.

OK, I'll try to create a thread for the free page optimization. We 
create the thread to poll for free pages at the beginning of the bulk 
stage, and stop it at the end of the bulk stage.
There are also comments about postcopy support with this feature, I plan 
to leave that as the second step (that support seems not urgent for now).


Best,
Wei
Dr. David Alan Gilbert Feb. 27, 2018, 10:34 a.m. UTC | #6
* Wei Wang (wei.w.wang@intel.com) wrote:
> On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
> > * Wei Wang (wei.w.wang@intel.com) wrote:
> > > This patch adds a timer to limit the time that host waits for the free
> > > page hints reported by the guest. Users can specify the time in ms via
> > > "free-page-wait-time" command line option. If a user doesn't specify a
> > > time, host waits till the guest finishes reporting all the free page
> > > hints. The policy (wait for all the free page hints to be reported or
> > > use a time limit) is determined by the orchestration layer.
> > That's kind of a get-out; but there's at least two problems:
> >     a) With a timeout of 0 (the default) we might hang forever waiting
> >        for the guest; broken guests are just too common, we can't do
> >        that.
> >     b) Even if we were going to do that, you'd have to make sure that
> >        migrate_cancel provided a way out.
> >     c) How does that work during a savevm snapshot or when the guest is
> >        stopped?
> >     d) OK, the timer gives us some safety (except c); but how does the
> >        orchestration layer ever come up with a 'safe' value for it?
> >        Unless we can suggest a safe value that the orchestration layer
> >        can use, or a way they can work it out, then they just wont use
> >        it.
> > 
> 
> Hi Dave,
> 
> Sorry for my late response. Please see below:
> 
> a) I think people would just kill the guest if it is broken. We can also
> change the default timeout value, for example 1 second, which is enough for
> the free page reporting.

Remember that many VMs are automatically migrated without there being a
human involved; those VMs might be in the BIOS or Grub or shutting down at
the time of migration; there's no human to look at the VM.

> b) How about changing it this way: if timeout happens, host sends a stop
> command to the guest, and makes virtio_balloon_poll_free_page_hints()
> "return" immediately (without getting the guest's acknowledge). The "return"
> basically goes back to the migration_thread function:
> while (s->state == MIGRATION_STATUS_ACTIVE ||
>            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
> ...
> }
> 
> migration_cancel sets the state to MIGRATION_CANCELLING, so it will stop the
> migration process.

OK, but that does rely on there being a timeout; it means you can't have
the default no-timeout because then you can't cancel.

> c) This optimization needs the guest to report. If the guest is stopped, it
> wouldn't work. How about adding a check of "RUN_STATE" before going into the
> optimization?

Yes, that's OK.

> d) Yes. Normally it is faster to wait for the guest to report all the free
> pages. Probably, we can just hardcode a value (e.g. 1s) for now (instead of
> making it configurable by users), this is used to handle the case that the
> guest is broken. What would you think?

The issue is not about configurability - the issue is that it's
hard/impossible to find a good value for the timeout.

Dave

> Best,
> Wei
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Liang Li Feb. 27, 2018, 1:08 p.m. UTC | #7
On Tue, Feb 27, 2018 at 06:10:47PM +0800, Wei Wang wrote:
> On 02/27/2018 08:50 AM, Michael S. Tsirkin wrote:
> > On Mon, Feb 26, 2018 at 12:35:31PM +0800, Wei Wang wrote:
> > > On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
> > > > * Wei Wang (wei.w.wang@intel.com) wrote:
> > 
> > I think all this is premature optimization. It is not at all clear that
> > anything is gained by delaying migration. Just ask for hints and start
> > sending pages immediately.  If guest tells us a page is free before it's
> > sent, we can skip sending it.  OTOH if migration is taking less time to
> > complete than it takes for guest to respond, then we are better off just
> > ignoring the hint.
> 
> OK, I'll try to create a thread for the free page optimization. We create
> the thread to poll for free pages at the beginning of the bulk stage, and
> stops at the end of bulk stage.
> There are also comments about postcopy support with this feature, I plan to
> leave that as the second step (that support seems not urgent for now).
> 
> 
> Best,
> Wei

You can make use of the current migration thread instead of creating a new one.

Liang
Wang, Wei W Feb. 28, 2018, 10:33 a.m. UTC | #8
On 02/27/2018 09:08 PM, Liang Li wrote:
> On Tue, Feb 27, 2018 at 06:10:47PM +0800, Wei Wang wrote:
>> On 02/27/2018 08:50 AM, Michael S. Tsirkin wrote:
>>> On Mon, Feb 26, 2018 at 12:35:31PM +0800, Wei Wang wrote:
>>>> On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
>>>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>> I think all this is premature optimization. It is not at all clear that
>>> anything is gained by delaying migration. Just ask for hints and start
>>> sending pages immediately.  If guest tells us a page is free before it's
>>> sent, we can skip sending it.  OTOH if migration is taking less time to
>>> complete than it takes for guest to respond, then we are better off just
>>> ignoring the hint.
>> OK, I'll try to create a thread for the free page optimization. We create
>> the thread to poll for free pages at the beginning of the bulk stage, and
>> stops at the end of bulk stage.
>> There are also comments about postcopy support with this feature, I plan to
>> leave that as the second step (that support seems not urgent for now).
>>
>>
>> Best,
>> Wei
> you can make use the current migration thread instead of creating a new one.
>

This is what this version is doing - we make the optimization 
implementation be part of the migration thread. To make the optimization 
go in parallel with the migration thread, we need another thread for 
the optimization.

Best,
Wei
Wang, Wei W Feb. 28, 2018, 10:37 a.m. UTC | #9
On 02/27/2018 06:34 PM, Dr. David Alan Gilbert wrote:
> * Wei Wang (wei.w.wang@intel.com) wrote:
>> On 02/09/2018 08:15 PM, Dr. David Alan Gilbert wrote:
>>> * Wei Wang (wei.w.wang@intel.com) wrote:
>>>> This patch adds a timer to limit the time that host waits for the free
>>>> page hints reported by the guest. Users can specify the time in ms via
>>>> "free-page-wait-time" command line option. If a user doesn't specify a
>>>> time, host waits till the guest finishes reporting all the free page
>>>> hints. The policy (wait for all the free page hints to be reported or
>>>> use a time limit) is determined by the orchestration layer.
>>> That's kind of a get-out; but there's at least two problems:
>>>      a) With a timeout of 0 (the default) we might hang forever waiting
>>>         for the guest; broken guests are just too common, we can't do
>>>         that.
>>>      b) Even if we were going to do that, you'd have to make sure that
>>>         migrate_cancel provided a way out.
>>>      c) How does that work during a savevm snapshot or when the guest is
>>>         stopped?
>>>      d) OK, the timer gives us some safety (except c); but how does the
>>>         orchestration layer ever come up with a 'safe' value for it?
>>>         Unless we can suggest a safe value that the orchestration layer
>>>         can use, or a way they can work it out, then they just wont use
>>>         it.
>>>
>> Hi Dave,
>>
>> Sorry for my late response. Please see below:
>>
>> a) I think people would just kill the guest if it is broken. We can also
>> change the default timeout value, for example 1 second, which is enough for
>> the free page reporting.
> Remember that many VMs are automatically migrated without their being a
> human involved; those VMs might be in the BIOS or Grub or shutting down at
> the time of migration; there's no human to look at the VM.
>

OK, thanks for sharing. I plan to take Michael's suggestion to make 
the optimization run in parallel with the migration thread. The 
optimization will be in its own thread, and the migration thread runs as 
usual (not stuck by the optimization e.g. when the optimization part 
doesn't return promptly in any case).

Best,
Wei
diff mbox

Patch

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index b424d4e..9ee0de4 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -207,6 +207,65 @@  static void balloon_stats_set_poll_interval(Object *obj, Visitor *v,
     balloon_stats_change_timer(s, 0);
 }
 
+/* (Re)arm the free page timer to expire @ms milliseconds from now. */
+static void balloon_free_page_change_timer(VirtIOBalloon *s, int64_t ms)
+{
+    timer_mod(s->free_page_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ms);
+}
+
+/* Timer callback: the wait time expired; ask a reporting guest to stop. */
+static void balloon_stop_free_page_report(void *opaque)
+{
+    VirtIOBalloon *dev = opaque;
+    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+
+    /* Do not free it: virtio_balloon_free_page_poll() re-arms this timer. */
+    timer_del(dev->free_page_timer);
+
+    if (dev->free_page_report_status == FREE_PAGE_REPORT_S_START) {
+        dev->host_stop_free_page = true;
+        virtio_notify_config(vdev);
+    }
+}
+
+/* Property getter for "free-page-wait-time" (milliseconds, 0 = no limit). */
+static void balloon_free_page_get_wait_time(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    VirtIOBalloon *balloon = opaque;
+    visit_type_int(v, name, &balloon->free_page_wait_time, errp);
+}
+
+/* Property setter: wait time in ms; 0 means no time limit. */
+static void balloon_free_page_set_wait_time(Object *obj, Visitor *v,
+                                            const char *name, void *opaque,
+                                            Error **errp)
+{
+    VirtIOBalloon *s = opaque;
+    Error *local_err = NULL;
+    int64_t value;
+
+    visit_type_int(v, name, &value, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    if (value < 0) {
+        error_setg(errp, "free page wait time must not be negative");
+        return;
+    }
+    if (value > UINT32_MAX) {
+        error_setg(errp, "free page wait time value is too big");
+        return;
+    }
+    s->free_page_wait_time = value;
+    if (!s->free_page_timer) { /* the property may be set more than once */
+        s->free_page_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+                                          balloon_stop_free_page_report, s);
+    }
+}
+
 static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
     VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
@@ -330,6 +389,7 @@  static void virtio_balloon_poll_free_page_hints(VirtIOBalloon *dev)
             if (id == dev->free_page_report_cmd_id) {
                 dev->free_page_report_status = FREE_PAGE_REPORT_S_START;
             } else {
+                dev->host_stop_free_page = false;
                 dev->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
                 break;
             }
@@ -385,6 +445,10 @@  static void virtio_balloon_free_page_poll(void *opaque)
     virtio_notify_config(vdev);
     s->free_page_report_status = FREE_PAGE_REPORT_S_REQUESTED;
 
+    if (s->free_page_wait_time) {
+        balloon_free_page_change_timer(s, s->free_page_wait_time);
+    }
+
     virtio_balloon_poll_free_page_hints(s);
 }
 
@@ -395,7 +459,19 @@  static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
-    config.free_page_report_cmd_id = cpu_to_le32(dev->free_page_report_cmd_id);
+    if (dev->host_stop_free_page) {
+        /*
+         * Host is actively requesting to stop the free page reporting, send
+         * the stop sign to the guest. This happens when the migration thread
+         * has reached the phase to send pages to the destination while the
+         * guest hasn't done the reporting.
+         */
+        config.free_page_report_cmd_id =
+                                    VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
+    } else {
+        config.free_page_report_cmd_id =
+                                    cpu_to_le32(dev->free_page_report_cmd_id);
+    }
 
     trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
@@ -539,6 +615,7 @@  static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
         s->free_page_vq = virtio_add_queue(vdev, VIRTQUEUE_MAX_SIZE, NULL);
         s->free_page_report_status = FREE_PAGE_REPORT_S_STOP;
         s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_STOP_ID;
+        s->host_stop_free_page = false;
     }
     reset_stats(s);
 }
@@ -602,6 +679,11 @@  static void virtio_balloon_instance_init(Object *obj)
                         balloon_stats_get_poll_interval,
                         balloon_stats_set_poll_interval,
                         NULL, s, NULL);
+
+    object_property_add(obj, "free-page-wait-time", "int",
+                        balloon_free_page_get_wait_time,
+                        balloon_free_page_set_wait_time,
+                        NULL, s, NULL);
 }
 
 static const VMStateDescription vmstate_virtio_balloon = {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 6c75cca..3345104 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2250,6 +2250,9 @@  static void virtio_balloon_pci_instance_init(Object *obj)
     object_property_add_alias(obj, "guest-stats-polling-interval",
                               OBJECT(&dev->vdev),
                               "guest-stats-polling-interval", &error_abort);
+    object_property_add_alias(obj, "free-page-wait-time",
+                              OBJECT(&dev->vdev),
+                              "free-page-wait-time", &error_abort);
 }
 
 static const TypeInfo virtio_balloon_pci_info = {
diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 11b4e01..c16855b 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -40,6 +40,8 @@  enum virtio_balloon_free_page_report_status {
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
     VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    /* Host is requesting the guest to stop free page reporting */
+    bool host_stop_free_page;
     uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
@@ -49,8 +51,10 @@  typedef struct VirtIOBalloon {
     VirtQueueElement *stats_vq_elem;
     size_t stats_vq_offset;
     QEMUTimer *stats_timer;
+    QEMUTimer *free_page_timer;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
+    int64_t free_page_wait_time;
     uint32_t host_features;
 } VirtIOBalloon;