Message ID | 20200410034129.24738.36022.stgit@localhost.localdomain (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtio-balloon: add support for free page reporting | expand |
On 10.04.20 05:41, Alexander Duyck wrote: > From: Alexander Duyck <alexander.h.duyck@linux.intel.com> > > We need to make certain to advertise support for page poison tracking if > we want to actually get data on if the guest will be poisoning pages. So > if free page hinting is active we should add page poisoning support and > let the guest disable it if it isn't using it. > > Page poisoning will result in a page being dirtied on free. As such we > cannot really avoid having to copy the page at least one more time since > we will need to write the poison value to the destination. As such we can > just ignore free page hinting if page poisoning is enabled as it will > actually reduce the work we have to do. > > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> > --- > hw/virtio/virtio-balloon.c | 26 ++++++++++++++++++++++---- > include/hw/virtio/virtio-balloon.h | 1 + > 2 files changed, 23 insertions(+), 4 deletions(-) > > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c > index a4729f7fc930..1c6d36a29a04 100644 > --- a/hw/virtio/virtio-balloon.c > +++ b/hw/virtio/virtio-balloon.c > @@ -531,6 +531,15 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s) > return; > } > > + /* > + * If page poisoning is enabled then we probably shouldn't bother with > + * the hinting since the poisoning will dirty the page and invalidate > + * the work we are doing anyway. > + */ > + if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { Why not check for the poison value instead? (as you do in patch #3) ? > + return; > + } > + > if (s->free_page_report_cmd_id == UINT_MAX) { > s->free_page_report_cmd_id = > VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; We should rename all "free_page_report" stuff we can to "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my side :) ) before adding free page reporting . (looking at the virtio-balloon linux header, it's also confusing but we're stuck with that - maybe we should add better comments) > @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) > if (s->qemu_4_0_config_size) { > return sizeof(struct virtio_balloon_config); > } > - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { > + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || > + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > return sizeof(struct virtio_balloon_config); > } > - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > - return offsetof(struct virtio_balloon_config, poison_val); > - } I am not sure this change is completely sane. Why is that necessary at all? > return offsetof(struct virtio_balloon_config, free_page_report_cmd_id); > } > > @@ -634,6 +641,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) > > config.num_pages = cpu_to_le32(dev->num_pages); > config.actual = cpu_to_le32(dev->actual); > + config.poison_val = cpu_to_le32(dev->poison_val); > > if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { > config.free_page_report_cmd_id = > @@ -697,6 +705,9 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, > qapi_event_send_balloon_change(vm_ram_size - > ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); > } > + dev->poison_val = virtio_vdev_has_feature(vdev, > + VIRTIO_BALLOON_F_PAGE_POISON) ? > + le32_to_cpu(config.poison_val) : 0; Can we just do a dev->poison_val = 0; if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { dev->poison_val = le32_to_cpu(config.poison_val); } instead? > trace_virtio_balloon_set_config(dev->actual, oldactual); > } > > @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, > VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); > f |= dev->host_features; > virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); > + if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > + virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON); > + } > > return f; > } > @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) > g_free(s->stats_vq_elem); > s->stats_vq_elem = NULL; > } > + > + s->poison_val = 0; > } > > static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) > @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = { > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), > DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, > VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), > + DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features, > + VIRTIO_BALLOON_F_PAGE_POISON, false), Just curious, why an x- feature?
On Wed, Apr 15, 2020 at 1:08 AM David Hildenbrand <david@redhat.com> wrote: > > On 10.04.20 05:41, Alexander Duyck wrote: > > From: Alexander Duyck <alexander.h.duyck@linux.intel.com> > > > > We need to make certain to advertise support for page poison tracking if > > we want to actually get data on if the guest will be poisoning pages. So > > if free page hinting is active we should add page poisoning support and > > let the guest disable it if it isn't using it. > > > > Page poisoning will result in a page being dirtied on free. As such we > > cannot really avoid having to copy the page at least one more time since > > we will need to write the poison value to the destination. As such we can > > just ignore free page hinting if page poisoning is enabled as it will > > actually reduce the work we have to do. > > > > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> > > --- > > hw/virtio/virtio-balloon.c | 26 ++++++++++++++++++++++---- > > include/hw/virtio/virtio-balloon.h | 1 + > > 2 files changed, 23 insertions(+), 4 deletions(-) > > > > diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c > > index a4729f7fc930..1c6d36a29a04 100644 > > --- a/hw/virtio/virtio-balloon.c > > +++ b/hw/virtio/virtio-balloon.c > > @@ -531,6 +531,15 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s) > > return; > > } > > > > + /* > > + * If page poisoning is enabled then we probably shouldn't bother with > > + * the hinting since the poisoning will dirty the page and invalidate > > + * the work we are doing anyway. > > + */ > > + if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { > > Why not check for the poison value instead? (as you do in patch #3) ? So if I recall correctly the vdev has feature requires the host to indicate that the feature is in use. If page poisoning is not enabled on the host then it clears the flag on its end and we can proceed with the feature. The comment above explains the "why". Basically poisoning a page will dirty it. So why hint a page as free when that will drop it back into the guest and result in it being dirtied again. What you end up with is all the pages that were temporarily placed in the balloon are dirty after the hinting report is finished at which point you made things worse instead of helping to improve them. > > > + return; > > + } > > + > > if (s->free_page_report_cmd_id == UINT_MAX) { > > s->free_page_report_cmd_id = > > VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; > > We should rename all "free_page_report" stuff we can to > "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my > side :) ) before adding free page reporting . > > (looking at the virtio-balloon linux header, it's also confusing but > we're stuck with that - maybe we should add better comments) Are we stuck? Couldn't we just convert it to an anonymous union with free_page_hint_cmd_id and then use that where needed? > > @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) > > if (s->qemu_4_0_config_size) { > > return sizeof(struct virtio_balloon_config); > > } > > - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { > > + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || > > + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > > return sizeof(struct virtio_balloon_config); > > } > > - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > > - return offsetof(struct virtio_balloon_config, poison_val); > > - } > > I am not sure this change is completely sane. Why is that necessary at all? The poison_val is stored at the end of the structure and is required in order to make free page hinting to work. What this change is doing is forcing it so that we report the config size as the full size if either poisoning or hinting are enabled since the poison val is the last member of the config structure. If the question is why bother reducing the size if free page hinting is not present then I guess we could simplify this and just report report the size of the config for all cases. > > return offsetof(struct virtio_balloon_config, free_page_report_cmd_id); > > } > > > > @@ -634,6 +641,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) > > > > config.num_pages = cpu_to_le32(dev->num_pages); > > config.actual = cpu_to_le32(dev->actual); > > + config.poison_val = cpu_to_le32(dev->poison_val); > > > > if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { > > config.free_page_report_cmd_id = > > @@ -697,6 +705,9 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, > > qapi_event_send_balloon_change(vm_ram_size - > > ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); > > } > > + dev->poison_val = virtio_vdev_has_feature(vdev, > > + VIRTIO_BALLOON_F_PAGE_POISON) ? > > + le32_to_cpu(config.poison_val) : 0; > > Can we just do a > > > dev->poison_val = 0; > if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { > dev->poison_val = le32_to_cpu(config.poison_val); > } > > instead? I can change it to that if that is what is preferred. > > trace_virtio_balloon_set_config(dev->actual, oldactual); > > } > > > > @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, > > VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); > > f |= dev->host_features; > > virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); > > + if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > > + virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON); > > + } > > > > return f; > > } > > @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) > > g_free(s->stats_vq_elem); > > s->stats_vq_elem = NULL; > > } > > + > > + s->poison_val = 0; > > } > > > > static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) > > @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = { > > VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), > > DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, > > VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), > > + DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features, > > + VIRTIO_BALLOON_F_PAGE_POISON, false), > > Just curious, why an x- feature? It was something I didn't expect the users to enable. It gets enabled when either free page hinting or free page reporting is enabled. So if you look you will see that in virtio_balloon_get_features the page poison feature is added if free page hinting is present. The guest will clear the feature bit if poisoning is not enabled in the guest. That results in the vdev getting the bit cleared. Part of it was also about making this work with the existing feature code as it had been added to the upstream kernel.
> > The comment above explains the "why". Basically poisoning a page will > dirty it. So why hint a page as free when that will drop it back into > the guest and result in it being dirtied again. What you end up with > is all the pages that were temporarily placed in the balloon are dirty > after the hinting report is finished at which point you made things > worse instead of helping to improve them. Let me think this through. What happens on free page hinting is that a) we tell the guest that migration starts b) it allocates pages (alloc_pages()), sends them to us and adds them to a list b) we exclude these pages on migration c) we tell the guest that migration is over d) the guest frees all allocated pages The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest will fill all pages again with a pattern (or zero). AFAIKs, it's either 1) Not performing free page hinting, migrating pages filled with a poison value (or zero). 2) Performing free page hinting, not migrating pages filled with a poison value (or zero), letting only the guest fill them again. I have the feeling, that 2) is still better for migration, because we don't migrate useless pages and let the guest reconstruct the content? (having a poison value of zero might be debatable) Can you tell me what I am missing? :) > >> >>> + return; >>> + } >>> + >>> if (s->free_page_report_cmd_id == UINT_MAX) { >>> s->free_page_report_cmd_id = >>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; >> >> We should rename all "free_page_report" stuff we can to >> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my >> side :) ) before adding free page reporting . >> >> (looking at the virtio-balloon linux header, it's also confusing but >> we're stuck with that - maybe we should add better comments) > > Are we stuck? Couldn't we just convert it to an anonymous union with > free_page_hint_cmd_id and then use that where needed? Saw your patch already :) > >>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) >>> if (s->qemu_4_0_config_size) { >>> return sizeof(struct virtio_balloon_config); >>> } >>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { >>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || >>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>> return sizeof(struct virtio_balloon_config); >>> } >>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>> - return offsetof(struct virtio_balloon_config, poison_val); >>> - } >> >> I am not sure this change is completely sane. Why is that necessary at all? > > The poison_val is stored at the end of the structure and is required > in order to make free page hinting to work. What this change is doing Required to make it work? In the kernel, commit 2e991629bcf55a43681aec1ee096eeb03cf81709 Author: Wei Wang <wei.w.wang@intel.com> Date: Mon Aug 27 09:32:19 2018 +0800 virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON was merged after commit 86a559787e6f5cf662c081363f64a20cad654195 Author: Wei Wang <wei.w.wang@intel.com> Date: Mon Aug 27 09:32:17 2018 +0800 virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT So I assume it's perfectly fine to not have it. I'd say it's the responsibility of the guest to *not* use VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself into the foot. > is forcing it so that we report the config size as the full size if > either poisoning or hinting are enabled since the poison val is the > last member of the config structure. > > If the question is why bother reducing the size if free page hinting > is not present then I guess we could simplify this and just report > report the size of the config for all cases. I guess the idea is that if you migrate from one QEMU to another, your config size will not suddenly change (fenced by a feature set that will not change during migration, observable by a running guest). My guess would be that we cannot simply change existing configurations (defined via feature bits) as you do here - see e.g., qemu-4-0-config-size. [...] >>> >>> @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, >>> VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); >>> f |= dev->host_features; >>> virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); >>> + if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>> + virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON); >>> + } >>> >>> return f; >>> } >>> @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) >>> g_free(s->stats_vq_elem); >>> s->stats_vq_elem = NULL; >>> } >>> + >>> + s->poison_val = 0; >>> } >>> >>> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) >>> @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = { >>> VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), >>> DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, >>> VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), >>> + DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features, >>> + VIRTIO_BALLOON_F_PAGE_POISON, false), >> >> Just curious, why an x- feature? > > It was something I didn't expect the users to enable. It gets enabled > when either free page hinting or free page reporting is enabled. So if > you look you will see that in virtio_balloon_get_features the page > poison feature is added if free page hinting is present. The guest > will clear the feature bit if poisoning is not enabled in the guest. > That results in the vdev getting the bit cleared. The weird thing is that setting it to "false" will still enable it automatically, depending on other features. Not sure how helpful that is. I'd prefer to simply always enable it, similar to VIRTIO_BALLOON_F_STATS_VQ - but it's late and I am confused how migration with compat machines will work. :) So, I wonder if we need this feature bit here at all.
On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote: > > > > > The comment above explains the "why". Basically poisoning a page will > > dirty it. So why hint a page as free when that will drop it back into > > the guest and result in it being dirtied again. What you end up with > > is all the pages that were temporarily placed in the balloon are dirty > > after the hinting report is finished at which point you made things > > worse instead of helping to improve them. > > Let me think this through. What happens on free page hinting is that > > a) we tell the guest that migration starts > b) it allocates pages (alloc_pages()), sends them to us and adds them to > a list > b) we exclude these pages on migration > c) we tell the guest that migration is over > d) the guest frees all allocated pages > > The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest > will fill all pages again with a pattern (or zero). They should have already been filled with the poison pattern before we got to d). A bigger worry is that we at some point in the future update the kernel so that d) doesn't trigger re-poisoning, in which case the pages won't be marked as dirty, we will have skipped the migration, and we have no idea what will be in the pages at the end. > AFAIKs, it's either > > 1) Not performing free page hinting, migrating pages filled with a > poison value (or zero). > 2) Performing free page hinting, not migrating pages filled with a > poison value (or zero), letting only the guest fill them again. > > I have the feeling, that 2) is still better for migration, because we > don't migrate useless pages and let the guest reconstruct the content? > (having a poison value of zero might be debatable) > > Can you tell me what I am missing? :) The goal of the free page hinting was to reduce the number of pages that have to be migrated, in the second case you point out is is basically deferring it and will make the post-copy quite more expensive since all of the free memory will be pushed to the post-copy which I would think would be undesirable since it means the VM would have to be down for a greater amount of time with the poisoning enabled. The worst case scenario would be one in which the VM was suspended for migration while there were still pages in the balloon that needed to be drained. In such a case you would have them in an indeterminate state since the last page poisoning for them might have been ignored since they were marked as "free", so they are just going to be whatever value they were if they had been migrated at all. > > > >> > >>> + return; > >>> + } > >>> + > >>> if (s->free_page_report_cmd_id == UINT_MAX) { > >>> s->free_page_report_cmd_id = > >>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; > >> > >> We should rename all "free_page_report" stuff we can to > >> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my > >> side :) ) before adding free page reporting . > >> > >> (looking at the virtio-balloon linux header, it's also confusing but > >> we're stuck with that - maybe we should add better comments) > > > > Are we stuck? Couldn't we just convert it to an anonymous union with > > free_page_hint_cmd_id and then use that where needed? > > Saw your patch already :) > > > > >>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) > >>> if (s->qemu_4_0_config_size) { > >>> return sizeof(struct virtio_balloon_config); > >>> } > >>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { > >>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || > >>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>> return sizeof(struct virtio_balloon_config); > >>> } > >>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>> - return offsetof(struct virtio_balloon_config, poison_val); > >>> - } > >> > >> I am not sure this change is completely sane. Why is that necessary at all? > > > > The poison_val is stored at the end of the structure and is required > > in order to make free page hinting to work. What this change is doing > > Required to make it work? In the kernel, > > commit 2e991629bcf55a43681aec1ee096eeb03cf81709 > Author: Wei Wang <wei.w.wang@intel.com> > Date: Mon Aug 27 09:32:19 2018 +0800 > > virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON > > was merged after > > commit 86a559787e6f5cf662c081363f64a20cad654195 > Author: Wei Wang <wei.w.wang@intel.com> > Date: Mon Aug 27 09:32:17 2018 +0800 > > virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT > > So I assume it's perfectly fine to not have it. > > I'd say it's the responsibility of the guest to *not* use > VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning > without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself > into the foot. Based on the timing I am guessing the page poisoning was in the same patch set as the free page hinting since there is only 2 seconds between when the two are merged. If I recall the page poisoning logic was added after the issue was pointed out that it needed to address it. I can probably make some changes to virtballoon_validate in the kernel driver to address the fact that if we are poisoning pages we need to either have the PAGE_POISON feature or we need to disable page reporting and page hinting. > > is forcing it so that we report the config size as the full size if > > either poisoning or hinting are enabled since the poison val is the > > last member of the config structure. > > > > If the question is why bother reducing the size if free page hinting > > is not present then I guess we could simplify this and just report > > report the size of the config for all cases. > > I guess the idea is that if you migrate from one QEMU to another, your > config size will not suddenly change (fenced by a feature set that will > not change during migration, observable by a running guest). > > My guess would be that we cannot simply change existing configurations > (defined via feature bits) as you do here - see e.g., qemu-4-0-config-size. Okay, I guess I can revert that bit. > [...] > > >>> > >>> @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, > >>> VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); > >>> f |= dev->host_features; > >>> virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); > >>> + if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>> + virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON); > >>> + } > >>> > >>> return f; > >>> } > >>> @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) > >>> g_free(s->stats_vq_elem); > >>> s->stats_vq_elem = NULL; > >>> } > >>> + > >>> + s->poison_val = 0; > >>> } > >>> > >>> static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) > >>> @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = { > >>> VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), > >>> DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, > >>> VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), > >>> + DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features, > >>> + VIRTIO_BALLOON_F_PAGE_POISON, false), > >> > >> Just curious, why an x- feature? > > > > It was something I didn't expect the users to enable. It gets enabled > > when either free page hinting or free page reporting is enabled. So if > > you look you will see that in virtio_balloon_get_features the page > > poison feature is added if free page hinting is present. The guest > > will clear the feature bit if poisoning is not enabled in the guest. > > That results in the vdev getting the bit cleared. > > The weird thing is that setting it to "false" will still enable it > automatically, depending on other features. Not sure how helpful that is. > > I'd prefer to simply always enable it, similar to > VIRTIO_BALLOON_F_STATS_VQ - but it's late and I am confused how > migration with compat machines will work. :) > > So, I wonder if we need this feature bit here at all. I can drop it. I don't really recall why I added that piece.
> Am 15.04.2020 um 21:29 schrieb Alexander Duyck <alexander.duyck@gmail.com>: > > On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote: >> >>> >>> The comment above explains the "why". Basically poisoning a page will >>> dirty it. So why hint a page as free when that will drop it back into >>> the guest and result in it being dirtied again. What you end up with >>> is all the pages that were temporarily placed in the balloon are dirty >>> after the hinting report is finished at which point you made things >>> worse instead of helping to improve them. >> >> Let me think this through. What happens on free page hinting is that >> >> a) we tell the guest that migration starts >> b) it allocates pages (alloc_pages()), sends them to us and adds them to >> a list >> b) we exclude these pages on migration >> c) we tell the guest that migration is over >> d) the guest frees all allocated pages >> >> The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest >> will fill all pages again with a pattern (or zero). > > They should have already been filled with the poison pattern before we > got to d). A bigger worry is that we at some point in the future > update the kernel so that d) doesn't trigger re-poisoning, in which > case the pages won't be marked as dirty, we will have skipped the > migration, and we have no idea what will be in the pages at the end. > >> AFAIKs, it's either >> >> 1) Not performing free page hinting, migrating pages filled with a >> poison value (or zero). >> 2) Performing free page hinting, not migrating pages filled with a >> poison value (or zero), letting only the guest fill them again. >> >> I have the feeling, that 2) is still better for migration, because we >> don't migrate useless pages and let the guest reconstruct the content? >> (having a poison value of zero might be debatable) >> >> Can you tell me what I am missing? :) > > The goal of the free page hinting was to reduce the number of pages > that have to be migrated, in the second case you point out is is > basically deferring it and will make the post-copy quite more > expensive since all of the free memory will be pushed to the post-copy > which I would think would be undesirable since it means the VM would > have to be down for a greater amount of time with the poisoning > enabled. Postcopy is a very good point, bought! But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then. I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now). > > The worst case scenario would be one in which the VM was suspended for > migration while there were still pages in the balloon that needed to > be drained. In such a case you would have them in an indeterminate > state since the last page poisoning for them might have been ignored > since they were marked as "free", so they are just going to be > whatever value they were if they had been migrated at all. > >>> >>>> >>>>> + return; >>>>> + } >>>>> + >>>>> if (s->free_page_report_cmd_id == UINT_MAX) { >>>>> s->free_page_report_cmd_id = >>>>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; >>>> >>>> We should rename all "free_page_report" stuff we can to >>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my >>>> side :) ) before adding free page reporting . >>>> >>>> (looking at the virtio-balloon linux header, it's also confusing but >>>> we're stuck with that - maybe we should add better comments) >>> >>> Are we stuck? Couldn't we just convert it to an anonymous union with >>> free_page_hint_cmd_id and then use that where needed? >> >> Saw your patch already :) >> >>> >>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) >>>>> if (s->qemu_4_0_config_size) { >>>>> return sizeof(struct virtio_balloon_config); >>>>> } >>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { >>>>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || >>>>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>>>> return sizeof(struct virtio_balloon_config); >>>>> } >>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>>>> - return offsetof(struct virtio_balloon_config, poison_val); >>>>> - } >>>> >>>> I am not sure this change is completely sane. Why is that necessary at all? >>> >>> The poison_val is stored at the end of the structure and is required >>> in order to make free page hinting to work. What this change is doing >> >> Required to make it work? In the kernel, >> >> commit 2e991629bcf55a43681aec1ee096eeb03cf81709 >> Author: Wei Wang <wei.w.wang@intel.com> >> Date: Mon Aug 27 09:32:19 2018 +0800 >> >> virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON >> >> was merged after >> >> commit 86a559787e6f5cf662c081363f64a20cad654195 >> Author: Wei Wang <wei.w.wang@intel.com> >> Date: Mon Aug 27 09:32:17 2018 +0800 >> >> virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT >> >> So I assume it's perfectly fine to not have it. >> >> I'd say it's the responsibility of the guest to *not* use >> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning >> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself >> into the foot. > > Based on the timing I am guessing the page poisoning was in the same > patch set as the free page hinting since there is only 2 seconds > between when the two are merged. If I recall the page poisoning logic > was added after the issue was pointed out that it needed to address > it. > Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec. Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile).
On Wed, Apr 15, 2020 at 12:46 PM David Hildenbrand <dhildenb@redhat.com> wrote: > > > > > Am 15.04.2020 um 21:29 schrieb Alexander Duyck <alexander.duyck@gmail.com>: > > > > On Wed, Apr 15, 2020 at 11:17 AM David Hildenbrand <david@redhat.com> wrote: > >> > >>> > >>> The comment above explains the "why". Basically poisoning a page will > >>> dirty it. So why hint a page as free when that will drop it back into > >>> the guest and result in it being dirtied again. What you end up with > >>> is all the pages that were temporarily placed in the balloon are dirty > >>> after the hinting report is finished at which point you made things > >>> worse instead of helping to improve them. > >> > >> Let me think this through. What happens on free page hinting is that > >> > >> a) we tell the guest that migration starts > >> b) it allocates pages (alloc_pages()), sends them to us and adds them to > >> a list > >> b) we exclude these pages on migration > >> c) we tell the guest that migration is over > >> d) the guest frees all allocated pages > >> > >> The "issue" with VIRTIO_BALLOON_F_PAGE_POISON is, that in d), the guest > >> will fill all pages again with a pattern (or zero). > > > > They should have already been filled with the poison pattern before we > > got to d). A bigger worry is that we at some point in the future > > update the kernel so that d) doesn't trigger re-poisoning, in which > > case the pages won't be marked as dirty, we will have skipped the > > migration, and we have no idea what will be in the pages at the end. > > > >> AFAIKs, it's either > >> > >> 1) Not performing free page hinting, migrating pages filled with a > >> poison value (or zero). > >> 2) Performing free page hinting, not migrating pages filled with a > >> poison value (or zero), letting only the guest fill them again. > >> > >> I have the feeling, that 2) is still better for migration, because we > >> don't migrate useless pages and let the guest reconstruct the content? > >> (having a poison value of zero might be debatable) > >> > >> Can you tell me what I am missing? :) > > > > The goal of the free page hinting was to reduce the number of pages > > that have to be migrated, in the second case you point out is is > > basically deferring it and will make the post-copy quite more > > expensive since all of the free memory will be pushed to the post-copy > > which I would think would be undesirable since it means the VM would > > have to be down for a greater amount of time with the poisoning > > enabled. > > Postcopy is a very good point, bought! > > But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then. Do you have a link to the spec that I could look at? I am not hopeful that this will be listed there since the poison side of QEMU was never implemented. The flag is only here because it was copied over in the kernel header. > I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now). The problem is this was broken from the start for that. The issue is that the poison feature was wrapped up within the page hinting feature. So as a result enabling it for a VM that doesn't recognize the feature independently would likely leave the poison value uninitialized and flagging as though a value of 0 is used. In addition the original poison checking wasn't making sure that the page wasn't init_on_free which has the same effect as page poisoning but isn't page poisoning. > > > > The worst case scenario would be one in which the VM was suspended for > > migration while there were still pages in the balloon that needed to > > be drained. In such a case you would have them in an indeterminate > > state since the last page poisoning for them might have been ignored > > since they were marked as "free", so they are just going to be > > whatever value they were if they had been migrated at all. > > > >>> > >>>> > >>>>> + return; > >>>>> + } > >>>>> + > >>>>> if (s->free_page_report_cmd_id == UINT_MAX) { > >>>>> s->free_page_report_cmd_id = > >>>>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; > >>>> > >>>> We should rename all "free_page_report" stuff we can to > >>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my > >>>> side :) ) before adding free page reporting . > >>>> > >>>> (looking at the virtio-balloon linux header, it's also confusing but > >>>> we're stuck with that - maybe we should add better comments) > >>> > >>> Are we stuck? Couldn't we just convert it to an anonymous union with > >>> free_page_hint_cmd_id and then use that where needed? > >> > >> Saw your patch already :) > >> > >>> > >>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) > >>>>> if (s->qemu_4_0_config_size) { > >>>>> return sizeof(struct virtio_balloon_config); > >>>>> } > >>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { > >>>>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || > >>>>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>>>> return sizeof(struct virtio_balloon_config); > >>>>> } > >>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>>>> - return offsetof(struct virtio_balloon_config, poison_val); > >>>>> - } > >>>> > >>>> I am not sure this change is completely sane. Why is that necessary at all? > >>> > >>> The poison_val is stored at the end of the structure and is required > >>> in order to make free page hinting to work. What this change is doing > >> > >> Required to make it work? In the kernel, > >> > >> commit 2e991629bcf55a43681aec1ee096eeb03cf81709 > >> Author: Wei Wang <wei.w.wang@intel.com> > >> Date: Mon Aug 27 09:32:19 2018 +0800 > >> > >> virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON > >> > >> was merged after > >> > >> commit 86a559787e6f5cf662c081363f64a20cad654195 > >> Author: Wei Wang <wei.w.wang@intel.com> > >> Date: Mon Aug 27 09:32:17 2018 +0800 > >> > >> virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT > >> > >> So I assume it's perfectly fine to not have it. > >> > >> I'd say it's the responsibility of the guest to *not* use > >> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning > >> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself > >> into the foot. > > > > Based on the timing I am guessing the page poisoning was in the same > > patch set as the free page hinting since there is only 2 seconds > > between when the two are merged. If I recall the page poisoning logic > > was added after the issue was pointed out that it needed to address > > it. > > > > Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec. > > Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile). Right. We need to make sure any poison or init on free is migrated over to the destination before we can say we are going to skip the migration. If anything what I probably ought to look into would be if we could change the code so that if we have a hint the page is unused, poison is enabled, and the value is 0 we just ship over a 0 page instead of giving up on hinting entirely.
>> >> Postcopy is a very good point, bought! >> >> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then. > > Do you have a link to the spec that I could look at? I am not hopeful > that this will be listed there since the poison side of QEMU was never > implemented. The flag is only here because it was copied over in the > kernel header. Introducing a feature without a) specification what it does b) implementation (of both sides) showing what has to be done c) any kind of documentation of what needs to be done just horrible. The latest-greatest spec lives in https://github.com/oasis-tcs/virtio-spec.git I can't spot any signs of free page hinting/page poisioning. :( We should document our result of page poisoning, free page hinting, and free page reporting there as well. I hope you'll have time for the latter. ------------------------------------------------------------------------- Semantics of VIRTIO_BALLOON_F_PAGE_POISON ------------------------------------------------------------------------- "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the guest is using page poisoning. Guest writes to the poison_val config field to tell host about the page poisoning value that is in use." -> Very little information, no signs about what has to be done. "Let the hypervisor know that we are expecting a specific value to be written back in balloon pages." -> Okay, that talks about "balloon pages", which would include right now -- pages "inflated" and then "deflated" using free page hinting -- pages "inflated" and then "deflated" using oridnary inflate/deflate queue -- pages "inflated" using free page reporting and automatically "deflated" on access So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest deflates a page (either explicitly, or implicitly with free page reporting), it is filled with "poison_val". And I would add "However, if the inflated page was not filled with "poison_val" when inflating, it's not predictable if the original page or a page filled with "poison_val" is returned." Which would cover the "we did not discard the page in the hypervisor, so the original page is still there". We should also document what is expected to happen if "poison_val" is suddenly changed by the guest at one point in time again. (e.g., not supported, unexpected things can happen, etc.) Also, we might have to document that a device reset resets the poison_val to 0. (not sure if that's really necessary) ------------------------------------------------------------------------- What we have to do in the guest/Linux: ------------------------------------------------------------------------- A guest which relies on this (esp., free page reporting in Linux only, right?), has to not use/advertise VIRTIO_BALLOON_F_REPORTING *in case VIRTIO_BALLOON_F_PAGE_POISON is not provided* by the host. AFAIKS, ordinary inflation/deflation and free page hinting does currently not care, as we go via free_page(), so that is currently fine AFAIKs. ------------------------------------------------------------------------- What we have to do in the hypervisor/QEMU: ------------------------------------------------------------------------- With VIRTIO_BALLOON_F_PAGE_POISON, we could provide free page reporting easily IFF "page_val"==0. However, as you said, it will currently be expensive in case of postcopy, as the guest still zeroes out all pages. Document that properly. With VIRTIO_BALLOON_F_PAGE_POISON, don't inflate any pages right now (not discarding anything), OR fill the pages with poison_val when deflating. I guess the latter would be better - even if current Linux does not need it, it's what we are expected to do AFAIKS. With VIRTIO_BALLOON_F_PAGE_POISON and page_val != 0, discard all free page reporting attempts. (== what your patch #3 does). Document that properly. Makes sense? See below for guest migration thingies. > >> I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now). > > The problem is this was broken from the start for that. The issue is > that the poison feature was wrapped up within the page hinting > feature. So as a result enabling it for a VM that doesn't recognize > the feature independently would likely leave the poison value > uninitialized and flagging as though a value of 0 is used. In addition > the original poison checking wasn't making sure that the page wasn't > init_on_free which has the same effect as page poisoning but isn't > page poisoning. If the guest agrees to have VIRTIO_BALLOON_F_PAGE_POISON but does not initialize poison_val, then it's a guest bug, I wouldn't worry about that for now. > >>> >>> The worst case scenario would be one in which the VM was suspended for >>> migration while there were still pages in the balloon that needed to >>> be drained. In such a case you would have them in an indeterminate >>> state since the last page poisoning for them might have been ignored >>> since they were marked as "free", so they are just going to be >>> whatever value they were if they had been migrated at all. >>> >>>>> >>>>>> >>>>>>> + return; >>>>>>> + } >>>>>>> + >>>>>>> if (s->free_page_report_cmd_id == UINT_MAX) { >>>>>>> s->free_page_report_cmd_id = >>>>>>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; >>>>>> >>>>>> We should rename all "free_page_report" stuff we can to >>>>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my >>>>>> side :) ) before adding free page reporting . >>>>>> >>>>>> (looking at the virtio-balloon linux header, it's also confusing but >>>>>> we're stuck with that - maybe we should add better comments) >>>>> >>>>> Are we stuck? Couldn't we just convert it to an anonymous union with >>>>> free_page_hint_cmd_id and then use that where needed? >>>> >>>> Saw your patch already :) >>>> >>>>> >>>>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) >>>>>>> if (s->qemu_4_0_config_size) { >>>>>>> return sizeof(struct virtio_balloon_config); >>>>>>> } >>>>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { >>>>>>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || >>>>>>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>>>>>> return sizeof(struct virtio_balloon_config); >>>>>>> } >>>>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { >>>>>>> - return offsetof(struct virtio_balloon_config, poison_val); >>>>>>> - } >>>>>> >>>>>> I am not sure this change is completely sane. Why is that necessary at all? >>>>> >>>>> The poison_val is stored at the end of the structure and is required >>>>> in order to make free page hinting to work. What this change is doing >>>> >>>> Required to make it work? In the kernel, >>>> >>>> commit 2e991629bcf55a43681aec1ee096eeb03cf81709 >>>> Author: Wei Wang <wei.w.wang@intel.com> >>>> Date: Mon Aug 27 09:32:19 2018 +0800 >>>> >>>> virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON >>>> >>>> was merged after >>>> >>>> commit 86a559787e6f5cf662c081363f64a20cad654195 >>>> Author: Wei Wang <wei.w.wang@intel.com> >>>> Date: Mon Aug 27 09:32:17 2018 +0800 >>>> >>>> virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT >>>> >>>> So I assume it's perfectly fine to not have it. >>>> >>>> I'd say it's the responsibility of the guest to *not* use >>>> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning >>>> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself >>>> into the foot. >>> >>> Based on the timing I am guessing the page poisoning was in the same >>> patch set as the free page hinting since there is only 2 seconds >>> between when the two are merged. If I recall the page poisoning logic >>> was added after the issue was pointed out that it needed to address >>> it. >>> >> >> Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec. >> >> Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile). > > Right. We need to make sure any poison or init on free is migrated > over to the destination before we can say we are going to skip the > migration. If anything what I probably ought to look into would be if > we could change the code so that if we have a hint the page is unused, > poison is enabled, and the value is 0 we just ship over a 0 page > instead of giving up on hinting entirely. > Yeah, we have to migrate poison_val from source to destination. Also, we should worry about us losing the page poisoning feature when migrating from source to destination. Thinking about it, it might make sense to completely decouple page poisoning here. Assign it a separate property (as you did), default enable it, but disable it via QEMU compat machines. Then, we won't lose the feature when migrating.
On 16.04.20 10:18, David Hildenbrand wrote: >>> >>> Postcopy is a very good point, bought! >>> >>> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then. >> >> Do you have a link to the spec that I could look at? I am not hopeful >> that this will be listed there since the poison side of QEMU was never >> implemented. The flag is only here because it was copied over in the >> kernel header. > > Introducing a feature without > > a) specification what it does > b) implementation (of both sides) showing what has to be done > c) any kind of documentation of what needs to be done > > just horrible. > > The latest-greatest spec lives in > > https://github.com/oasis-tcs/virtio-spec.git > > I can't spot any signs of free page hinting/page poisioning. :( > > We should document our result of page poisoning, free page hinting, and > free page reporting there as well. I hope you'll have time for the latter. > > ------------------------------------------------------------------------- > Semantics of VIRTIO_BALLOON_F_PAGE_POISON > ------------------------------------------------------------------------- > > "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the > guest is using page poisoning. Guest writes to the poison_val config > field to tell host about the page poisoning value that is in use." > -> Very little information, no signs about what has to be done. > > "Let the hypervisor know that we are expecting a specific value to be > written back in balloon pages." > -> Okay, that talks about "balloon pages", which would include right now > -- pages "inflated" and then "deflated" using free page hinting > -- pages "inflated" and then "deflated" using oridnary inflate/deflate > queue > -- pages "inflated" using free page reporting and automatically > "deflated" on access > > So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest > deflates a page (either explicitly, or implicitly with free page > reporting), it is filled with "poison_val". > > And I would add > > "However, if the inflated page was not filled with "poison_val" when > inflating, it's not predictable if the original page or a page filled > with "poison_val" is returned." > > Which would cover the "we did not discard the page in the hypervisor, so > the original page is still there". > > > We should also document what is expected to happen if "poison_val" is > suddenly changed by the guest at one point in time again. (e.g., not > supported, unexpected things can happen, etc.) Also, we might have to > document that a device reset resets the poison_val to 0. (not sure if > that's really necessary) Looking at the spec, we will only have to care about "VIRTIO_BALLOON_F_MUST_TELL_HOST". Especially: "If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is negotiated, the driver MUST NOT use pages from the balloon until the device has acknowledged the deflate request. Otherwise, if the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is not negotiated, the driver MAY begin to re-use pages previously given to the balloon before the device has acknowledged the deflate request." So I guess, in QEMU, if - VIRTIO_BALLOON_F_MUST_TELL_HOST is not set (== currently always) - VIRTIO_BALLOON_F_PAGE_POISON is set - poison_val is != 0 then, don't discard any pages, because we cannot fill the page reliably during a deflation request (== guest could already be reusing the page and expecting a certain value). In QEMU, we should always set VIRTIO_BALLOON_F_MUST_TELL_HOST when setting VIRTIO_BALLOON_F_PAGE_POISON. In the spec, we should document that VIRTIO_BALLOON_F_PAGE_POISON should only be used with VIRTIO_BALLOON_F_MUST_TELL_HOST or sth like that ... confusing stuff. Let me know what you think.
On Thu, Apr 16, 2020 at 10:18:49AM +0200, David Hildenbrand wrote: > >> > >> Postcopy is a very good point, bought! > >> > >> But (what you wrote above) it sounds like that this is really what we *have to* do, not an optimization. I‘ll double check the spec tomorrow (hopefully it was documented). We should rephrase the comment then. > > > > Do you have a link to the spec that I could look at? I am not hopeful > > that this will be listed there since the poison side of QEMU was never > > implemented. The flag is only here because it was copied over in the > > kernel header. > > Introducing a feature without > > a) specification what it does > b) implementation (of both sides) showing what has to be done > c) any kind of documentation of what needs to be done > > just horrible. > > The latest-greatest spec lives in > > https://github.com/oasis-tcs/virtio-spec.git > > I can't spot any signs of free page hinting/page poisioning. :( Right. I merged the hinting support in Linux on the hope that spec and qemu upstream will surface later. It seemed so since IIRC there were some drafts (even though I don't have any links to hand). Unfortunately neither happened. > We should document our result of page poisoning, free page hinting, and > free page reporting there as well. I hope you'll have time for the latter. > > ------------------------------------------------------------------------- > Semantics of VIRTIO_BALLOON_F_PAGE_POISON > ------------------------------------------------------------------------- > > "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the > guest is using page poisoning. Guest writes to the poison_val config > field to tell host about the page poisoning value that is in use." > -> Very little information, no signs about what has to be done. I think it's an informational field. Knowing that free pages are full of a specific pattern can be handy for the hypervisor for a variety of reasons. E.g. compression/deduplication? > "Let the hypervisor know that we are expecting a specific value to be > written back in balloon pages." > -> Okay, that talks about "balloon pages", which would include right now > -- pages "inflated" and then "deflated" using free page hinting > -- pages "inflated" and then "deflated" using oridnary inflate/deflate > queue ATM, in this case driver calls "free" and that fills page with the poison value. > -- pages "inflated" using free page reporting and automatically > "deflated" on access > > So VIRTIO_BALLOON_F_PAGE_POISON really means "whenever the guest > deflates a page (either explicitly, or implicitly with free page > reporting), it is filled with "poison_val". It might be a valid optimization to allow driver to skip poisoning of freed pages in this case. > And I would add > > "However, if the inflated page was not filled with "poison_val" when > inflating, it's not predictable if the original page or a page filled > with "poison_val" is returned." > > Which would cover the "we did not discard the page in the hypervisor, so > the original page is still there". > > > We should also document what is expected to happen if "poison_val" is > suddenly changed by the guest at one point in time again. (e.g., not > supported, unexpected things can happen, etc.) Right. I think we should require that this can only be changed before features have been negotiated. That is the only point where hypervisor can still fail gracefully (i.e. fail FEATURES_OK). > Also, we might have to > document that a device reset resets the poison_val to 0. (not sure if > that's really necessary) Probably yes, for things like kexec. > ------------------------------------------------------------------------- > What we have to do in the guest/Linux: > ------------------------------------------------------------------------- > > A guest which relies on this (esp., free page reporting in Linux only, > right?), has to not use/advertise VIRTIO_BALLOON_F_REPORTING *in case > VIRTIO_BALLOON_F_PAGE_POISON is not provided* by the host. AFAIKS, > ordinary inflation/deflation and free page hinting does currently not > care, as we go via free_page(), so that is currently fine AFAIKs. > > ------------------------------------------------------------------------- > What we have to do in the hypervisor/QEMU: > ------------------------------------------------------------------------- > > With VIRTIO_BALLOON_F_PAGE_POISON, we could provide free page reporting > easily IFF "page_val"==0. However, as you said, it will currently be > expensive in case of postcopy, as the guest still zeroes out all pages. > Document that properly. > > With VIRTIO_BALLOON_F_PAGE_POISON, don't inflate any pages right now > (not discarding anything), OR fill the pages with poison_val when > deflating. I guess the latter would be better - even if current Linux > does not need it, it's what we are expected to do AFAIKS. > > With VIRTIO_BALLOON_F_PAGE_POISON and page_val != 0, discard all free > page reporting attempts. (== what your patch #3 does). Document that > properly. > > > Makes sense? See below for guest migration thingies. > > > > >> I could imagine that we also have to care about ordinary page inflation/deflation when handling page poisoning. (Iow, don‘t inflate/deflate if set for now). > > > > The problem is this was broken from the start for that. The issue is > > that the poison feature was wrapped up within the page hinting > > feature. So as a result enabling it for a VM that doesn't recognize > > the feature independently would likely leave the poison value > > uninitialized and flagging as though a value of 0 is used. In addition > > the original poison checking wasn't making sure that the page wasn't > > init_on_free which has the same effect as page poisoning but isn't > > page poisoning. > > If the guest agrees to have VIRTIO_BALLOON_F_PAGE_POISON but does not > initialize poison_val, then it's a guest bug, I wouldn't worry about > that for now. > > > > >>> > >>> The worst case scenario would be one in which the VM was suspended for > >>> migration while there were still pages in the balloon that needed to > >>> be drained. In such a case you would have them in an indeterminate > >>> state since the last page poisoning for them might have been ignored > >>> since they were marked as "free", so they are just going to be > >>> whatever value they were if they had been migrated at all. > >>> > >>>>> > >>>>>> > >>>>>>> + return; > >>>>>>> + } > >>>>>>> + > >>>>>>> if (s->free_page_report_cmd_id == UINT_MAX) { > >>>>>>> s->free_page_report_cmd_id = > >>>>>>> VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; > >>>>>> > >>>>>> We should rename all "free_page_report" stuff we can to > >>>>>> "free_page_hint"/"free_page_hinting" to avoid confusion (e.g., on my > >>>>>> side :) ) before adding free page reporting . > >>>>>> > >>>>>> (looking at the virtio-balloon linux header, it's also confusing but > >>>>>> we're stuck with that - maybe we should add better comments) > >>>>> > >>>>> Are we stuck? Couldn't we just convert it to an anonymous union with > >>>>> free_page_hint_cmd_id and then use that where needed? > >>>> > >>>> Saw your patch already :) > >>>> > >>>>> > >>>>>>> @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) > >>>>>>> if (s->qemu_4_0_config_size) { > >>>>>>> return sizeof(struct virtio_balloon_config); > >>>>>>> } > >>>>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { > >>>>>>> + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || > >>>>>>> + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>>>>>> return sizeof(struct virtio_balloon_config); > >>>>>>> } > >>>>>>> - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { > >>>>>>> - return offsetof(struct virtio_balloon_config, poison_val); > >>>>>>> - } > >>>>>> > >>>>>> I am not sure this change is completely sane. Why is that necessary at all? > >>>>> > >>>>> The poison_val is stored at the end of the structure and is required > >>>>> in order to make free page hinting to work. What this change is doing > >>>> > >>>> Required to make it work? In the kernel, > >>>> > >>>> commit 2e991629bcf55a43681aec1ee096eeb03cf81709 > >>>> Author: Wei Wang <wei.w.wang@intel.com> > >>>> Date: Mon Aug 27 09:32:19 2018 +0800 > >>>> > >>>> virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON > >>>> > >>>> was merged after > >>>> > >>>> commit 86a559787e6f5cf662c081363f64a20cad654195 > >>>> Author: Wei Wang <wei.w.wang@intel.com> > >>>> Date: Mon Aug 27 09:32:17 2018 +0800 > >>>> > >>>> virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT > >>>> > >>>> So I assume it's perfectly fine to not have it. > >>>> > >>>> I'd say it's the responsibility of the guest to *not* use > >>>> VIRTIO_BALLOON_F_FREE_PAGE_HINT in case it is using page poisoning > >>>> without VIRTIO_BALLOON_F_PAGE_POISON. Otherwise it will shoot itself > >>>> into the foot. > >>> > >>> Based on the timing I am guessing the page poisoning was in the same > >>> patch set as the free page hinting since there is only 2 seconds > >>> between when the two are merged. If I recall the page poisoning logic > >>> was added after the issue was pointed out that it needed to address > >>> it. > >>> > >> > >> Yeah, but any other OS implementing this, not caring about page poisoning wouldn‘t need it. Maybe there is something in the spec. > >> > >> Mental note: we‘ll have to migrate the poison value if not already done (writing on my mobile). > > > > Right. We need to make sure any poison or init on free is migrated > > over to the destination before we can say we are going to skip the > > migration. If anything what I probably ought to look into would be if > > we could change the code so that if we have a hint the page is unused, > > poison is enabled, and the value is 0 we just ship over a 0 page > > instead of giving up on hinting entirely. > > > > Yeah, we have to migrate poison_val from source to destination. Also, we > should worry about us losing the page poisoning feature when migrating > from source to destination. > > Thinking about it, it might make sense to completely decouple page > poisoning here. Assign it a separate property (as you did), default > enable it, but disable it via QEMU compat machines. > > Then, we won't lose the feature when migrating. > > -- > Thanks, > > David / dhildenb
>> We should document our result of page poisoning, free page hinting, and >> free page reporting there as well. I hope you'll have time for the latter. >> >> ------------------------------------------------------------------------- >> Semantics of VIRTIO_BALLOON_F_PAGE_POISON >> ------------------------------------------------------------------------- >> >> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the >> guest is using page poisoning. Guest writes to the poison_val config >> field to tell host about the page poisoning value that is in use." >> -> Very little information, no signs about what has to be done. > > I think it's an informational field. Knowing that free pages > are full of a specific pattern can be handy for the hypervisor > for a variety of reasons. E.g. compression/deduplication? I was referring to the documentation of the feature and what we (hypervisor) are expected to do (in regards to inflation/deflation). Yes, it might be valuable to know that the guest is using poisoning. I assume compression/deduplication (IOW KSM) will figure out themselves that such pages are equal. >> "Let the hypervisor know that we are expecting a specific value to be >> written back in balloon pages." > > > >> -> Okay, that talks about "balloon pages", which would include right now >> -- pages "inflated" and then "deflated" using free page hinting >> -- pages "inflated" and then "deflated" using oridnary inflate/deflate >> queue > > ATM, in this case driver calls "free" and that fills page with the > poison value. Yes, that's what I mentioned somehwere, it's currently done by Linux and ... > > It might be a valid optimization to allow driver to skip > poisoning of freed pages in this case. ... we should prepare for that :) > >> And I would add >> >> "However, if the inflated page was not filled with "poison_val" when >> inflating, it's not predictable if the original page or a page filled >> with "poison_val" is returned." >> >> Which would cover the "we did not discard the page in the hypervisor, so >> the original page is still there". >> >> >> We should also document what is expected to happen if "poison_val" is >> suddenly changed by the guest at one point in time again. (e.g., not >> supported, unexpected things can happen, etc.) > > Right. I think we should require that this can only be changed > before features have been negotiated. > That is the only point where hypervisor can still fail > gracefully (i.e. fail FEATURES_OK). Agreed. I can totally understand if Alex would want to stop working on VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not enable free page reporting in case we don't have VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :)
On Thu, Apr 16, 2020 at 7:55 AM David Hildenbrand <david@redhat.com> wrote: > > >> We should document our result of page poisoning, free page hinting, and > >> free page reporting there as well. I hope you'll have time for the latter. > >> > >> ------------------------------------------------------------------------- > >> Semantics of VIRTIO_BALLOON_F_PAGE_POISON > >> ------------------------------------------------------------------------- > >> > >> "The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the > >> guest is using page poisoning. Guest writes to the poison_val config > >> field to tell host about the page poisoning value that is in use." > >> -> Very little information, no signs about what has to be done. > > > > I think it's an informational field. Knowing that free pages > > are full of a specific pattern can be handy for the hypervisor > > for a variety of reasons. E.g. compression/deduplication? > > I was referring to the documentation of the feature and what we > (hypervisor) are expected to do (in regards to inflation/deflation). > > Yes, it might be valuable to know that the guest is using poisoning. I > assume compression/deduplication (IOW KSM) will figure out themselves > that such pages are equal. The other thing to keep in mind is that the poison value only really comes into play with hinting/reporting. In the case of the standard balloon the pages are considered allocated from the guest's perspective until the balloon is deflated. Then any poison/init will occur over again anyway so I don't think the standard balloon should really care. For hinting it somewhat depends. Currently the implementation is inflating a balloon so having poisoning or init_on_free means it is written to immediately after it is freed so it defeats the purpose of the hinting. However that is a Linux implementation issue, not necessarily an issue with the QEMU implementation. As such may be I should fix that in the Linux driver since that has been ignored in QEMU up until now anyway. The more interesting bit is what should the behavior be from the hypervisor when a page is marked as being hinted. I think right now the behavior is to just not migrate the page. I wonder though if we shouldn't instead just consider the page a zero page, and then maybe modify the zero page behavior for the case where we know page poisoning is enabled. For reporting it is a matter of tracking the contents. We don't want to modify the contents in any way as we are attempting to essentially do in-place tracking of the page. So if it is poisoned or initialized it needs to stay in that state so we cannot invalidate the page if doing so will cause it to lose state information. > >> "Let the hypervisor know that we are expecting a specific value to be > >> written back in balloon pages." > > > > > > > >> -> Okay, that talks about "balloon pages", which would include right now > >> -- pages "inflated" and then "deflated" using free page hinting > >> -- pages "inflated" and then "deflated" using oridnary inflate/deflate > >> queue > > > > ATM, in this case driver calls "free" and that fills page with the > > poison value. > > Yes, that's what I mentioned somehwere, it's currently done by Linux and ... > > > > > It might be a valid optimization to allow driver to skip > > poisoning of freed pages in this case. > > ... we should prepare for that :) Agreed. > > > >> And I would add > >> > >> "However, if the inflated page was not filled with "poison_val" when > >> inflating, it's not predictable if the original page or a page filled > >> with "poison_val" is returned." > >> > >> Which would cover the "we did not discard the page in the hypervisor, so > >> the original page is still there". > >> > >> > >> We should also document what is expected to happen if "poison_val" is > >> suddenly changed by the guest at one point in time again. (e.g., not > >> supported, unexpected things can happen, etc.) > > > > Right. I think we should require that this can only be changed > > before features have been negotiated. > > That is the only point where hypervisor can still fail > > gracefully (i.e. fail FEATURES_OK). > > Agreed. I believe that is the current behavior. Essentially if poisoning enabled then the feature flag is left set. I think the one change I will make in the driver is that if poisoning is enabled in the kernel, but PAGE_POISON is not available as a feature, I am going to disable both the reporting and hinting features in virtballoon_validate. > I can totally understand if Alex would want to stop working on > VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not > enable free page reporting in case we don't have > VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :) I already have a patch for that. The bigger issue is how to deal with the PAGE_POISON being enabled with FREE_PAGE_HINTING. The legacy code at this point is just broken and is plowing through with FREE_PAGE_HINTING while it is enabled. That is safe for now because it is using a balloon, the side effect is that it is going to defer migration. If it switches to a page reporting type setup at some point in the future we would need to make sure something is written to the other end to identify the poison/zero pages.
> The other thing to keep in mind is that the poison value only really > comes into play with hinting/reporting. In the case of the standard > balloon the pages are considered allocated from the guest's Currently just as free page hinting IMHO. They are temporarily considered allocated. > perspective until the balloon is deflated. Then any poison/init will > occur over again anyway so I don't think the standard balloon should > really care. I think we should make this consistent. And as we discuss below, this allows for a nice optimization in the guest even for ordinary inflation/deflation (no need to zero out/poison again when giving the pages back to the buddy). > > For hinting it somewhat depends. Currently the implementation is > inflating a balloon so having poisoning or init_on_free means it is > written to immediately after it is freed so it defeats the purpose of > the hinting. However that is a Linux implementation issue, not Yeah, and as we discuss below, we can optimize that later in Linux. It's sub-optimal, I agree. > necessarily an issue with the QEMU implementation. As such may be I > should fix that in the Linux driver since that has been ignored in > QEMU up until now anyway. The more interesting bit is what should the > behavior be from the hypervisor when a page is marked as being hinted. > I think right now the behavior is to just not migrate the page. I > wonder though if we shouldn't instead just consider the page a zero > page, and then maybe modify the zero page behavior for the case where > we know page poisoning is enabled. I consider that maybe future work. Let's keep it simple for now (iow, try to get page poisoning handling right first). The optimize the guest handling on balloon deflation / end of free page hinting. [...] >> I can totally understand if Alex would want to stop working on >> VIRTIO_BALLOON_F_PAGE_POISON at this point and only fix the guest to not >> enable free page reporting in case we don't have >> VIRTIO_BALLOON_F_PAGE_POISON (unless that's already done), lol. :) > > I already have a patch for that. > > The bigger issue is how to deal with the PAGE_POISON being enabled > with FREE_PAGE_HINTING. The legacy code at this point is just broken > and is plowing through with FREE_PAGE_HINTING while it is enabled. > That is safe for now because it is using a balloon, the side effect is > that it is going to defer migration. If it switches to a page > reporting type setup at some point in the future we would need to make > sure something is written to the other end to identify the poison/zero > pages. I think we don't have to worry about that for now. Might be sub-optimal, but then, I don't think actual page poisoning isn't all that frequently used in production setups.
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index a4729f7fc930..1c6d36a29a04 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -531,6 +531,15 @@ static void virtio_balloon_free_page_start(VirtIOBalloon *s) return; } + /* + * If page poisoning is enabled then we probably shouldn't bother with + * the hinting since the poisoning will dirty the page and invalidate + * the work we are doing anyway. + */ + if (virtio_vdev_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) { + return; + } + if (s->free_page_report_cmd_id == UINT_MAX) { s->free_page_report_cmd_id = VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN; @@ -618,12 +627,10 @@ static size_t virtio_balloon_config_size(VirtIOBalloon *s) if (s->qemu_4_0_config_size) { return sizeof(struct virtio_balloon_config); } - if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON)) { + if (virtio_has_feature(features, VIRTIO_BALLOON_F_PAGE_POISON) || + virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { return sizeof(struct virtio_balloon_config); } - if (virtio_has_feature(features, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { - return offsetof(struct virtio_balloon_config, poison_val); - } return offsetof(struct virtio_balloon_config, free_page_report_cmd_id); } @@ -634,6 +641,7 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) config.num_pages = cpu_to_le32(dev->num_pages); config.actual = cpu_to_le32(dev->actual); + config.poison_val = cpu_to_le32(dev->poison_val); if (dev->free_page_report_status == FREE_PAGE_REPORT_S_REQUESTED) { config.free_page_report_cmd_id = @@ -697,6 +705,9 @@ static void virtio_balloon_set_config(VirtIODevice *vdev, qapi_event_send_balloon_change(vm_ram_size - ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT)); } + dev->poison_val = virtio_vdev_has_feature(vdev, + VIRTIO_BALLOON_F_PAGE_POISON) ? + le32_to_cpu(config.poison_val) : 0; trace_virtio_balloon_set_config(dev->actual, oldactual); } @@ -706,6 +717,9 @@ static uint64_t virtio_balloon_get_features(VirtIODevice *vdev, uint64_t f, VirtIOBalloon *dev = VIRTIO_BALLOON(vdev); f |= dev->host_features; virtio_add_feature(&f, VIRTIO_BALLOON_F_STATS_VQ); + if (virtio_has_feature(f, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) { + virtio_add_feature(&f, VIRTIO_BALLOON_F_PAGE_POISON); + } return f; } @@ -854,6 +868,8 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) g_free(s->stats_vq_elem); s->stats_vq_elem = NULL; } + + s->poison_val = 0; } static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) @@ -916,6 +932,8 @@ static Property virtio_balloon_properties[] = { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_FREE_PAGE_HINT, false), + DEFINE_PROP_BIT("x-page-poison", VirtIOBalloon, host_features, + VIRTIO_BALLOON_F_PAGE_POISON, false), /* QEMU 4.0 accidentally changed the config size even when free-page-hint * is disabled, resulting in QEMU 3.1 migration incompatibility. This * property retains this quirk for QEMU 4.1 machine types. diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h index d1c968d2376e..7fe78e5c14d7 100644 --- a/include/hw/virtio/virtio-balloon.h +++ b/include/hw/virtio/virtio-balloon.h @@ -70,6 +70,7 @@ typedef struct VirtIOBalloon { uint32_t host_features; bool qemu_4_0_config_size; + uint32_t poison_val; } VirtIOBalloon; #endif