diff mbox series

[v5,2/2] migration: prevent migration when a poisoned page is unknown from the VM

Message ID 20231106220319.456765-3-william.roche@oracle.com (mailing list archive)
State New, archived
Headers show
Series Qemu crashes on VM migration after an handled memory error | expand

Commit Message

William Roche Nov. 6, 2023, 10:03 p.m. UTC
From: William Roche <william.roche@oracle.com>

Migrating a poisoned page as a zero-page can only be done when the
running guest kernel knows about this poison, so that it marks this
page as inaccessible and any access in the VM would fail.

But if a poison information is not relayed to the VM, the kernel
does not prevent its access. In this case, transforming a poisoned
page into a zero-page could create a case of silent data corruption.

So we have to keep track of poisons not injected into the guest,
like the ARM VM emulation ignoring BUS_MCEERR_AO errors.
When such a page exists, the migration has to be blocked.

Signed-off-by: William Roche <william.roche@oracle.com>
---
 accel/kvm/kvm-all.c      | 27 ++++++++++++++++++++++++++-
 accel/stubs/kvm-stub.c   |  5 +++++
 include/sysemu/kvm.h     |  6 ++++++
 include/sysemu/kvm_int.h |  3 ++-
 migration/migration.c    |  6 ++++++
 target/arm/kvm64.c       |  6 +++++-
 target/i386/kvm/kvm.c    |  2 +-
 7 files changed, 51 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 64c0b37823..59af34f5a6 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1130,8 +1130,17 @@  int kvm_vm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+/*
+ * We track the poisoned pages to be able to:
+ * - replace them on VM reset
+ * - skip them when migrating
+ * - block a migration for a VM where a poisoned page is ignored
+ *   as this VM kernel (not knowing about the error) could
+ *   incorrectly access the page.
+ */
 typedef struct HWPoisonPage {
     ram_addr_t ram_addr;
+    bool       vm_known;
     QLIST_ENTRY(HWPoisonPage) list;
 } HWPoisonPage;
 
@@ -1163,20 +1172,36 @@  bool kvm_hwpoisoned_page(RAMBlock *block, void *offset)
     return false;
 }
 
-void kvm_hwpoison_page_add(ram_addr_t ram_addr)
+void kvm_hwpoison_page_add(ram_addr_t ram_addr, bool known)
 {
     HWPoisonPage *page;
 
     QLIST_FOREACH(page, &hwpoison_page_list, list) {
         if (page->ram_addr == ram_addr) {
+            if (known && !page->vm_known) {
+                page->vm_known = true;
+            }
             return;
         }
     }
     page = g_new(HWPoisonPage, 1);
     page->ram_addr = ram_addr;
+    page->vm_known = known;
     QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
 }
 
+bool kvm_hwpoisoned_unknown(void)
+{
+    HWPoisonPage *pg;
+
+    QLIST_FOREACH(pg, &hwpoison_page_list, list) {
+        if (!pg->vm_known) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
 {
 #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 17774fa5ef..3c914b5b65 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -129,3 +129,8 @@  bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr)
 {
     return false;
 }
+
+bool kvm_hwpoisoned_unknown(void)
+{
+    return false;
+}
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 66937f9dfe..37d66ac614 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -549,4 +549,10 @@  uint32_t kvm_dirty_ring_size(void);
  *          false: page not yet poisoned
  */
 bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr);
+
+/**
+ * kvm_hwpoisoned_unknown - indicate if a qemu reported memory error
+ * is still unknown to (hasn't been injected into) the VM kernel.
+ */
+bool kvm_hwpoisoned_unknown(void);
 #endif
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index fd846394be..fd0a32c34a 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -132,10 +132,11 @@  void kvm_set_max_memslot_size(hwaddr max_slot_size);
  *
  * Parameters:
  *  @ram_addr: the address in the RAM for the poisoned page
+ *  @known: indicate if the error is injected to the VM kernel
  *
  * Add a poisoned page to the list
  *
  * Return: None.
  */
-void kvm_hwpoison_page_add(ram_addr_t ram_addr);
+void kvm_hwpoison_page_add(ram_addr_t ram_addr, bool known);
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 28a34c9068..63cb2c80db 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -67,6 +67,7 @@ 
 #include "options.h"
 #include "sysemu/dirtylimit.h"
 #include "qemu/sockets.h"
+#include "sysemu/kvm.h"
 
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -1892,6 +1893,11 @@  static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
         return false;
     }
 
+    if (kvm_hwpoisoned_unknown()) {
+        error_setg(errp, "Can't migrate this vm with ignored poisoned page");
+        return false;
+    }
+
     if (migration_is_blocked(errp)) {
         return false;
     }
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 3c175c93a7..5dea8051f1 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -1101,7 +1101,6 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
-            kvm_hwpoison_page_add(ram_addr);
             /*
              * If this is a BUS_MCEERR_AR, we know we have been called
              * synchronously from the vCPU thread, so we can easily
@@ -1112,7 +1111,12 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
              * called synchronously from the vCPU thread, or a bit
              * later from the main thread, so doing the injection of
              * the error would be more complicated.
+             * In this case, BUS_MCEERR_AO errors are unknown from the
+             * guest, and we will prevent migration as long as this
+             * poisoned page hasn't generated a BUS_MCEERR_AR error
+             * that the guest takes into account.
              */
+            kvm_hwpoison_page_add(ram_addr, (code == BUS_MCEERR_AR));
             if (code == BUS_MCEERR_AR) {
                 kvm_cpu_synchronize_state(c);
                 if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 770e81d56e..08410185a6 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -642,7 +642,7 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         ram_addr = qemu_ram_addr_from_host(addr);
         if (ram_addr != RAM_ADDR_INVALID &&
             kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
-            kvm_hwpoison_page_add(ram_addr);
+            kvm_hwpoison_page_add(ram_addr, true);
             kvm_mce_inject(cpu, paddr, code);
 
             /*