@@ -1130,8 +1130,17 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret;
}
+/*
+ * We track the poisoned pages to be able to:
+ * - replace them on VM reset
+ * - skip them when migrating
+ * - block the migration of a VM in which a poisoned page was ignored,
+ *   because the VM kernel (not knowing about the error) could
+ *   incorrectly access that page.
+ */
typedef struct HWPoisonPage {
ram_addr_t ram_addr;
+ bool vm_known; /* set when the caller reports the error as injected into the VM */
QLIST_ENTRY(HWPoisonPage) list;
} HWPoisonPage;
@@ -1163,20 +1172,36 @@ bool kvm_hwpoisoned_page(RAMBlock *block, void *offset)
return false;
}
-void kvm_hwpoison_page_add(ram_addr_t ram_addr)
+void kvm_hwpoison_page_add(ram_addr_t ram_addr, bool known)
{
HWPoisonPage *page;
QLIST_FOREACH(page, &hwpoison_page_list, list) {
if (page->ram_addr == ram_addr) {
+ if (known && !page->vm_known) {
+ page->vm_known = true; /* never reset to false here */
+ }
return;
}
}
page = g_new(HWPoisonPage, 1);
page->ram_addr = ram_addr;
+ page->vm_known = known; /* first report: record the caller's view */
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}
+bool kvm_hwpoisoned_unknown(void)
+{
+    HWPoisonPage *entry;
+    QLIST_FOREACH(entry, &hwpoison_page_list, list) {
+        if (entry->vm_known) {
+            continue; /* already reported to the guest */
+        }
+        return true; /* one unreported page is enough */
+    }
+    return false;
+}
+
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr)
{
return false;
}
+
+bool kvm_hwpoisoned_unknown(void)
+{
+ return false; /* unconditional: this variant never reports an unknown poisoned page */
+}
@@ -549,4 +549,10 @@ uint32_t kvm_dirty_ring_size(void);
* false: page not yet poisoned
*/
bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr);
+
+/**
+ * kvm_hwpoisoned_unknown - report whether any memory error recorded by
+ * QEMU is still unknown to (hasn't been injected into) the VM kernel.
+ */
+bool kvm_hwpoisoned_unknown(void);
#endif
@@ -132,10 +132,11 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size);
*
* Parameters:
* @ram_addr: the address in the RAM for the poisoned page
+ * @known: true if the error has been injected into the VM kernel
*
* Add a poisoned page to the list
*
* Return: None.
*/
-void kvm_hwpoison_page_add(ram_addr_t ram_addr);
+void kvm_hwpoison_page_add(ram_addr_t ram_addr, bool known);
#endif
@@ -67,6 +67,7 @@
#include "options.h"
#include "sysemu/dirtylimit.h"
#include "qemu/sockets.h"
+#include "sysemu/kvm.h"
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -1892,6 +1893,11 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
+ if (kvm_hwpoisoned_unknown()) {
+ error_setg(errp, "Can't migrate this vm with ignored poisoned page");
+ return false;
+ }
+
if (migration_is_blocked(errp)) {
return false;
}
@@ -1101,7 +1101,6 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
- kvm_hwpoison_page_add(ram_addr);
/*
* If this is a BUS_MCEERR_AR, we know we have been called
* synchronously from the vCPU thread, so we can easily
@@ -1112,7 +1111,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
* called synchronously from the vCPU thread, or a bit
* later from the main thread, so doing the injection of
* the error would be more complicated.
+ * In this case, BUS_MCEERR_AO errors are unknown to the
+ * guest, and we will prevent migration as long as this
+ * poisoned page hasn't generated a BUS_MCEERR_AR error
+ * that the guest takes into account.
*/
+ kvm_hwpoison_page_add(ram_addr, (code == BUS_MCEERR_AR));
if (code == BUS_MCEERR_AR) {
kvm_cpu_synchronize_state(c);
if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
@@ -642,7 +642,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
- kvm_hwpoison_page_add(ram_addr);
+ kvm_hwpoison_page_add(ram_addr, true);
kvm_mce_inject(cpu, paddr, code);
/*