diff mbox series

[v1,4/4] accel/kvm: Report the loss of a large memory page

Message ID 20241022213503.1189954-5-william.roche@oracle.com (mailing list archive)
State New
Headers show
Series hugetlbfs memory HW error fixes | expand

Commit Message

William Roche Oct. 22, 2024, 9:35 p.m. UTC
From: William Roche <william.roche@oracle.com>

On a HW memory error, we need to better report the impact of the
error. So when an entire large page is impacted by an error (as in the
hugetlbfs case), we emit a warning message when this page is first hit:
Memory error: Loosing a large page (size: X) at QEMU addr Y and GUEST addr Z

Signed-off-by: William Roche <william.roche@oracle.com>
---
 accel/kvm/kvm-all.c      | 9 ++++++++-
 include/sysemu/kvm_int.h | 6 ++++--
 target/arm/kvm.c         | 2 +-
 target/i386/kvm/kvm.c    | 2 +-
 4 files changed, 14 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 40117eefa7..bddaf1e981 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1284,7 +1284,7 @@  static void kvm_unpoison_all(void *param)
     }
 }
 
-void kvm_hwpoison_page_add(ram_addr_t ram_addr, size_t sz)
+void kvm_hwpoison_page_add(ram_addr_t ram_addr, size_t sz, void *ha, hwaddr gpa)
 {
     HWPoisonPage *page;
 
@@ -1300,6 +1300,13 @@  void kvm_hwpoison_page_add(ram_addr_t ram_addr, size_t sz)
     page->ram_addr = ram_addr;
     page->page_size = sz;
     QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
+
+    if (sz > TARGET_PAGE_SIZE) {
+        gpa = ROUND_DOWN(gpa, sz);
+        ha = (void *)ROUND_DOWN((uint64_t)ha, sz);
+        warn_report("Memory error: Loosing a large page (size: %zu) "
+            "at QEMU addr %p and GUEST addr 0x%" HWADDR_PRIx, sz, ha, gpa);
+    }
 }
 
 bool kvm_hwpoisoned_mem(void)
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index d2160be0ae..af569380ca 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -177,12 +177,14 @@  void kvm_set_max_memslot_size(hwaddr max_slot_size);
  * kvm_hwpoison_page_add:
  *
  * Parameters:
- *  @ram_addr: the address in the RAM for the poisoned page
+ *  @addr: the address in the RAM for the poisoned page
  *  @sz: size of the poisoned page as reported by the kernel
+ *  @hva: host virtual address aka QEMU addr
+ *  @gpa: guest physical address aka GUEST addr
  *
  * Add a poisoned page to the list
  *
  * Return: None.
  */
-void kvm_hwpoison_page_add(ram_addr_t ram_addr, size_t sz);
+void kvm_hwpoison_page_add(ram_addr_t addr, size_t sz, void *hva, hwaddr gpa);
 #endif
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 11579e170b..f8eb553f7c 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2363,7 +2363,7 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr, short addr_lsb)
             if (sz == TARGET_PAGE_SIZE) {
                 sz = qemu_ram_pagesize_from_host(addr);
             }
-            kvm_hwpoison_page_add(ram_addr, sz);
+            kvm_hwpoison_page_add(ram_addr, sz, addr, paddr);
             /*
              * If this is a BUS_MCEERR_AR, we know we have been called
              * synchronously from the vCPU thread, so we can easily
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 71e674bca0..34cfa8b764 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -757,7 +757,7 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr, short addr_lsb)
             if (sz == TARGET_PAGE_SIZE) {
                 sz = qemu_ram_pagesize_from_host(addr);
             }
-            kvm_hwpoison_page_add(ram_addr, sz);
+            kvm_hwpoison_page_add(ram_addr, sz, addr, paddr);
             kvm_mce_inject(cpu, paddr, code);
 
             /*