diff mbox series

[v3,3/3] target-i386: post memory failure event to uplayer

Message ID 20200930100440.1060708-4-pizhenwei@bytedance.com (mailing list archive)
State New, archived
Headers show
Series add MEMORY_FAILURE event | expand

Commit Message

zhenwei pi Sept. 30, 2020, 10:04 a.m. UTC
Post memory failure event to uplayer to handle hardware memory
corrupted event. Rather than simple QEMU log, QEMU could report more
effective message to uplayer. For example, guest crashes by MCE,
selecting another host server is a better choice.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 target/i386/helper.c | 24 ++++++++++++++++++++----
 target/i386/kvm.c    | 13 ++++++++++++-
 2 files changed, 32 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/target/i386/helper.c b/target/i386/helper.c
index 17e1684ff9..2a184c4835 100644
--- a/target/i386/helper.c
+++ b/target/i386/helper.c
@@ -18,6 +18,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qapi/qapi-events-run-state.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "qemu/qemu-print.h"
@@ -851,6 +852,15 @@  typedef struct MCEInjectionParams {
     int flags;
 } MCEInjectionParams;
 
+static void emit_guest_memory_failure(MemoryFailureAction action, bool ar,
+                                      bool recursive)
+{
+    MemoryFailureFlags mff = {.action_required = ar, .recursive = recursive};
+
+    qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_GUEST, action,
+                                   &mff);
+}
+
 static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
 {
     MCEInjectionParams *params = data.host_ptr;
@@ -859,16 +869,18 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
     uint64_t *banks = cenv->mce_banks + 4 * params->bank;
     g_autofree char *msg = NULL;
     bool need_reset = false;
+    bool recursive;
+    bool ar = !!(params->status & MCI_STATUS_AR);
 
     cpu_synchronize_state(cs);
+    recursive = !!(cenv->mcg_status & MCG_STATUS_MCIP);
 
     /*
      * If there is an MCE exception being processed, ignore this SRAO MCE
      * unless unconditional injection was requested.
      */
-    if (!(params->flags & MCE_INJECT_UNCOND_AO)
-        && !(params->status & MCI_STATUS_AR)
-        && (cenv->mcg_status & MCG_STATUS_MCIP)) {
+    if (!(params->flags & MCE_INJECT_UNCOND_AO) && !ar && recursive) {
+        emit_guest_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, ar, recursive);
         return;
     }
 
@@ -896,7 +908,7 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
             return;
         }
 
-        if (cenv->mcg_status & MCG_STATUS_MCIP) {
+        if (recursive) {
             need_reset = true;
             msg = g_strdup_printf("CPU %d: Previous MCE still in progress, "
                                   "raising triple fault", cs->cpu_index);
@@ -909,6 +921,8 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
         }
 
         if (need_reset) {
+            emit_guest_memory_failure(MEMORY_FAILURE_ACTION_FATAL, ar,
+                                      recursive);
             monitor_printf(params->mon, "%s", msg);
             qemu_log_mask(CPU_LOG_RESET, "%s\n", msg);
             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
@@ -934,6 +948,8 @@  static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data)
     } else {
         banks[1] |= MCI_STATUS_OVER;
     }
+
+    emit_guest_memory_failure(MEMORY_FAILURE_ACTION_INJECT, ar, recursive);
 }
 
 void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 9efb07e7c8..923749f5d8 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -13,6 +13,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qapi/qapi-events-run-state.h"
 #include "qapi/error.h"
 #include <sys/ioctl.h>
 #include <sys/utsname.h>
@@ -575,8 +576,17 @@  static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
                        (MCM_ADDR_PHYS << 6) | 0xc, flags);
 }
 
+static void emit_hypervisor_memory_failure(MemoryFailureAction action, bool ar)
+{
+    MemoryFailureFlags mff = {.action_required = ar, .recursive = false};
+
+    qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_HYPERVISOR, action,
+                                   &mff);
+}
+
 static void hardware_memory_error(void *host_addr)
 {
+    emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_FATAL, true);
     error_report("QEMU got Hardware memory error at addr %p", host_addr);
     exit(1);
 }
@@ -631,7 +641,8 @@  void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
         hardware_memory_error(addr);
     }
 
-    /* Hope we are lucky for AO MCE */
+    /* Hope we are lucky for AO MCE, just notify a event */
+    emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, false);
 }
 
 static void kvm_reset_exception(CPUX86State *env)