diff mbox series

[v3,2/4] vl: Add option to avoid stopping VM upon guest panic

Message ID 1607705564-26264-3-git-send-email-alejandro.j.jimenez@oracle.com (mailing list archive)
State New, archived
Headers show
Series Add a new -action parameter | expand

Commit Message

Alejandro Jimenez Dec. 11, 2020, 4:52 p.m. UTC
The current default action of pausing a guest after a panic event
is received leaves the responsibility to resume guest execution to the
management layer. The reasons for this behavior are discussed here:
https://lore.kernel.org/qemu-devel/52148F88.5000509@redhat.com/

However, in instances like the case of older guests (Linux and
Windows) using a pvpanic device but missing support for the
PVPANIC_CRASHLOADED event, and Windows guests using the hv-crash
enlightenment, it is desirable to allow the guests to continue
running after sending a PVPANIC_PANICKED event. This allows such
guests to proceed to capture a crash dump and automatically reboot
without intervention of a management layer.

Add an option to avoid stopping a VM after a panic event is received,
by passing:

-action panic=none

in the command line arguments, or during runtime by using an upcoming
QMP command.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
---
 include/sysemu/runstate-action.h |  1 +
 qapi/run-state.json              | 27 ++++++++++++++++++++++++++-
 qemu-options.hx                  |  3 +++
 softmmu/runstate-action.c        | 17 +++++++++++++++++
 softmmu/vl.c                     | 23 +++++++++++++++++++----
 5 files changed, 66 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/include/sysemu/runstate-action.h b/include/sysemu/runstate-action.h
index dba4b85..cfc46ec 100644
--- a/include/sysemu/runstate-action.h
+++ b/include/sysemu/runstate-action.h
@@ -14,6 +14,7 @@ 
 /* in softmmu/runstate-action.c */
 extern RebootAction reboot_action;
 extern ShutdownAction shutdown_action;
+extern PanicAction panic_action;
 
 int process_runstate_actions(void *opaque, QemuOpts *opts, Error **errp);
 int runstate_action_parse(QemuOptsList *opts_list, const char *optarg);
diff --git a/qapi/run-state.json b/qapi/run-state.json
index 7d8ce61..03ff78b 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -337,12 +337,14 @@ 
 #
 # @shutdown: Guest shutdown
 #
+# @panic: Guest has panicked
+#
 # @watchdog: A watchdog device's timer has expired
 #
 # Since: 6.0
 ##
 { 'enum': 'RunStateEventType',
-  'data': [ 'reboot', 'shutdown', 'watchdog' ] }
+  'data': [ 'reboot', 'shutdown', 'panic', 'watchdog' ] }
 
 ##
 # @RunStateAction:
@@ -358,6 +360,7 @@ 
   'data': {
     'reboot': 'RunStateRebootAction',
     'shutdown': 'RunStateShutdownAction',
+    'panic': 'RunStatePanicAction',
     'watchdog': 'RunStateWatchdogAction' } }
 
 ##
@@ -409,6 +412,28 @@ 
   'data': [ 'poweroff', 'pause' ] }
 
 ##
+# @RunStatePanicAction:
+#
+# @action: Action taken by QEMU when guest panics
+#
+# Since: 6.0
+##
+{ 'struct': 'RunStatePanicAction',
+  'data': { 'action': 'PanicAction' } }
+
+##
+# @PanicAction:
+#
+# @none: Continue VM execution
+#
+# @pause: Pause the VM
+#
+# Since: 6.0
+##
+{ 'enum': 'PanicAction',
+  'data': [ 'poweroff', 'pause', 'none' ] }
+
+##
 # @RunStateWatchdogAction:
 #
 # @action: Action taken by QEMU when watchdog device timer expires
diff --git a/qemu-options.hx b/qemu-options.hx
index a0d50f0..9ac5a26 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3899,6 +3899,8 @@  DEF("action", HAS_ARG, QEMU_OPTION_action,
     "                   action when guest reboots [default=none]\n"
     "-action shutdown=poweroff|pause\n"
     "                   action when guest shuts down [default=poweroff]\n"
+    "-action panic=poweroff|pause|none\n"
+    "                   action when guest panics [default=poweroff]\n"
     "-action watchdog=reset|shutdown|poweroff|inject-nmi|pause|debug|none\n"
     "                   action when watchdog fires [default=reset]\n",
     QEMU_ARCH_ALL)
@@ -3911,6 +3913,7 @@  SRST
 
     Examples:
 
+    ``-action panic=none``
     ``-action reboot=shutdown,shutdown=pause``
     ``-watchdog i6300esb -action watchdog=pause``
 
diff --git a/softmmu/runstate-action.c b/softmmu/runstate-action.c
index 5eae320..bc30102 100644
--- a/softmmu/runstate-action.c
+++ b/softmmu/runstate-action.c
@@ -15,6 +15,7 @@ 
 
 RebootAction reboot_action = REBOOT_ACTION_NONE;
 ShutdownAction shutdown_action = SHUTDOWN_ACTION_POWEROFF;
+PanicAction panic_action = PANIC_ACTION_POWEROFF;
 
 static void runstate_action_help(void)
 {
@@ -27,6 +28,18 @@  static void runstate_action_help(void)
 }
 
 /*
+ * When -no-shutdown is requested, either explicitly or via the equivalent
+ * -action shutdown=pause, a guest panic pauses the VM instead of powering off.
+ */
+static void fix_panic_action(void)
+{
+    if (shutdown_action == SHUTDOWN_ACTION_PAUSE &&
+                panic_action == PANIC_ACTION_POWEROFF) {
+        panic_action = PANIC_ACTION_PAUSE;
+    }
+}
+
+/*
  * Process an event|action pair and set the appropriate internal
  * state if event and action are valid.
  */
@@ -45,6 +58,10 @@  static int set_runstate_action(void *opaque, const char *event,
     case RUN_STATE_EVENT_TYPE_SHUTDOWN:
         shutdown_action = qapi_enum_parse(&ShutdownAction_lookup,
                                             action, -1, errp);
+        fix_panic_action();
+        break;
+    case RUN_STATE_EVENT_TYPE_PANIC:
+        panic_action = qapi_enum_parse(&PanicAction_lookup, action, -1, errp);
         break;
     case RUN_STATE_EVENT_TYPE_WATCHDOG:
         if (select_watchdog_action(action) == -1) {
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 35575a1..2ff0291 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1447,10 +1447,21 @@  void qemu_system_guest_panicked(GuestPanicInformation *info)
     if (current_cpu) {
         current_cpu->crash_occurred = true;
     }
-    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
-                                   !!info, info);
-    vm_stop(RUN_STATE_GUEST_PANICKED);
-    if (shutdown_action == SHUTDOWN_ACTION_POWEROFF) {
+    /*
+     * TODO:  Currently the available panic actions are: none, pause, and
+     * poweroff, but in principle debug and reset could be supported as well.
+     * Investigate any potential use cases for the unimplemented actions.
+     */
+    if (panic_action != PANIC_ACTION_NONE) {
+        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
+                                        !!info, info);
+        vm_stop(RUN_STATE_GUEST_PANICKED);
+    } else {
+        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN,
+                                        !!info, info);
+    }
+
+    if (panic_action == PANIC_ACTION_POWEROFF) {
         qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
                                        !!info, info);
         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC);
@@ -3591,6 +3602,10 @@  void qemu_init(int argc, char **argv, char **envp)
                 break;
             case QEMU_OPTION_no_shutdown:
                 shutdown_action = SHUTDOWN_ACTION_PAUSE;
+                /* no_shutdown is requested, also honor it if guest panics */
+                if (panic_action == PANIC_ACTION_POWEROFF) {
+                    panic_action = PANIC_ACTION_PAUSE;
+                }
                 break;
             case QEMU_OPTION_show_cursor:
                 warn_report("The -show-cursor option is deprecated. Please "