diff mbox series

[v2,2/4] vl: Add option to avoid stopping VM upon guest panic

Message ID 1607536336-24701-3-git-send-email-alejandro.j.jimenez@oracle.com (mailing list archive)
State New, archived
Headers show
Series Add a new -action parameter | expand

Commit Message

Alejandro Jimenez Dec. 9, 2020, 5:52 p.m. UTC
The current default action of pausing a guest after a panic event
is received leaves the responsibility to resume guest execution to the
management layer. The reasons for this behavior are discussed here:
https://lore.kernel.org/qemu-devel/52148F88.5000509@redhat.com/

However, in instances like the case of older guests (Linux and
Windows) using a pvpanic device but missing support for the
PVPANIC_CRASHLOADED event, and Windows guests using the hv-crash
enlightenment, it is desirable to allow the guests to continue
running after sending a PVPANIC_PANICKED event. This allows such
guests to proceed to capture a crash dump and automatically reboot
without intervention of a management layer.

Add an option to avoid stopping a VM after a panic event is received,
by passing:

-action panic=none

in the command line arguments, or during runtime by using an upcoming
QMP command.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
---
 include/sysemu/sysemu.h   |  1 +
 qapi/run-state.json       | 27 ++++++++++++++++++++++++++-
 qemu-options.hx           |  3 +++
 softmmu/runstate-action.c | 22 ++++++++++++++++++++++
 softmmu/vl.c              | 14 +++++++++++---
 5 files changed, 63 insertions(+), 4 deletions(-)

Comments

Paolo Bonzini Dec. 10, 2020, 2:43 a.m. UTC | #1
On 09/12/20 18:52, Alejandro Jimenez wrote:
> -    vm_stop(RUN_STATE_GUEST_PANICKED);
> +
> +    if (pause_on_panic) {
> +        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
> +                                        !!info, info);
> +        vm_stop(RUN_STATE_GUEST_PANICKED);
> +    } else {
> +        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN,
> +                                        !!info, info);
> +    }
> +
>       if (!no_shutdown) {
>           qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
>                                          !!info, info);

The "if" below suggests making -action panic's argument a tri-state 
(none/pause/poweroff; default is poweroff and -no-shutdown becomes 
equivalent to -action shutdown=pause,panic=pause).

In principle debug and reset could be supported as well, so maybe add a 
TODO comment.

Paolo
diff mbox series

Patch

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 5480e61..863142e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -45,6 +45,7 @@  extern int ctrl_grab;
 extern int graphic_rotate;
 extern int no_reboot;
 extern int no_shutdown;
+extern int pause_on_panic;
 extern int old_param;
 extern int boot_menu;
 extern bool boot_strict;
diff --git a/qapi/run-state.json b/qapi/run-state.json
index 6b033c1..27b62ce 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -337,12 +337,14 @@ 
 #
 # @shutdown: Guest shutdown
 #
+# @panic: Guest has panicked
+#
 # @watchdog: A watchdog device's timer has expired
 #
 # Since: 6.0
 ##
 { 'enum': 'RunStateEventType',
-  'data': [ 'reboot', 'shutdown', 'watchdog' ] }
+  'data': [ 'reboot', 'shutdown', 'panic', 'watchdog' ] }
 
 ##
 # @RunStateAction:
@@ -358,6 +360,7 @@ 
   'data': {
     'reboot': 'RunStateRebootAction',
     'shutdown': 'RunStateShutdownAction',
+    'panic': 'RunStatePanicAction',
     'watchdog': 'RunStateWatchdogAction' } }
 
 ##
@@ -407,6 +410,28 @@ 
   'data': [ 'pause', 'poweroff' ] }
 
 ##
+# @RunStatePanicAction:
+#
+# @action: Action taken by QEMU when guest panics
+#
+# Since: 6.0
+##
+{ 'struct': 'RunStatePanicAction',
+  'data': { 'action': 'PanicAction' } }
+
+##
+# @PanicAction:
+#
+# @none: Continue VM execution
+#
+# @pause: Pause the VM
+#
+# Since: 6.0
+##
+{ 'enum': 'PanicAction',
+  'data': [ 'none', 'pause' ] }
+
+##
 # @RunStateWatchdogAction:
 #
 # @action: Action taken by QEMU when watchdog device timer expires
diff --git a/qemu-options.hx b/qemu-options.hx
index a0d50f0..8b7d8bb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3899,6 +3899,8 @@  DEF("action", HAS_ARG, QEMU_OPTION_action,
     "                   action when guest reboots [default=none]\n"
     "-action shutdown=poweroff|pause\n"
     "                   action when guest shuts down [default=poweroff]\n"
+    "-action panic=pause|none\n"
+    "                   action when guest panics [default=pause]\n"
     "-action watchdog=reset|shutdown|poweroff|inject-nmi|pause|debug|none\n"
     "                   action when watchdog fires [default=reset]\n",
     QEMU_ARCH_ALL)
@@ -3911,6 +3913,7 @@  SRST
 
     Examples:
 
+    ``-action panic=none``
     ``-action reboot=shutdown,shutdown=pause``
     ``-watchdog i6300esb -action watchdog=pause``
 
diff --git a/softmmu/runstate-action.c b/softmmu/runstate-action.c
index f1fd457..a644d80 100644
--- a/softmmu/runstate-action.c
+++ b/softmmu/runstate-action.c
@@ -62,6 +62,24 @@  static void shutdown_set_action(ShutdownAction act, Error **errp)
 }
 
 /*
+ * Set the internal state to react to a guest panic event
+ * as specified by the action parameter.
+ */
+static void panic_set_action(PanicAction action, Error **errp)
+{
+    switch (action) {
+    case PANIC_ACTION_NONE:
+        pause_on_panic = 0;
+        break;
+    case PANIC_ACTION_PAUSE:
+        pause_on_panic = 1;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/*
  * Process an event|action pair and set the appropriate internal
  * state if event and action are valid.
  */
@@ -81,6 +99,10 @@  static int set_runstate_action(void *opaque, const char *event,
         act_idx = qapi_enum_parse(&ShutdownAction_lookup, action, -1, errp);
         shutdown_set_action(act_idx, NULL);
         break;
+    case RUN_STATE_EVENT_TYPE_PANIC:
+        act_idx = qapi_enum_parse(&PanicAction_lookup, action, -1, errp);
+        panic_set_action(act_idx, NULL);
+        break;
     case RUN_STATE_EVENT_TYPE_WATCHDOG:
         if (select_watchdog_action(action) == -1) {
             error_report("invalid parameter value: %s", action);
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 2b1583e..20f89cb 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -150,6 +150,7 @@  int singlestep = 0;
 int fd_bootchk = 1;
 int no_reboot;
 int no_shutdown = 0;
+int pause_on_panic = 1;
 int graphic_rotate = 0;
 const char *watchdog;
 QEMUOptionRom option_rom[MAX_OPTION_ROMS];
@@ -1449,9 +1450,16 @@  void qemu_system_guest_panicked(GuestPanicInformation *info)
     if (current_cpu) {
         current_cpu->crash_occurred = true;
     }
-    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
-                                   !!info, info);
-    vm_stop(RUN_STATE_GUEST_PANICKED);
+
+    if (pause_on_panic) {
+        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE,
+                                        !!info, info);
+        vm_stop(RUN_STATE_GUEST_PANICKED);
+    } else {
+        qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN,
+                                        !!info, info);
+    }
+
     if (!no_shutdown) {
         qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
                                        !!info, info);