diff mbox series

[v7,18/21] multi-process: heartbeat messages to remote

Message ID c93ff92180d1e5a1bcb620331bc57f6ae9c5447c.1593273671.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series Initial support for multi-process qemu | expand

Commit Message

Elena Ufimtseva June 27, 2020, 5:09 p.m. UTC
From: Elena Ufimtseva <elena.ufimtseva@oracle.com>

In order to detect remote processes which are hung, the
proxy periodically sends heartbeat messages to confirm if
the remote process is alive. The remote process responds
to this heartbeat message to confirm it is alive.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 hw/i386/remote-msg.c     | 14 ++++++++++
 hw/pci/proxy.c           | 58 ++++++++++++++++++++++++++++++++++++++++
 include/hw/pci/proxy.h   |  2 ++
 include/io/mpqemu-link.h |  1 +
 io/mpqemu-link.c         |  1 +
 5 files changed, 76 insertions(+)

Comments

Stefan Hajnoczi July 2, 2020, 1:16 p.m. UTC | #1
On Sat, Jun 27, 2020 at 10:09:40AM -0700, elena.ufimtseva@oracle.com wrote:
> From: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> 
> In order to detect remote processes which are hung, the
> proxy periodically sends heartbeat messages to confirm if
> the remote process is alive. The remote process responds
> to this heartbeat message to confirm it is alive.
> 
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> ---
>  hw/i386/remote-msg.c     | 14 ++++++++++
>  hw/pci/proxy.c           | 58 ++++++++++++++++++++++++++++++++++++++++
>  include/hw/pci/proxy.h   |  2 ++
>  include/io/mpqemu-link.h |  1 +
>  io/mpqemu-link.c         |  1 +
>  5 files changed, 76 insertions(+)
> 

This patch seems incomplete since no action is taken when the device
fails to respond. vCPU threads that access the device will still get
stuck.

The simplest way to make this useful is to close the connection when a
timeout occurs. Then the G_IO_HUP handler for the UNIX domain socket
should perform connection cleanup. At that point there are a few
choices:

1. Stop guest execution and wait for the host admin to restore the
   mplink so execution can resume. This is similar to how -drive
   rerror=stop pauses the guest when a disk I/O error is encountered.

2. Stop guest execution but defer it until this stale device is actually
   accessed. This maximizes guest uptime. Guests that rarely access the
   device may not notice at all.

3. Return 0 from MemoryRegion read operations and ignore writes. The
   guest continues executing but the device is broken. This is risky
   because device drivers inside the guest may not be ready to deal with
   this. The result could be data loss or corruption.

4. Raise a bus-level event. Maybe PCI error reporting can be used to
   offline the device.

5. Terminate the guest with an error message.

6. ?

Until the heartbeat is fully implemented and tested I suggest dropping
it from this patch series. Remember the G_IO_HUP will happen anyway if
the remote device process terminates.
diff mbox series

Patch

diff --git a/hw/i386/remote-msg.c b/hw/i386/remote-msg.c
index 9379ee6442..919bddc1d5 100644
--- a/hw/i386/remote-msg.c
+++ b/hw/i386/remote-msg.c
@@ -22,6 +22,7 @@  static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 static void process_get_pci_info_msg(QIOChannel *ioc, MPQemuMsg *msg,
                                      PCIDevice *pci_dev);
+static void process_proxy_ping_msg(QIOChannel *ioc);
 
 gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition cond,
                             gpointer opaque)
@@ -76,6 +77,9 @@  gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition cond,
     case GET_PCI_INFO:
         process_get_pci_info_msg(ioc, &msg, pci_dev);
         break;
+    case PROXY_PING:
+        process_proxy_ping_msg(ioc);
+        break;
     default:
         error_setg(&local_err, "Unknown command (%d) received from proxy \
                    in remote process pid=%d", msg.cmd, getpid());
@@ -269,3 +273,13 @@  static void process_get_pci_info_msg(QIOChannel *ioc, MPQemuMsg *msg,
 
     mpqemu_msg_send(&ret, ioc);
 }
+
+/*
+ * Reply to a PROXY_PING heartbeat from the proxy.
+ *
+ * The answer is a RET_MSG whose size covers the data1 field; every other
+ * field of the reply is left zeroed.
+ */
+static void process_proxy_ping_msg(QIOChannel *ioc)
+{
+    MPQemuMsg reply = {
+        .cmd = RET_MSG,
+        .size = sizeof(reply.data1),
+    };
+
+    mpqemu_msg_send(&reply, ioc);
+}
diff --git a/hw/pci/proxy.c b/hw/pci/proxy.c
index 449341e459..e2e9a13287 100644
--- a/hw/pci/proxy.c
+++ b/hw/pci/proxy.c
@@ -24,6 +24,8 @@ 
 #include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev);
+static void start_hb_timer(PCIProxyDev *dev);
+static void pci_proxy_dev_exit(PCIDevice *pdev);
 
 static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
 {
@@ -132,6 +134,8 @@  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
     setup_irqfd(dev);
 
     probe_pci_info(PCI_DEVICE(dev));
+
+    start_hb_timer(dev);
 }
 
 static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
@@ -192,6 +196,7 @@  static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
     k->realize = pci_proxy_dev_realize;
     k->config_read = pci_proxy_read_config;
     k->config_write = pci_proxy_write_config;
+    k->exit = pci_proxy_dev_exit;
 
     device_class_set_props(dc, proxy_properties);
 }
@@ -356,3 +361,56 @@  static void probe_pci_info(PCIDevice *dev)
         }
     }
 }
+
+/*
+ * Send one PROXY_PING message to the remote process over dev->com using
+ * mpqemu_msg_send_reply_co().
+ *
+ * A failure is only logged here; no recovery action is taken.
+ */
+static void hb_msg(PCIProxyDev *dev)
+{
+    DeviceState *ds = DEVICE(dev);
+    MPQemuMsg msg = { 0 };
+    long ret = -EINVAL;
+    Error *local_err = NULL;
+
+    /* A ping carries no payload: only the command field is meaningful. */
+    msg.cmd = PROXY_PING;
+    msg.bytestream = 0;
+    msg.size = 0;
+
+    ret = mpqemu_msg_send_reply_co(&msg, dev->com, &local_err);
+    if (local_err) {
+        /*
+         * NOTE(review): ds->id is presumably NULL for devices created
+         * without an id, so never hand it to "%s" unchecked.
+         */
+        error_report("Lost contact with remote device %s, error code %ld",
+                     ds->id ? ds->id : "(unnamed)", ret);
+        /* The Error is ours once set; release it so each miss does not leak. */
+        error_free(local_err);
+    }
+}
+
+/* Heartbeat period in milliseconds (used with the *_ms timer helpers). */
+#define NOP_INTERVAL 1000
+
+/*
+ * Heartbeat timer callback.
+ *
+ * @opaque is the PCIProxyDev registered with the timer. One ping is sent,
+ * then the timer is re-armed NOP_INTERVAL ms after the current
+ * QEMU_CLOCK_VIRTUAL time.
+ */
+static void remote_ping(void *opaque)
+{
+    PCIProxyDev *proxy = opaque;
+    int64_t next_deadline;
+
+    hb_msg(proxy);
+
+    /* Read the clock only after the ping, as the deadline is relative to now. */
+    next_deadline = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL;
+    timer_mod(proxy->hb_timer, next_deadline);
+}
+
+/*
+ * Create the heartbeat timer for @dev, store it in dev->hb_timer, and
+ * schedule the first remote_ping() NOP_INTERVAL ms from now.
+ */
+static void start_hb_timer(PCIProxyDev *dev)
+{
+    int64_t first_deadline;
+
+    dev->hb_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, remote_ping, dev);
+
+    first_deadline = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL;
+    timer_mod(dev->hb_timer, first_deadline);
+}
+
+/*
+ * Cancel and release the heartbeat timer of @dev.
+ *
+ * dev->hb_timer is cleared afterwards so the freed timer cannot be reused;
+ * calling this while dev->hb_timer is NULL is a no-op.
+ */
+static void stop_hb_timer(PCIProxyDev *dev)
+{
+    if (!dev->hb_timer) {
+        return;
+    }
+
+    timer_del(dev->hb_timer);
+    timer_free(dev->hb_timer);
+    dev->hb_timer = NULL;
+}
+
+/*
+ * Device exit callback (installed as k->exit in class_init): stops the
+ * heartbeat timer of the proxy device behind @pdev.
+ */
+static void pci_proxy_dev_exit(PCIDevice *pdev)
+{
+    stop_hb_timer(PCI_PROXY_DEV(pdev));
+}
diff --git a/include/hw/pci/proxy.h b/include/hw/pci/proxy.h
index e6f076ae95..037740309d 100644
--- a/include/hw/pci/proxy.h
+++ b/include/hw/pci/proxy.h
@@ -53,6 +53,8 @@  struct PCIProxyDev {
     EventNotifier intr;
     EventNotifier resample;
 
+    QEMUTimer *hb_timer;
+
     ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index 4b96cb8ccb..676d7eb3ef 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -44,6 +44,7 @@  typedef enum {
     BAR_READ,
     SET_IRQFD,
     GET_PCI_INFO,
+    PROXY_PING,
     MAX = INT_MAX,
 } MPQemuCmd;
 
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
index d09b2a2f50..7452e55e17 100644
--- a/io/mpqemu-link.c
+++ b/io/mpqemu-link.c
@@ -264,6 +264,7 @@  bool mpqemu_msg_valid(MPQemuMsg *msg)
         }
         break;
     case GET_PCI_INFO:
+    case PROXY_PING:
         if (msg->size) {
             return false;
         }