diff mbox series

[v5,30/50] multi-process: send heartbeat messages to remote

Message ID a7d4a080616996b90942aa8790a00dfdfc264702.1582576372.git.jag.raman@oracle.com (mailing list archive)
State New, archived
Headers show
Series Initial support for multi-process qemu | expand

Commit Message

Jag Raman Feb. 24, 2020, 8:55 p.m. UTC
From: Elena Ufimtseva <elena.ufimtseva@oracle.com>

In order to detect remote processes which are hung, the
proxy periodically sends heartbeat messages to confirm
whether the remote process is alive.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 v4 -> v5:
  - Heartbeat messages are sent asynchronously

 hw/proxy/qemu-proxy.c         | 115 ++++++++++++++++++++++++++++++++++++++++++
 include/hw/proxy/qemu-proxy.h |   2 +
 include/io/mpqemu-link.h      |   1 +
 3 files changed, 118 insertions(+)
diff mbox series

Patch

diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
index c888677..bcb24f7 100644
--- a/hw/proxy/qemu-proxy.c
+++ b/hw/proxy/qemu-proxy.c
@@ -24,11 +24,104 @@ 
 #include "hw/boards.h"
 #include "include/qemu/log.h"
 
+QEMUTimer *hb_timer; /* heartbeat timer; set by start_broadcast_timer() */
 static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp);
 static void setup_irqfd(PCIProxyDev *dev);
+static void pci_dev_exit(PCIDevice *dev);
+static void start_broadcast_timer(void);
+static void stop_broadcast_timer(void);
+static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx);
+static void broadcast_init(void);
 static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l,
                           unsigned int op);
 
+/*
+ * SIGCHLD handler installed by set_sigchld_handler(); reports which child
+ * changed state.
+ *
+ * @sig:     signal number (unused; the value printed comes from @siginfo)
+ * @siginfo: details about the child supplied with the signal
+ * @ctx:     signal context (unused)
+ */
+static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx)
+{
+    /*
+     * TODO: Add proper handler. printf() is not async-signal-safe, so this
+     * diagnostic has to move out of signal context eventually.
+     *
+     * For SIGCHLD, si_code only classifies the event (CLD_EXITED,
+     * CLD_KILLED, ...); the child's exit value or terminating signal is
+     * reported in si_status, which is what the message describes.
+     */
+    printf("Child (pid %d) is dead? Signal is %d, Exit code is %d.\n",
+           siginfo->si_pid, siginfo->si_signo, siginfo->si_status);
+}
+
+/*
+ * fd read handler for a proxy device's heartbeat notifier (registered by
+ * broadcast_init()).
+ *
+ * @opaque: the PCIProxyDev whose en_ping notifier this handler serves
+ */
+static void remote_ping_handler(void *opaque)
+{
+    PCIProxyDev *pdev = opaque;
+    int ping_fd;
+
+    if (event_notifier_test_and_clear(&pdev->en_ping)) {
+        return;
+    }
+
+    /*
+     * TODO: Is retry needed? Add the handling of the
+     * non-responsive process. How is it done in case
+     * of a managed process?
+     */
+    printf("No reply from remote process, pid %d\n", pdev->remote_pid);
+
+    /*
+     * Unregister this handler before the notifier is torn down, so that
+     * nothing keeps polling a descriptor that has been closed and whose
+     * number may later be reused by an unrelated file.
+     */
+    ping_fd = event_notifier_get_fd(&pdev->en_ping);
+    if (ping_fd != -1) {
+        qemu_set_fd_handler(ping_fd, NULL, NULL, NULL);
+    }
+    event_notifier_cleanup(&pdev->en_ping);
+}
+
+/*
+ * Send a PROXY_PING message to every device on proxy_dev_list whose
+ * heartbeat notifier still reports a descriptor, passing that descriptor
+ * along with the message. Devices whose notifier reports -1 are skipped.
+ */
+static void broadcast_msg(void)
+{
+    PCIProxyDev *pdev;
+
+    QLIST_FOREACH(pdev, &proxy_dev_list.devices, next) {
+        MPQemuMsg ping;
+        int ping_fd = event_notifier_get_fd(&pdev->en_ping);
+
+        if (ping_fd != -1) {
+            memset(&ping, 0, sizeof(ping));
+
+            ping.cmd = PROXY_PING;
+            ping.bytestream = 0;
+            ping.size = 0;
+            ping.num_fds = 1;
+            ping.fds[0] = ping_fd;
+
+            mpqemu_msg_send(&ping, pdev->mpqemu_link->com);
+        }
+    }
+}
+
+/*
+ * Create the heartbeat notifier of every device on proxy_dev_list and
+ * register remote_ping_handler() on its descriptor.
+ *
+ * NOTE(review): every listed device is (re)initialized on each call, and
+ * proxy_ready() calls this per device; notifiers set up by an earlier call
+ * appear to be overwritten without being cleaned up - confirm and restrict
+ * this to the device that just became ready.
+ */
+static void broadcast_init(void)
+{
+    PCIProxyDev *entry;
+
+    QLIST_FOREACH(entry, &proxy_dev_list.devices, next) {
+        /*
+         * Do not install a handler when the notifier could not be created:
+         * the descriptor read back from a failed init is not one we own.
+         */
+        if (event_notifier_init(&entry->en_ping, 0) < 0) {
+            printf("Failed to create heartbeat notifier for remote process,"
+                   " pid %d\n", entry->remote_pid);
+            continue;
+        }
+
+        qemu_set_fd_handler(event_notifier_get_fd(&entry->en_ping),
+                            remote_ping_handler, NULL, entry);
+    }
+}
+
+/*
+ * Delay between heartbeat rounds, added to a millisecond clock reading.
+ * NOTE(review): 1000000 ms is roughly 16.7 minutes - confirm this is the
+ * intended period.
+ */
+#define NOP_INTERVAL 1000000
+
+/*
+ * hb_timer callback: ping every remote process, then re-arm the timer for
+ * the next round.
+ *
+ * @opaque: unused
+ */
+static void remote_ping(void *opaque)
+{
+    int64_t next_round;
+
+    broadcast_msg();
+
+    next_round = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL;
+    timer_mod(hb_timer, next_round);
+}
+
+/*
+ * Arm the heartbeat timer, creating it on first use.
+ *
+ * proxy_ready() calls this for each device it runs on, so an existing
+ * timer is re-armed rather than replaced; allocating a new one every time
+ * would overwrite hb_timer and leak the previous, still pending timer.
+ */
+static void start_broadcast_timer(void)
+{
+    if (!hb_timer) {
+        hb_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, remote_ping,
+                                &proxy_dev_list);
+    }
+    timer_mod(hb_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL);
+}
+
+/*
+ * Cancel and release the heartbeat timer.
+ *
+ * Does nothing when no timer exists, e.g. when pci_dev_exit() runs before
+ * any timer was started. hb_timer is reset to NULL afterwards so that a
+ * later start_broadcast_timer() allocates a fresh timer instead of
+ * touching freed memory.
+ */
+static void stop_broadcast_timer(void)
+{
+    if (!hb_timer) {
+        return;
+    }
+
+    timer_del(hb_timer);
+    timer_free(hb_timer);
+    hb_timer = NULL;
+}
+
+/*
+ * Install childsig_handler() as the SIGCHLD handler.
+ *
+ * SA_NOCLDSTOP suppresses notifications for children that merely stop,
+ * and SA_NOCLDWAIT keeps terminated children from becoming zombies.
+ * A failure to install the handler is reported but is not fatal.
+ */
+static void set_sigchld_handler(void)
+{
+    struct sigaction sa_chld;
+
+    memset(&sa_chld, 0, sizeof(sa_chld));
+    sigemptyset(&sa_chld.sa_mask);
+    sa_chld.sa_sigaction = childsig_handler;
+    sa_chld.sa_flags = SA_SIGINFO | SA_NOCLDWAIT | SA_NOCLDSTOP;
+
+    if (sigaction(SIGCHLD, &sa_chld, NULL) < 0) {
+        perror("sigaction(SIGCHLD)");
+    }
+}
+
 static void probe_pci_info(PCIDevice *dev)
 {
     PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
@@ -111,6 +204,9 @@  static void proxy_ready(PCIDevice *dev)
 
     setup_irqfd(pdev);
     probe_pci_info(dev);
+    set_sigchld_handler();
+    broadcast_init();
+    start_broadcast_timer();
 }
 
 static int set_remote_opts(PCIDevice *dev, QDict *qdict, unsigned int cmd)
@@ -325,6 +421,7 @@  static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = pci_proxy_dev_realize;
+    k->exit = pci_dev_exit;
     k->config_read = pci_proxy_read_config;
     k->config_write = pci_proxy_write_config;
 }
@@ -455,6 +552,24 @@  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
     dev->proxy_ready = proxy_ready;
 }
 
+/*
+ * PCIDeviceClass exit hook: take every list entry served by the same
+ * remote process as @pdev off the heartbeat list and release its
+ * heartbeat notifier.
+ *
+ * @pdev: the proxy device being torn down
+ */
+static void pci_dev_exit(PCIDevice *pdev)
+{
+    PCIProxyDev *entry, *sentry;
+    PCIProxyDev *dev = PCI_PROXY_DEV(pdev);
+
+    /* The timer is started again below if any device is left to ping. */
+    stop_broadcast_timer();
+
+    QLIST_FOREACH_SAFE(entry, &proxy_dev_list.devices, next, sentry) {
+        int ping_fd;
+
+        if (entry->remote_pid != dev->remote_pid) {
+            continue;
+        }
+
+        /*
+         * Drop the fd handler before the entry leaves the list: its opaque
+         * pointer refers to a device that is going away, and the notifier
+         * descriptors would otherwise never be closed.
+         *
+         * NOTE(review): assumes a never-initialized en_ping reads back as
+         * descriptor 0 (zero-filled device) - confirm. 0 is skipped along
+         * with -1 so that stdin is not unregistered and closed by mistake.
+         */
+        ping_fd = event_notifier_get_fd(&entry->en_ping);
+        if (ping_fd > 0) {
+            qemu_set_fd_handler(ping_fd, NULL, NULL, NULL);
+            event_notifier_cleanup(&entry->en_ping);
+        }
+
+        QLIST_REMOVE(entry, next);
+    }
+
+    if (!QLIST_EMPTY(&proxy_dev_list.devices)) {
+        start_broadcast_timer();
+    }
+}
+
 static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
                                 bool write, hwaddr addr, uint64_t *val,
                                 unsigned size, bool memory)
diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
index 7c6bddd..f7666fb 100644
--- a/include/hw/proxy/qemu-proxy.h
+++ b/include/hw/proxy/qemu-proxy.h
@@ -55,6 +55,8 @@  struct PCIProxyDev {
     EventNotifier resample;
 
     pid_t remote_pid;
+    EventNotifier en_ping;
+
     int socket;
 
     char *rid;
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index 2ca89c5..aaaf1a4 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -60,6 +60,7 @@  typedef enum {
     DEVICE_DEL,
     GET_PCI_INFO,
     RET_PCI_INFO,
+    PROXY_PING,
     MAX,
 } mpqemu_cmd_t;