diff mbox series

[RESEND,v6,28/36] multi-process: send heartbeat messages to remote

Message ID bf13fc6e633b70498ee47ad4bc5f22890edcf422.1587614626.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,v6,01/36] memory: alloc RAM from file at offset | expand

Commit Message

Elena Ufimtseva April 23, 2020, 4:14 a.m. UTC
From: Elena Ufimtseva <elena.ufimtseva@oracle.com>

In order to detect remote processes which are hung, the
proxy periodically sends heartbeat messages to confirm that
the remote process is still alive.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 hw/proxy/qemu-proxy.c         | 86 +++++++++++++++++++++++++++++++++++
 include/hw/proxy/qemu-proxy.h |  3 ++
 include/io/mpqemu-link.h      |  1 +
 io/mpqemu-link.c              |  5 ++
 4 files changed, 95 insertions(+)
diff mbox series

Patch

diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
index 730e28483e..162014353f 100644
--- a/hw/proxy/qemu-proxy.c
+++ b/hw/proxy/qemu-proxy.c
@@ -21,6 +21,78 @@ 
 
 static void probe_pci_info(PCIDevice *dev);
 
+/*
+ * SIGCHLD handler (three-argument SA_SIGINFO form): reports which child
+ * changed state.
+ *
+ * @sig:     signal number as delivered; unused, siginfo->si_signo is printed
+ * @siginfo: sender details; si_pid, si_signo and si_status are read
+ * @ctx:     context pointer supplied by the kernel; unused
+ *
+ * For SIGCHLD, si_code only says how the child changed state (CLD_EXITED,
+ * CLD_KILLED, ...); the exit value or terminating signal is carried in
+ * si_status, so that is what is reported as the exit code.
+ *
+ * TODO: Add proper handler. printf() is not async-signal-safe, so this
+ * diagnostic must be replaced before the handler is relied upon.
+ */
+static void childsig_handler(int sig, siginfo_t *siginfo, void *ctx)
+{
+    printf("Child (pid %d) is dead? Signal is %d, Exit code is %d.\n",
+           (int)siginfo->si_pid, siginfo->si_signo, siginfo->si_status);
+}
+
+/*
+ * Send a single PROXY_PING heartbeat to the remote and wait for its answer.
+ *
+ * The message carries no payload; its only attachment is the en_ping
+ * notifier fd, which is then handed to wait_for_remote(). A non-zero
+ * result from wait_for_remote() is reported as lost contact.
+ *
+ * Does nothing when the en_ping notifier has no fd (-1).
+ */
+static void hb_msg(PCIProxyDev *dev)
+{
+    DeviceState *ds = DEVICE(dev);
+    int ping_fd = event_notifier_get_fd(&dev->en_ping);
+    MPQemuMsg ping;
+
+    /* Without a notifier fd there is nothing to wait on. */
+    if (ping_fd == -1) {
+        return;
+    }
+
+    /* Zero the whole message; size and bytestream stay 0 for a ping. */
+    memset(&ping, 0, sizeof(ping));
+    ping.cmd = PROXY_PING;
+    ping.num_fds = 1;
+    ping.fds[0] = ping_fd;
+
+    mpqemu_msg_send(&ping, dev->mpqemu_link->com);
+
+    /* NOTE(review): this blocks the caller until wait_for_remote() returns. */
+    if (wait_for_remote(ping.fds[0])) {
+        printf("Lost contact with remote device %s\n", ds->id);
+        /* TODO: Initiate error recovery */
+    }
+}
+
+/* Heartbeat period in ms (added to qemu_clock_get_ms() values). */
+#define NOP_INTERVAL 1000
+
+/*
+ * Heartbeat timer callback.
+ *
+ * @opaque: the PCIProxyDev that was registered with the timer
+ *
+ * Sends one heartbeat, then re-arms hb_timer NOP_INTERVAL ms after the
+ * current QEMU_CLOCK_VIRTUAL time. The clock is sampled after the
+ * heartbeat exchange has finished.
+ */
+static void remote_ping(void *opaque)
+{
+    PCIProxyDev *proxy = opaque;
+    int64_t next_beat;
+
+    hb_msg(proxy);
+
+    next_beat = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL;
+    timer_mod(proxy->hb_timer, next_beat);
+}
+
+/*
+ * Create the heartbeat timer for @dev and arm it for the first time.
+ *
+ * The timer runs remote_ping() with @dev as its argument on
+ * QEMU_CLOCK_VIRTUAL; the first expiry is NOP_INTERVAL ms from now.
+ * The timer is stored in dev->hb_timer.
+ */
+static void start_hb_timer(PCIProxyDev *dev)
+{
+    QEMUTimer *beat = timer_new_ms(QEMU_CLOCK_VIRTUAL, remote_ping, dev);
+
+    dev->hb_timer = beat;
+    timer_mod(beat, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL);
+}
+
+/*
+ * Cancel and release the heartbeat timer of @dev.
+ *
+ * Safe to call when no timer exists (hb_timer is NULL) and safe to call
+ * more than once: the pointer is cleared after the timer is freed so no
+ * dangling reference is left in the device state.
+ */
+static void stop_hb_timer(PCIProxyDev *dev)
+{
+    if (!dev->hb_timer) {
+        return;
+    }
+
+    /* Cancel any pending expiry before the timer memory goes away. */
+    timer_del(dev->hb_timer);
+    timer_free(dev->hb_timer);
+    dev->hb_timer = NULL;
+}
+
+/*
+ * Install childsig_handler() as the process-wide SIGCHLD handler.
+ *
+ * Flags:
+ *   SA_SIGINFO   - use the three-argument sa_sigaction handler form
+ *   SA_NOCLDWAIT - terminated children are not left behind as zombies
+ *   SA_NOCLDSTOP - no SIGCHLD when a child merely stops or continues
+ *
+ * A failure to install the handler is reported on stderr; the function
+ * still returns normally because it has no way to propagate the error.
+ *
+ * NOTE(review): this replaces any SIGCHLD disposition set elsewhere in the
+ * process - confirm nothing else depends on its own handler.
+ */
+static void set_sigchld_handler(void)
+{
+    struct sigaction sa_chld;
+
+    memset(&sa_chld, 0, sizeof(sa_chld));
+    sigemptyset(&sa_chld.sa_mask);
+    sa_chld.sa_sigaction = childsig_handler;
+    sa_chld.sa_flags = SA_SIGINFO | SA_NOCLDWAIT | SA_NOCLDSTOP;
+
+    if (sigaction(SIGCHLD, &sa_chld, NULL) < 0) {
+        perror("sigaction(SIGCHLD)");
+    }
+}
+
 static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l,
                           unsigned int op)
 {
@@ -204,6 +276,19 @@  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
     setup_irqfd(dev);
 
     probe_pci_info(PCI_DEVICE(dev));
+
+    set_sigchld_handler();
+
+    event_notifier_init(&dev->en_ping, 0);
+
+    start_hb_timer(dev);
+}
+
+/*
+ * PCIDeviceClass exit hook for the proxy device.
+ *
+ * @pdev: the proxy device being torn down
+ *
+ * Undoes the heartbeat setup done at realize time: stops the heartbeat
+ * timer and releases the en_ping event notifier.
+ */
+static void pci_proxy_dev_exit(PCIDevice *pdev)
+{
+    PCIProxyDev *dev = PCI_PROXY_DEV(pdev);
+
+    /* Stop pinging first so no heartbeat can still use the notifier. */
+    stop_hb_timer(dev);
+
+    /* Release the notifier set up by event_notifier_init() in realize. */
+    event_notifier_cleanup(&dev->en_ping);
 }
 
 static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
@@ -211,6 +296,7 @@  static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->realize = pci_proxy_dev_realize;
+    k->exit = pci_proxy_dev_exit;
     k->config_read = pci_proxy_read_config;
     k->config_write = pci_proxy_write_config;
 }
diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
index 0d8ec6d686..26f0a41110 100644
--- a/include/hw/proxy/qemu-proxy.h
+++ b/include/hw/proxy/qemu-proxy.h
@@ -55,6 +55,9 @@  struct PCIProxyDev {
     EventNotifier intr;
     EventNotifier resample;
 
+    EventNotifier en_ping;
+    QEMUTimer *hb_timer;
+
     int socket;
 
     ProxyMemoryRegion region[PCI_NUM_REGIONS];
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index 102c736705..45ea1fcafa 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -50,6 +50,7 @@  typedef enum {
     SET_IRQFD,
     GET_PCI_INFO,
     RET_PCI_INFO,
+    PROXY_PING,
     MAX,
 } mpqemu_cmd_t;
 
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
index ea519a980e..91a3395566 100644
--- a/io/mpqemu-link.c
+++ b/io/mpqemu-link.c
@@ -394,6 +394,11 @@  bool mpqemu_msg_valid(MPQemuMsg *msg)
             return false;
         }
         break;
+    case PROXY_PING:
+        if (msg->size != 0) {
+            return false;
+        }
+        break;
     default:
         break;
     }