diff mbox series

[v7,15/21] multi-process: Synchronize remote memory

Message ID f9eb12b75572e91e7e0e530dbc9b8efae41f449e.1593273671.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series Initial support for multi-process qemu | expand

Commit Message

Elena Ufimtseva June 27, 2020, 5:09 p.m. UTC
From: Jagannathan Raman <jag.raman@oracle.com>

Add memory-listener object which is used to keep the view of the RAM
in sync between QEMU and remote process.
A MemoryListener is registered for system-memory AddressSpace. The
listener sends SYNC_SYSMEM message to the remote process when memory
listener commits the changes to memory, the remote process receives
the message and processes it in the handler for SYNC_SYSMEM message.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 MAINTAINERS                  |   2 +
 hw/i386/remote-msg.c         |   4 +
 hw/pci/Makefile.objs         |   1 +
 hw/pci/memory-sync.c         | 214 +++++++++++++++++++++++++++++++++++
 hw/pci/proxy.c               |   4 +
 include/hw/pci/memory-sync.h |  30 +++++
 include/hw/pci/proxy.h       |   3 +
 7 files changed, 258 insertions(+)
 create mode 100644 hw/pci/memory-sync.c
 create mode 100644 include/hw/pci/memory-sync.h

Comments

Stefan Hajnoczi July 1, 2020, 10:55 a.m. UTC | #1
On Sat, Jun 27, 2020 at 10:09:37AM -0700, elena.ufimtseva@oracle.com wrote:
> +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> +{
> +    uint64_t mrs_size, mrs_gpa, mrs_page;
> +    MemoryRegionSection *prev_sec;
> +    bool merged = false;
> +    uintptr_t mrs_host;
> +    RAMBlock *mrs_rb;
> +
> +    if (!sync->n_mr_sections) {
> +        return false;
> +    }
> +
> +    mrs_rb = section->mr->ram_block;
> +    mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> +    mrs_size = int128_get64(section->size);
> +    mrs_gpa = section->offset_within_address_space;
> +    mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> +               section->offset_within_region;
> +
> +    if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> +        return true;
> +    }
> +
> +    mrs_host = mrs_host & ~(mrs_page - 1);
> +    mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> +    mrs_size = ROUND_UP(mrs_size, mrs_page);
> +
> +    if (sync->n_mr_sections) {

The check is unnecessary because of the if statement above:

  if (!sync->n_mr_sections) {
     return false;
  }

> +static void proxy_ml_commit(MemoryListener *listener)
> +{
> +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +    MPQemuMsg msg;
> +    MemoryRegionSection section;
> +    ram_addr_t offset;
> +    uintptr_t host_addr;
> +    int region;
> +
> +    memset(&msg, 0, sizeof(MPQemuMsg));
> +
> +    msg.cmd = SYNC_SYSMEM;
> +    msg.bytestream = 0;
> +    msg.num_fds = sync->n_mr_sections;
> +    msg.size = sizeof(msg.data1);
> +    assert(msg.num_fds <= REMOTE_MAX_FDS);
> +
> +    for (region = 0; region < sync->n_mr_sections; region++) {
> +        section = sync->mr_sections[region];

Why is section copied here? It's conventional to take a pointer to a
struct instead of copying its value.

> +        msg.data1.sync_sysmem.gpas[region] =
> +            section.offset_within_address_space;
> +        msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
> +        host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
> +                    section.offset_within_region;
> +        msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> +        msg.data1.sync_sysmem.offsets[region] = offset;
> +    }
> +    mpqemu_msg_send(&msg, sync->ioc);
> +}
> +
> +void deconfigure_memory_sync(RemoteMemSync *sync)
> +{

Missing proxy_ml_begin() call to free sync->mr_sections[] and unref
sections.

> +    memory_listener_unregister(&sync->listener);
> +}

This function isn't called. It's good practice to implement the object
lifecycle from the start.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index b48c3114c1..38d605445e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2954,6 +2954,8 @@  F: include/hw/i386/remote-memory.h
 F: hw/i386/remote-memory.c
 F: hw/pci/proxy.c
 F: include/hw/pci/proxy.h
+F: hw/pci/memory-sync.c
+F: include/hw/pci/memory-sync.h
 
 Build and test automation
 -------------------------
diff --git a/hw/i386/remote-msg.c b/hw/i386/remote-msg.c
index ffb4143736..48b153eaae 100644
--- a/hw/i386/remote-msg.c
+++ b/hw/i386/remote-msg.c
@@ -9,6 +9,7 @@ 
 #include "io/channel-util.h"
 #include "hw/pci/pci.h"
 #include "exec/memattrs.h"
+#include "hw/i386/remote-memory.h"
 
 static void process_connect_dev_msg(MPQemuMsg *msg, QIOChannel *com,
                                     Error **errp);
@@ -63,6 +64,9 @@  gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition cond,
     case BAR_READ:
         process_bar_read(ioc, &msg, &local_err);
         break;
+    case SYNC_SYSMEM:
+        remote_sysmem_reconfig(&msg, &local_err);
+        break;
     default:
         error_setg(&local_err, "Unknown command (%d) received from proxy \
                    in remote process pid=%d", msg.cmd, getpid());
diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs
index 515dda506c..c90acd5a6e 100644
--- a/hw/pci/Makefile.objs
+++ b/hw/pci/Makefile.objs
@@ -13,3 +13,4 @@  common-obj-$(CONFIG_PCI_EXPRESS) += pcie_port.o pcie_host.o
 common-obj-$(call lnot,$(CONFIG_PCI)) += pci-stub.o
 common-obj-$(CONFIG_ALL) += pci-stub.o
 obj-$(CONFIG_MPQEMU) += proxy.o
+obj-$(CONFIG_MPQEMU) += memory-sync.o
diff --git a/hw/pci/memory-sync.c b/hw/pci/memory-sync.c
new file mode 100644
index 0000000000..5f867974c4
--- /dev/null
+++ b/hw/pci/memory-sync.c
@@ -0,0 +1,214 @@ 
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/compiler.h"
+#include "qemu/int128.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h"
+#include "cpu.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "io/mpqemu-link.h"
+#include "hw/pci/memory-sync.h"
+
+static void proxy_ml_begin(MemoryListener *listener)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+    int mrs;
+
+    for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
+        memory_region_unref(sync->mr_sections[mrs].mr);
+    }
+
+    g_free(sync->mr_sections);
+    sync->mr_sections = NULL;
+    sync->n_mr_sections = 0;
+}
+
+static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
+{
+    MemoryRegion *mr;
+    ram_addr_t off;
+
+    /**
+     * Assumes that the host address is a valid address as it's
+     * coming from the MemoryListener system. In the case host
+     * address is not valid, the following call would return
+     * the default subregion of "system_memory" region, and
+     * not NULL. So it's not possible to check for NULL here.
+     */
+    mr = memory_region_from_host((void *)(uintptr_t)host, &off);
+
+    if (offset) {
+        *offset = off;
+    }
+
+    return memory_region_get_fd(mr);
+}
+
+static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
+{
+    bool merge;
+    int fd1, fd2;
+
+    fd1 = get_fd_from_hostaddr(host, NULL);
+
+    fd2 = get_fd_from_hostaddr(prev_host, NULL);
+
+    merge = (fd1 == fd2);
+
+    merge &= ((prev_host + size) == host);
+
+    return merge;
+}
+
+static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
+{
+    uint64_t mrs_size, mrs_gpa, mrs_page;
+    MemoryRegionSection *prev_sec;
+    bool merged = false;
+    uintptr_t mrs_host;
+    RAMBlock *mrs_rb;
+
+    if (!sync->n_mr_sections) {
+        return false;
+    }
+
+    mrs_rb = section->mr->ram_block;
+    mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
+    mrs_size = int128_get64(section->size);
+    mrs_gpa = section->offset_within_address_space;
+    mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+               section->offset_within_region;
+
+    if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
+        return true;
+    }
+
+    mrs_host = mrs_host & ~(mrs_page - 1);
+    mrs_gpa = mrs_gpa & ~(mrs_page - 1);
+    mrs_size = ROUND_UP(mrs_size, mrs_page);
+
+    if (sync->n_mr_sections) {
+        prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
+        uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
+        uint64_t prev_size = int128_get64(prev_sec->size);
+        uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
+        uint64_t prev_host_start =
+            (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
+            prev_sec->offset_within_region;
+        uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
+
+        if (mrs_gpa <= (prev_gpa_end + 1)) {
+            g_assert(mrs_gpa > prev_gpa_start);
+
+            if ((section->mr == prev_sec->mr) &&
+                proxy_mrs_can_merge(mrs_host, prev_host_start,
+                                    (mrs_gpa - prev_gpa_start))) {
+                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
+                merged = true;
+                prev_sec->offset_within_address_space =
+                    MIN(prev_gpa_start, mrs_gpa);
+                prev_sec->offset_within_region =
+                    MIN(prev_host_start, mrs_host) -
+                    (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
+                prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
+                                                             mrs_host));
+            }
+        }
+    }
+
+    return merged;
+}
+
+static void proxy_ml_region_addnop(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+
+    if (!(memory_region_is_ram(section->mr) &&
+          !memory_region_is_rom(section->mr))) {
+        return;
+    }
+
+    if (try_merge(sync, section)) {
+        return;
+    }
+
+    ++sync->n_mr_sections;
+    sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
+                                sync->n_mr_sections);
+    sync->mr_sections[sync->n_mr_sections - 1] = *section;
+    sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
+    memory_region_ref(section->mr);
+}
+
+static void proxy_ml_commit(MemoryListener *listener)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+    MPQemuMsg msg;
+    MemoryRegionSection section;
+    ram_addr_t offset;
+    uintptr_t host_addr;
+    int region;
+
+    memset(&msg, 0, sizeof(MPQemuMsg));
+
+    msg.cmd = SYNC_SYSMEM;
+    msg.bytestream = 0;
+    msg.num_fds = sync->n_mr_sections;
+    msg.size = sizeof(msg.data1);
+    assert(msg.num_fds <= REMOTE_MAX_FDS);
+
+    for (region = 0; region < sync->n_mr_sections; region++) {
+        section = sync->mr_sections[region];
+        msg.data1.sync_sysmem.gpas[region] =
+            section.offset_within_address_space;
+        msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
+        host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
+                    section.offset_within_region;
+        msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
+        msg.data1.sync_sysmem.offsets[region] = offset;
+    }
+    mpqemu_msg_send(&msg, sync->ioc);
+}
+
+void deconfigure_memory_sync(RemoteMemSync *sync)
+{
+    memory_listener_unregister(&sync->listener);
+}
+
+/*
+ * TODO: Memory Sync need not be instantianted once per every proxy device.
+ *       All remote devices are going to get the exact same updates at the
+ *       same time. It therefore makes sense to have a broadcast model.
+ *
+ *       Broadcast model would involve running the MemorySync object in a
+ *       thread. MemorySync would contain a list of mpqemu-link objects
+ *       that need notification. proxy_ml_commit() could send the same
+ *       message to all the links at the same time.
+ */
+void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
+{
+    sync->n_mr_sections = 0;
+    sync->mr_sections = NULL;
+
+    sync->ioc = ioc;
+
+    sync->listener.begin = proxy_ml_begin;
+    sync->listener.commit = proxy_ml_commit;
+    sync->listener.region_add = proxy_ml_region_addnop;
+    sync->listener.region_nop = proxy_ml_region_addnop;
+    sync->listener.priority = 10;
+
+    memory_listener_register(&sync->listener, &address_space_memory);
+}
diff --git a/hw/pci/proxy.c b/hw/pci/proxy.c
index fff021a06a..5ecbdd2dcf 100644
--- a/hw/pci/proxy.c
+++ b/hw/pci/proxy.c
@@ -17,6 +17,8 @@ 
 #include "monitor/monitor.h"
 #include "io/mpqemu-link.h"
 #include "qemu/error-report.h"
+#include "hw/pci/memory-sync.h"
+#include "qom/object.h"
 
 static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
 {
@@ -68,6 +70,8 @@  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
         }
         proxy_set_socket(dev, proxyfd, errp);
     }
+
+    configure_memory_sync(&dev->sync, dev->com);
 }
 
 static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
diff --git a/include/hw/pci/memory-sync.h b/include/hw/pci/memory-sync.h
new file mode 100644
index 0000000000..3c9007f318
--- /dev/null
+++ b/include/hw/pci/memory-sync.h
@@ -0,0 +1,30 @@ 
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MEMORY_SYNC_H
+#define MEMORY_SYNC_H
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "exec/memory.h"
+#include "io/channel.h"
+
+typedef struct RemoteMemSync {
+    MemoryListener listener;
+
+    int n_mr_sections;
+    MemoryRegionSection *mr_sections;
+
+    QIOChannel *ioc;
+} RemoteMemSync;
+
+void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
+void deconfigure_memory_sync(RemoteMemSync *sync);
+
+#endif
diff --git a/include/hw/pci/proxy.h b/include/hw/pci/proxy.h
index 4f9f9c4e15..a41a6aeaa5 100644
--- a/include/hw/pci/proxy.h
+++ b/include/hw/pci/proxy.h
@@ -14,6 +14,7 @@ 
 
 #include "hw/pci/pci.h"
 #include "io/channel.h"
+#include "hw/pci/memory-sync.h"
 
 #define TYPE_PCI_PROXY_DEV "pci-proxy-dev"
 
@@ -42,6 +43,8 @@  struct PCIProxyDev {
     QIOChannel *com;
     QIOChannel *dev;
 
+    RemoteMemSync sync;
+
     ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };