Message ID | 52bdcfdf44bcc4cd8a1a707b9c22f545fe0f1491.1582576372.git.jag.raman@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Initial support for multi-process qemu | expand |
* Jagannathan Raman (jag.raman@oracle.com) wrote: > Add memory-listener object which is used to keep the view of the RAM > in sync between QEMU and remote process. > A MemoryListener is registered for system-memory AddressSpace. The > listener sends SYNC_SYSMEM message to the remote process when memory > listener commits the changes to memory, the remote process receives > the message and processes it in the handler for SYNC_SYSMEM message. > > TODO: No need to create object for remote memory listener. > > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > --- > Makefile.target | 3 + > hw/proxy/memory-sync.c | 212 +++++++++++++++++++++++++++++++++++++++++ > hw/proxy/qemu-proxy.c | 5 + > include/hw/proxy/memory-sync.h | 37 +++++++ > include/hw/proxy/qemu-proxy.h | 5 + > remote/remote-main.c | 11 +++ > 6 files changed, 273 insertions(+) > create mode 100644 hw/proxy/memory-sync.c > create mode 100644 include/hw/proxy/memory-sync.h > > diff --git a/Makefile.target b/Makefile.target > index cfd36c1..271d883 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o > obj-y += target/$(TARGET_BASE_ARCH)/ > obj-y += disas.o > obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o > +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-) > +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o > +endif > LIBS := $(libs_cpu) $(LIBS) > > obj-$(CONFIG_PLUGIN) += plugins/ > diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c > new file mode 100644 > index 0000000..3edbb19 > --- /dev/null > +++ b/hw/proxy/memory-sync.c > @@ -0,0 +1,212 @@ > +/* > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#include <sys/types.h> > +#include <stdio.h> > +#include <string.h> > + > +#include "qemu/osdep.h" > +#include "qemu/compiler.h" > +#include "qemu/int128.h" > +#include "qemu/range.h" > +#include "exec/memory.h" > +#include "exec/cpu-common.h" > +#include "cpu.h" > +#include "exec/ram_addr.h" > +#include "exec/address-spaces.h" > +#include "io/mpqemu-link.h" > +#include "hw/proxy/memory-sync.h" > + > +static const TypeInfo remote_mem_sync_type_info = { > + .name = TYPE_MEMORY_LISTENER, > + .parent = TYPE_OBJECT, > + .instance_size = sizeof(RemoteMemSync), > +}; > + > +static void remote_mem_sync_register_types(void) > +{ > + type_register_static(&remote_mem_sync_type_info); > +} > + > +type_init(remote_mem_sync_register_types) > + > +static void proxy_ml_begin(MemoryListener *listener) > +{ > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > + int mrs; > + > + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { > + memory_region_unref(sync->mr_sections[mrs].mr); > + } > + > + g_free(sync->mr_sections); > + sync->mr_sections = NULL; > + sync->n_mr_sections = 0; > +} > + > +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) > +{ > + MemoryRegion *mr; > + ram_addr_t off; > + > + mr = memory_region_from_host((void *)(uintptr_t)host, &off); Do you need to just check we found an 'mr' ? > + if (offset) { > + *offset = off; > + } > + > + return memory_region_get_fd(mr); > +} > + > +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size) > +{ > + bool merge; > + int fd1, fd2; > + > + fd1 = get_fd_from_hostaddr(host, NULL); > + > + fd2 = get_fd_from_hostaddr(prev_host, NULL); > + > + merge = (fd1 == fd2); > + > + merge &= ((prev_host + size) == host); It's interesting; I think the vhost code checks that the two mr's are the same where you are checking for the same underlying fd - but I think that's OK. (I wonder if we need to check offset's within the fd's match up when they're merged - since you added that offset feature in an earlier patch? That would also need checking in vhost_region_add_section) > + return merge; > +} > + > +static void proxy_ml_region_addnop(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > + bool need_add = true; > + uint64_t mrs_size, mrs_gpa, mrs_page; > + uintptr_t mrs_host; > + RAMBlock *mrs_rb; > + MemoryRegionSection *prev_sec; > + > + if (!(memory_region_is_ram(section->mr) && > + !memory_region_is_rom(section->mr))) { > + return; > + } > + > + mrs_rb = section->mr->ram_block; > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); > + mrs_size = int128_get64(section->size); > + mrs_gpa = section->offset_within_address_space; > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region; > + > + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { > + return; > + } > + > + mrs_host = mrs_host & ~(mrs_page - 1); > + mrs_gpa = mrs_gpa & ~(mrs_page - 1); > + mrs_size = ROUND_UP(mrs_size, mrs_page); OK, just note the more complex code in vhost_region_add_section for page aligning regions that are needed for postcopy; I think that would be the same if you wanted to do postcopy with remote processes. > + if (sync->n_mr_sections) { > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; > + uint64_t prev_size = int128_get64(prev_sec->size); > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); > + uint64_t prev_host_start = > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + > + prev_sec->offset_within_region; > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); > + > + if (mrs_gpa <= (prev_gpa_end + 1)) { > + if (mrs_gpa < prev_gpa_start) { > + assert(0); > + } g_assert(mrs_gpa < prev_gpa_start); > + if ((section->mr == prev_sec->mr) && > + proxy_mrs_can_merge(mrs_host, prev_host_start, > + (mrs_gpa - prev_gpa_start))) { > + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); > + need_add = false; > + prev_sec->offset_within_address_space = > + MIN(prev_gpa_start, mrs_gpa); > + prev_sec->offset_within_region = > + MIN(prev_host_start, mrs_host) - > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); > + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, > + mrs_host)); > + } > + } > + } > + > + if (need_add) { > + ++sync->n_mr_sections; > + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, > + sync->n_mr_sections); > + sync->mr_sections[sync->n_mr_sections - 1] = *section; > + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; > + memory_region_ref(section->mr); > + } I'd add some tracing in this function; it's a nightmare to debug when it does something unexpected. > +} > + > +static void proxy_ml_commit(MemoryListener *listener) > +{ > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > + MPQemuMsg msg; > + MemoryRegionSection section; > + ram_addr_t offset; > + uintptr_t host_addr; > + int region; > + > + memset(&msg, 0, sizeof(MPQemuMsg)); > + > + msg.cmd = SYNC_SYSMEM; > + msg.bytestream = 0; > + msg.num_fds = sync->n_mr_sections; > + msg.size = sizeof(msg.data1); > + assert(msg.num_fds <= REMOTE_MAX_FDS); > + > + for (region = 0; region < sync->n_mr_sections; region++) { > + section = sync->mr_sections[region]; > + msg.data1.sync_sysmem.gpas[region] = > + section.offset_within_address_space; > + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size); > + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) + > + section.offset_within_region; > + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); Since you already have section.mr you cna use memory_region_get_fd. > + msg.data1.sync_sysmem.offsets[region] = offset; > + } > + mpqemu_msg_send(&msg, sync->mpqemu_link->com); > +} > + > +void deconfigure_memory_sync(RemoteMemSync *sync) > +{ > + memory_listener_unregister(&sync->listener); > +} > + > +/* > + * TODO: Memory Sync need not be instantianted once per every proxy device. > + * All remote devices are going to get the exact same updates at the > + * same time. It therefore makes sense to have a broadcast model. > + * > + * Broadcast model would involve running the MemorySync object in a > + * thread. MemorySync would contain a list of mpqemu-link objects > + * that need notification. proxy_ml_commit() could send the same > + * message to all the links at the same time. > + */ > +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link) > +{ > + sync->n_mr_sections = 0; > + sync->mr_sections = NULL; > + > + sync->mpqemu_link = mpqemu_link; > + > + sync->listener.begin = proxy_ml_begin; > + sync->listener.commit = proxy_ml_commit; > + sync->listener.region_add = proxy_ml_region_addnop; > + sync->listener.region_nop = proxy_ml_region_addnop; > + sync->listener.priority = 10; > + > + memory_listener_register(&sync->listener, &address_space_memory); > +} > diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c > index b17d9bb..d3a9d38 100644 > --- a/hw/proxy/qemu-proxy.c > +++ b/hw/proxy/qemu-proxy.c > @@ -16,6 +16,8 @@ > #include "qapi/qmp/qjson.h" > #include "qapi/qmp/qstring.h" > #include "hw/proxy/qemu-proxy.h" > +#include "hw/proxy/memory-sync.h" > +#include "qom/object.h" > > static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp); > > @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name, > > mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com, > pdev->socket); > + > + configure_memory_sync(pdev->sync, pdev->mpqemu_link); > } > > static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) > @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) > dev->set_proxy_sock = set_proxy_sock; > dev->get_proxy_sock = get_proxy_sock; > dev->init_proxy = init_proxy; > + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER)); > } > > static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr, > diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h > new file mode 100644 > index 0000000..d8329c9 > --- /dev/null > +++ b/include/hw/proxy/memory-sync.h > @@ -0,0 +1,37 @@ > +/* > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef MEMORY_SYNC_H > +#define MEMORY_SYNC_H > + > +#include <sys/types.h> > + > +#include "qemu/osdep.h" > +#include "qom/object.h" > +#include "exec/memory.h" > +#include "io/mpqemu-link.h" > + > +#define TYPE_MEMORY_LISTENER "memory-listener" > +#define REMOTE_MEM_SYNC(obj) \ > + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER) > + > +typedef struct RemoteMemSync { > + Object obj; > + > + MemoryListener listener; > + > + int n_mr_sections; > + MemoryRegionSection *mr_sections; > + > + MPQemuLinkState *mpqemu_link; > +} RemoteMemSync; > + > +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link); > +void deconfigure_memory_sync(RemoteMemSync *sync); > + > +#endif > diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h > index 44e370e..c93ffe3 100644 > --- a/include/hw/proxy/qemu-proxy.h > +++ b/include/hw/proxy/qemu-proxy.h > @@ -10,6 +10,7 @@ > #define QEMU_PROXY_H > > #include "io/mpqemu-link.h" > +#include "hw/proxy/memory-sync.h" > > #define TYPE_PCI_PROXY_DEV "pci-proxy-dev" > > @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops; > struct PCIProxyDev { > PCIDevice parent_dev; > > + int n_mr_sections; > + MemoryRegionSection *mr_sections; > + > MPQemuLinkState *mpqemu_link; > > + RemoteMemSync *sync; > pid_t remote_pid; > int socket; > > diff --git a/remote/remote-main.c b/remote/remote-main.c > index acd8daf..9512a3b 100644 > --- a/remote/remote-main.c > +++ b/remote/remote-main.c > @@ -34,6 +34,7 @@ > #include "block/block.h" > #include "exec/ramlist.h" > #include "exec/memattrs.h" > +#include "exec/address-spaces.h" > > static MPQemuLinkState *mpqemu_link; > PCIDevice *remote_pci_dev; > @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan) > goto finalize_loop; > } > break; > + case SYNC_SYSMEM: > + /* > + * TODO: ensure no active DMA is happening when > + * sysmem is being updated In practice this turns out to be hard! Dave > + */ > + remote_sysmem_reconfig(msg, &err); > + if (err) { > + goto finalize_loop; > + } > + break; > default: > error_setg(&err, "Unknown command"); > goto finalize_loop; > -- > 1.8.3.1 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 3/4/2020 6:53 AM, Dr. David Alan Gilbert wrote: > * Jagannathan Raman (jag.raman@oracle.com) wrote: >> Add memory-listener object which is used to keep the view of the RAM >> in sync between QEMU and remote process. >> A MemoryListener is registered for system-memory AddressSpace. The >> listener sends SYNC_SYSMEM message to the remote process when memory >> listener commits the changes to memory, the remote process receives >> the message and processes it in the handler for SYNC_SYSMEM message. >> >> TODO: No need to create object for remote memory listener. >> >> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> >> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> >> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> >> --- >> Makefile.target | 3 + >> hw/proxy/memory-sync.c | 212 +++++++++++++++++++++++++++++++++++++++++ >> hw/proxy/qemu-proxy.c | 5 + >> include/hw/proxy/memory-sync.h | 37 +++++++ >> include/hw/proxy/qemu-proxy.h | 5 + >> remote/remote-main.c | 11 +++ >> 6 files changed, 273 insertions(+) >> create mode 100644 hw/proxy/memory-sync.c >> create mode 100644 include/hw/proxy/memory-sync.h >> >> diff --git a/Makefile.target b/Makefile.target >> index cfd36c1..271d883 100644 >> --- a/Makefile.target >> +++ b/Makefile.target >> @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o >> obj-y += target/$(TARGET_BASE_ARCH)/ >> obj-y += disas.o >> obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o >> +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-) >> +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o >> +endif >> LIBS := $(libs_cpu) $(LIBS) >> >> obj-$(CONFIG_PLUGIN) += plugins/ >> diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c >> new file mode 100644 >> index 0000000..3edbb19 >> --- /dev/null >> +++ b/hw/proxy/memory-sync.c >> @@ -0,0 +1,212 @@ >> +/* >> + * Copyright © 2018, 2020 Oracle and/or its affiliates. >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or later. >> + * See the COPYING file in the top-level directory. >> + * >> + */ >> + >> +#include <sys/types.h> >> +#include <stdio.h> >> +#include <string.h> >> + >> +#include "qemu/osdep.h" >> +#include "qemu/compiler.h" >> +#include "qemu/int128.h" >> +#include "qemu/range.h" >> +#include "exec/memory.h" >> +#include "exec/cpu-common.h" >> +#include "cpu.h" >> +#include "exec/ram_addr.h" >> +#include "exec/address-spaces.h" >> +#include "io/mpqemu-link.h" >> +#include "hw/proxy/memory-sync.h" >> + >> +static const TypeInfo remote_mem_sync_type_info = { >> + .name = TYPE_MEMORY_LISTENER, >> + .parent = TYPE_OBJECT, >> + .instance_size = sizeof(RemoteMemSync), >> +}; >> + >> +static void remote_mem_sync_register_types(void) >> +{ >> + type_register_static(&remote_mem_sync_type_info); >> +} >> + >> +type_init(remote_mem_sync_register_types) >> + >> +static void proxy_ml_begin(MemoryListener *listener) >> +{ >> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); >> + int mrs; >> + >> + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { >> + memory_region_unref(sync->mr_sections[mrs].mr); >> + } >> + >> + g_free(sync->mr_sections); >> + sync->mr_sections = NULL; >> + sync->n_mr_sections = 0; >> +} >> + >> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) >> +{ >> + MemoryRegion *mr; >> + ram_addr_t off; >> + >> + mr = memory_region_from_host((void *)(uintptr_t)host, &off); > > Do you need to just check we found an 'mr' ? OK, we'll add this check. > >> + if (offset) { >> + *offset = off; >> + } >> + >> + return memory_region_get_fd(mr); >> +} >> + >> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size) >> +{ >> + bool merge; >> + int fd1, fd2; >> + >> + fd1 = get_fd_from_hostaddr(host, NULL); >> + >> + fd2 = get_fd_from_hostaddr(prev_host, NULL); >> + >> + merge = (fd1 == fd2); >> + >> + merge &= ((prev_host + size) == host); > > It's interesting; I think the vhost code checks that the two mr's are > the same where you are checking for the same underlying fd - but I think > that's OK. > (I wonder if we need to check offset's within the fd's match up when > they're merged - since you added that offset feature in an earlier > patch? > That would also need checking in vhost_region_add_section) If the fds are the same, and the subsequent check ((prev_host + size) == host) passes, then I believe the offsets would match as well. > >> + return merge; >> +} >> + >> +static void proxy_ml_region_addnop(MemoryListener *listener, >> + MemoryRegionSection *section) >> +{ >> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); >> + bool need_add = true; >> + uint64_t mrs_size, mrs_gpa, mrs_page; >> + uintptr_t mrs_host; >> + RAMBlock *mrs_rb; >> + MemoryRegionSection *prev_sec; >> + >> + if (!(memory_region_is_ram(section->mr) && >> + !memory_region_is_rom(section->mr))) { >> + return; >> + } >> + >> + mrs_rb = section->mr->ram_block; >> + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); >> + mrs_size = int128_get64(section->size); >> + mrs_gpa = section->offset_within_address_space; >> + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + >> + section->offset_within_region; >> + >> + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { >> + return; >> + } >> + >> + mrs_host = mrs_host & ~(mrs_page - 1); >> + mrs_gpa = mrs_gpa & ~(mrs_page - 1); >> + mrs_size = ROUND_UP(mrs_size, mrs_page); > > OK, just note the more complex code in vhost_region_add_section for page > aligning regions that are needed for postcopy; I think that would be the > same if you wanted to do postcopy with remote processes. Since mmap requires the addresses to be aligned with a page boundry, we added these checks. We are essentially doing the same with alignage as compared with vhost user. So we should be compliant with postcopy I believe. > >> + if (sync->n_mr_sections) { >> + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); >> + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; >> + uint64_t prev_size = int128_get64(prev_sec->size); >> + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); >> + uint64_t prev_host_start = >> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + >> + prev_sec->offset_within_region; >> + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); >> + >> + if (mrs_gpa <= (prev_gpa_end + 1)) { >> + if (mrs_gpa < prev_gpa_start) { >> + assert(0); >> + } > > g_assert(mrs_gpa < prev_gpa_start); Thank you, we'll update the above check with the version you proposed. > > >> + if ((section->mr == prev_sec->mr) && >> + proxy_mrs_can_merge(mrs_host, prev_host_start, >> + (mrs_gpa - prev_gpa_start))) { >> + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); >> + need_add = false; >> + prev_sec->offset_within_address_space = >> + MIN(prev_gpa_start, mrs_gpa); >> + prev_sec->offset_within_region = >> + MIN(prev_host_start, mrs_host) - >> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); >> + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, >> + mrs_host)); >> + } >> + } >> + } >> + >> + if (need_add) { >> + ++sync->n_mr_sections; >> + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, >> + sync->n_mr_sections); >> + sync->mr_sections[sync->n_mr_sections - 1] = *section; >> + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; >> + memory_region_ref(section->mr); >> + } > > I'd add some tracing in this function; it's a nightmare to debug when it > does something unexpected. Thank you, we'll add the tracing. > >> +} >> + >> +static void proxy_ml_commit(MemoryListener *listener) >> +{ >> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); >> + MPQemuMsg msg; >> + MemoryRegionSection section; >> + ram_addr_t offset; >> + uintptr_t host_addr; >> + int region; >> + >> + memset(&msg, 0, sizeof(MPQemuMsg)); >> + >> + msg.cmd = SYNC_SYSMEM; >> + msg.bytestream = 0; >> + msg.num_fds = sync->n_mr_sections; >> + msg.size = sizeof(msg.data1); >> + assert(msg.num_fds <= REMOTE_MAX_FDS); >> + >> + for (region = 0; region < sync->n_mr_sections; region++) { >> + section = sync->mr_sections[region]; >> + msg.data1.sync_sysmem.gpas[region] = >> + section.offset_within_address_space; >> + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size); >> + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) + >> + section.offset_within_region; >> + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); > > Since you already have section.mr you cna use memory_region_get_fd. OK. Thank you! -- Jag > >> + msg.data1.sync_sysmem.offsets[region] = offset; >> + } >> + mpqemu_msg_send(&msg, sync->mpqemu_link->com); >> +} >> + >> +void deconfigure_memory_sync(RemoteMemSync *sync) >> +{ >> + memory_listener_unregister(&sync->listener); >> +} >> + >> +/* >> + * TODO: Memory Sync need not be instantianted once per every proxy device. >> + * All remote devices are going to get the exact same updates at the >> + * same time. It therefore makes sense to have a broadcast model. >> + * >> + * Broadcast model would involve running the MemorySync object in a >> + * thread. MemorySync would contain a list of mpqemu-link objects >> + * that need notification. proxy_ml_commit() could send the same >> + * message to all the links at the same time. >> + */ >> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link) >> +{ >> + sync->n_mr_sections = 0; >> + sync->mr_sections = NULL; >> + >> + sync->mpqemu_link = mpqemu_link; >> + >> + sync->listener.begin = proxy_ml_begin; >> + sync->listener.commit = proxy_ml_commit; >> + sync->listener.region_add = proxy_ml_region_addnop; >> + sync->listener.region_nop = proxy_ml_region_addnop; >> + sync->listener.priority = 10; >> + >> + memory_listener_register(&sync->listener, &address_space_memory); >> +} >> diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c >> index b17d9bb..d3a9d38 100644 >> --- a/hw/proxy/qemu-proxy.c >> +++ b/hw/proxy/qemu-proxy.c >> @@ -16,6 +16,8 @@ >> #include "qapi/qmp/qjson.h" >> #include "qapi/qmp/qstring.h" >> #include "hw/proxy/qemu-proxy.h" >> +#include "hw/proxy/memory-sync.h" >> +#include "qom/object.h" >> >> static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp); >> >> @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name, >> >> mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com, >> pdev->socket); >> + >> + configure_memory_sync(pdev->sync, pdev->mpqemu_link); >> } >> >> static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) >> @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) >> dev->set_proxy_sock = set_proxy_sock; >> dev->get_proxy_sock = get_proxy_sock; >> dev->init_proxy = init_proxy; >> + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER)); >> } >> >> static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr, >> diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h >> new file mode 100644 >> index 0000000..d8329c9 >> --- /dev/null >> +++ b/include/hw/proxy/memory-sync.h >> @@ -0,0 +1,37 @@ >> +/* >> + * Copyright © 2018, 2020 Oracle and/or its affiliates. >> + * >> + * This work is licensed under the terms of the GNU GPL, version 2 or later. >> + * See the COPYING file in the top-level directory. >> + * >> + */ >> + >> +#ifndef MEMORY_SYNC_H >> +#define MEMORY_SYNC_H >> + >> +#include <sys/types.h> >> + >> +#include "qemu/osdep.h" >> +#include "qom/object.h" >> +#include "exec/memory.h" >> +#include "io/mpqemu-link.h" >> + >> +#define TYPE_MEMORY_LISTENER "memory-listener" >> +#define REMOTE_MEM_SYNC(obj) \ >> + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER) >> + >> +typedef struct RemoteMemSync { >> + Object obj; >> + >> + MemoryListener listener; >> + >> + int n_mr_sections; >> + MemoryRegionSection *mr_sections; >> + >> + MPQemuLinkState *mpqemu_link; >> +} RemoteMemSync; >> + >> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link); >> +void deconfigure_memory_sync(RemoteMemSync *sync); >> + >> +#endif >> diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h >> index 44e370e..c93ffe3 100644 >> --- a/include/hw/proxy/qemu-proxy.h >> +++ b/include/hw/proxy/qemu-proxy.h >> @@ -10,6 +10,7 @@ >> #define QEMU_PROXY_H >> >> #include "io/mpqemu-link.h" >> +#include "hw/proxy/memory-sync.h" >> >> #define TYPE_PCI_PROXY_DEV "pci-proxy-dev" >> >> @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops; >> struct PCIProxyDev { >> PCIDevice parent_dev; >> >> + int n_mr_sections; >> + MemoryRegionSection *mr_sections; >> + >> MPQemuLinkState *mpqemu_link; >> >> + RemoteMemSync *sync; >> pid_t remote_pid; >> int socket; >> >> diff --git a/remote/remote-main.c b/remote/remote-main.c >> index acd8daf..9512a3b 100644 >> --- a/remote/remote-main.c >> +++ b/remote/remote-main.c >> @@ -34,6 +34,7 @@ >> #include "block/block.h" >> #include "exec/ramlist.h" >> #include "exec/memattrs.h" >> +#include "exec/address-spaces.h" >> >> static MPQemuLinkState *mpqemu_link; >> PCIDevice *remote_pci_dev; >> @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan) >> goto finalize_loop; >> } >> break; >> + case SYNC_SYSMEM: >> + /* >> + * TODO: ensure no active DMA is happening when >> + * sysmem is being updated > > In practice this turns out to be hard! > > Dave > >> + */ >> + remote_sysmem_reconfig(msg, &err); >> + if (err) { >> + goto finalize_loop; >> + } >> + break; >> default: >> error_setg(&err, "Unknown command"); >> goto finalize_loop; >> -- >> 1.8.3.1 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
diff --git a/Makefile.target b/Makefile.target index cfd36c1..271d883 100644 --- a/Makefile.target +++ b/Makefile.target @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o obj-y += target/$(TARGET_BASE_ARCH)/ obj-y += disas.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-) +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o +endif LIBS := $(libs_cpu) $(LIBS) obj-$(CONFIG_PLUGIN) += plugins/ diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c new file mode 100644 index 0000000..3edbb19 --- /dev/null +++ b/hw/proxy/memory-sync.c @@ -0,0 +1,212 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> + +#include "qemu/osdep.h" +#include "qemu/compiler.h" +#include "qemu/int128.h" +#include "qemu/range.h" +#include "exec/memory.h" +#include "exec/cpu-common.h" +#include "cpu.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "io/mpqemu-link.h" +#include "hw/proxy/memory-sync.h" + +static const TypeInfo remote_mem_sync_type_info = { + .name = TYPE_MEMORY_LISTENER, + .parent = TYPE_OBJECT, + .instance_size = sizeof(RemoteMemSync), +}; + +static void remote_mem_sync_register_types(void) +{ + type_register_static(&remote_mem_sync_type_info); +} + +type_init(remote_mem_sync_register_types) + +static void proxy_ml_begin(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + int mrs; + + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { + memory_region_unref(sync->mr_sections[mrs].mr); + } + + g_free(sync->mr_sections); + sync->mr_sections = NULL; + sync->n_mr_sections = 0; +} + +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) +{ + MemoryRegion *mr; + ram_addr_t off; + + mr = memory_region_from_host((void *)(uintptr_t)host, &off); + + if (offset) { + *offset = off; + } + + return memory_region_get_fd(mr); +} + +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size) +{ + bool merge; + int fd1, fd2; + + fd1 = get_fd_from_hostaddr(host, NULL); + + fd2 = get_fd_from_hostaddr(prev_host, NULL); + + merge = (fd1 == fd2); + + merge &= ((prev_host + size) == host); + + return merge; +} + +static void proxy_ml_region_addnop(MemoryListener *listener, + MemoryRegionSection *section) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + bool need_add = true; + uint64_t mrs_size, mrs_gpa, mrs_page; + uintptr_t mrs_host; + RAMBlock *mrs_rb; + MemoryRegionSection *prev_sec; + + if (!(memory_region_is_ram(section->mr) && + !memory_region_is_rom(section->mr))) { + return; + } + + mrs_rb = section->mr->ram_block; + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); + mrs_size = int128_get64(section->size); + mrs_gpa = section->offset_within_address_space; + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { + return; + } + + mrs_host = mrs_host & ~(mrs_page - 1); + mrs_gpa = mrs_gpa & ~(mrs_page - 1); + mrs_size = ROUND_UP(mrs_size, mrs_page); + + if (sync->n_mr_sections) { + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; + uint64_t prev_size = int128_get64(prev_sec->size); + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); + uint64_t prev_host_start = + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + + prev_sec->offset_within_region; + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); + + if (mrs_gpa <= (prev_gpa_end + 1)) { + if (mrs_gpa < prev_gpa_start) { + assert(0); + } + + if ((section->mr == prev_sec->mr) && + proxy_mrs_can_merge(mrs_host, prev_host_start, + (mrs_gpa - prev_gpa_start))) { + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); + need_add = false; + prev_sec->offset_within_address_space = + MIN(prev_gpa_start, mrs_gpa); + prev_sec->offset_within_region = + MIN(prev_host_start, mrs_host) - + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, + mrs_host)); + } + } + } + + if (need_add) { + ++sync->n_mr_sections; + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, + sync->n_mr_sections); + sync->mr_sections[sync->n_mr_sections - 1] = *section; + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; + memory_region_ref(section->mr); + } +} + +static void proxy_ml_commit(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + MPQemuMsg msg; + MemoryRegionSection section; + ram_addr_t offset; + uintptr_t host_addr; + int region; + + memset(&msg, 0, sizeof(MPQemuMsg)); + + msg.cmd = SYNC_SYSMEM; + msg.bytestream = 0; + msg.num_fds = sync->n_mr_sections; + msg.size = sizeof(msg.data1); + assert(msg.num_fds <= REMOTE_MAX_FDS); + + for (region = 0; region < sync->n_mr_sections; region++) { + section = sync->mr_sections[region]; + msg.data1.sync_sysmem.gpas[region] = + section.offset_within_address_space; + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size); + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) + + section.offset_within_region; + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); + msg.data1.sync_sysmem.offsets[region] = offset; + } + mpqemu_msg_send(&msg, sync->mpqemu_link->com); +} + +void deconfigure_memory_sync(RemoteMemSync *sync) +{ + memory_listener_unregister(&sync->listener); +} + +/* + * TODO: Memory Sync need not be instantianted once per every proxy device. + * All remote devices are going to get the exact same updates at the + * same time. It therefore makes sense to have a broadcast model. + * + * Broadcast model would involve running the MemorySync object in a + * thread. MemorySync would contain a list of mpqemu-link objects + * that need notification. proxy_ml_commit() could send the same + * message to all the links at the same time. + */ +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link) +{ + sync->n_mr_sections = 0; + sync->mr_sections = NULL; + + sync->mpqemu_link = mpqemu_link; + + sync->listener.begin = proxy_ml_begin; + sync->listener.commit = proxy_ml_commit; + sync->listener.region_add = proxy_ml_region_addnop; + sync->listener.region_nop = proxy_ml_region_addnop; + sync->listener.priority = 10; + + memory_listener_register(&sync->listener, &address_space_memory); +} diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c index b17d9bb..d3a9d38 100644 --- a/hw/proxy/qemu-proxy.c +++ b/hw/proxy/qemu-proxy.c @@ -16,6 +16,8 @@ #include "qapi/qmp/qjson.h" #include "qapi/qmp/qstring.h" #include "hw/proxy/qemu-proxy.h" +#include "hw/proxy/memory-sync.h" +#include "qom/object.h" static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp); @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name, mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com, pdev->socket); + + configure_memory_sync(pdev->sync, pdev->mpqemu_link); } static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) dev->set_proxy_sock = set_proxy_sock; dev->get_proxy_sock = get_proxy_sock; dev->init_proxy = init_proxy; + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER)); } static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr, diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h new file mode 100644 index 0000000..d8329c9 --- /dev/null +++ b/include/hw/proxy/memory-sync.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef MEMORY_SYNC_H +#define MEMORY_SYNC_H + +#include <sys/types.h> + +#include "qemu/osdep.h" +#include "qom/object.h" +#include "exec/memory.h" +#include "io/mpqemu-link.h" + +#define TYPE_MEMORY_LISTENER "memory-listener" +#define REMOTE_MEM_SYNC(obj) \ + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER) + +typedef struct RemoteMemSync { + Object obj; + + MemoryListener listener; + + int n_mr_sections; + MemoryRegionSection *mr_sections; + + MPQemuLinkState *mpqemu_link; +} RemoteMemSync; + +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link); +void deconfigure_memory_sync(RemoteMemSync *sync); + +#endif diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h index 44e370e..c93ffe3 100644 --- a/include/hw/proxy/qemu-proxy.h +++ b/include/hw/proxy/qemu-proxy.h @@ -10,6 +10,7 @@ #define QEMU_PROXY_H #include "io/mpqemu-link.h" +#include "hw/proxy/memory-sync.h" #define TYPE_PCI_PROXY_DEV "pci-proxy-dev" @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops; struct PCIProxyDev { PCIDevice parent_dev; + int n_mr_sections; + MemoryRegionSection *mr_sections; + MPQemuLinkState *mpqemu_link; + RemoteMemSync *sync; pid_t remote_pid; int socket; diff --git a/remote/remote-main.c b/remote/remote-main.c index acd8daf..9512a3b 100644 --- a/remote/remote-main.c +++ b/remote/remote-main.c @@ -34,6 +34,7 @@ #include "block/block.h" #include "exec/ramlist.h" #include "exec/memattrs.h" +#include "exec/address-spaces.h" static MPQemuLinkState *mpqemu_link; PCIDevice *remote_pci_dev; @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan) goto finalize_loop; } break; + case SYNC_SYSMEM: + /* + * TODO: ensure no active DMA is happening when + * sysmem is being updated + */ + remote_sysmem_reconfig(msg, &err); + if (err) { + goto finalize_loop; + } + break; default: error_setg(&err, "Unknown command"); goto finalize_loop;