Message ID | 63a7f84be8c1c86d1bdea5f538239d0d9c3cdb06.1587614626.git.elena.ufimtseva@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [RESEND,v6,01/36] memory: alloc RAM from file at offset | expand |
On Wed, Apr 22, 2020 at 09:13:57PM -0700, elena.ufimtseva@oracle.com wrote: > diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c > new file mode 100644 > index 0000000000..b3f57747f3 > --- /dev/null > +++ b/hw/proxy/memory-sync.c > @@ -0,0 +1,217 @@ > +/* > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#include <sys/types.h> > +#include <stdio.h> > +#include <string.h> These headers should already be included by "qemu/osdep.h". > +static void proxy_ml_region_addnop(MemoryListener *listener, > + MemoryRegionSection *section) > +{ > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > + bool need_add = true; > + uint64_t mrs_size, mrs_gpa, mrs_page; > + uintptr_t mrs_host; > + RAMBlock *mrs_rb; > + MemoryRegionSection *prev_sec; > + > + if (!(memory_region_is_ram(section->mr) && > + !memory_region_is_rom(section->mr))) { > + return; > + } > + > + mrs_rb = section->mr->ram_block; > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); > + mrs_size = int128_get64(section->size); > + mrs_gpa = section->offset_within_address_space; > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + > + section->offset_within_region; These variables are only used in the if (sync->n_mr_sections) case. This function could be split into something like this: static void proxy_ml_region_addnop(MemoryListener *listener, MemoryRegionSection *section) { RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); if (!(memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr))) { return; } if (try_merge(sync, section)) { return; } ...add new section... 
} And the try_merge() helper function has the rest of the code: /* Returns true if the section was merged */ static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section) { if (sync->n_mr_sections == 0) { return false; } ...most of the code... } > + > + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { 0 is a valid fd number, the comparison should probably be < 0? > + return; > + } > + > + mrs_host = mrs_host & ~(mrs_page - 1); > + mrs_gpa = mrs_gpa & ~(mrs_page - 1); > + mrs_size = ROUND_UP(mrs_size, mrs_page); Why is it necessary to align to the RAM block's page size? Can mrs_host and mrs_size be misaligned to the RAM block's page size? Why round the *guest* physical address down using the *host* page size? > + > + if (sync->n_mr_sections) { > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; > + uint64_t prev_size = int128_get64(prev_sec->size); > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); > + uint64_t prev_host_start = > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + > + prev_sec->offset_within_region; > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); Is it okay not to do the page alignment stuff for the previous MemoryRegionSection? > +void deconfigure_memory_sync(RemoteMemSync *sync) > +{ > + memory_listener_unregister(&sync->listener); > +} This function is unused? It must be tied into the mpqemu_link lifecycle. It must be possible to hot plug/unplug proxy PCI devices without memory leaks or use-after-frees. > diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h > new file mode 100644 > index 0000000000..d8329c9b52 > --- /dev/null > +++ b/include/hw/proxy/memory-sync.h > @@ -0,0 +1,37 @@ > +/* > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + * > + */ > + > +#ifndef MEMORY_SYNC_H > +#define MEMORY_SYNC_H > + > +#include <sys/types.h> > + > +#include "qemu/osdep.h" > +#include "qom/object.h" > +#include "exec/memory.h" > +#include "io/mpqemu-link.h" > + > +#define TYPE_MEMORY_LISTENER "memory-listener" This name is too generic. There is already a C struct called MemoryListener. Please call this class "remote-memory-sync". I'm not sure if a QOM object is needed here. Can this just be a plain C struct? If you're not using QOM object-orientated features then there is no need to define a QOM object. > @@ -39,8 +40,13 @@ typedef struct ProxyMemoryRegion { > struct PCIProxyDev { > PCIDevice parent_dev; > > + int n_mr_sections; > + MemoryRegionSection *mr_sections; Is it necessary to duplicate these fields here since a RemoteMemSync field is also being added and it contains these same fields?
* Stefan Hajnoczi (stefanha@redhat.com) wrote: > On Wed, Apr 22, 2020 at 09:13:57PM -0700, elena.ufimtseva@oracle.com wrote: > > diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c > > new file mode 100644 > > index 0000000000..b3f57747f3 > > --- /dev/null > > +++ b/hw/proxy/memory-sync.c > > @@ -0,0 +1,217 @@ > > +/* > > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +#include <sys/types.h> > > +#include <stdio.h> > > +#include <string.h> > > These headers should already be included by "qemu/osdep.h". > > > +static void proxy_ml_region_addnop(MemoryListener *listener, > > + MemoryRegionSection *section) > > +{ > > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > > + bool need_add = true; > > + uint64_t mrs_size, mrs_gpa, mrs_page; > > + uintptr_t mrs_host; > > + RAMBlock *mrs_rb; > > + MemoryRegionSection *prev_sec; > > + > > + if (!(memory_region_is_ram(section->mr) && > > + !memory_region_is_rom(section->mr))) { > > + return; > > + } > > + > > + mrs_rb = section->mr->ram_block; > > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); > > + mrs_size = int128_get64(section->size); > > + mrs_gpa = section->offset_within_address_space; > > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + > > + section->offset_within_region; > > These variables are only used in the if (sync->n_mr_sections) case. This > function could be split into a something like this: > > static void proxy_ml_region_addnop(MemoryListener *listener, > MemoryRegionSection *section) > RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); > > if (!(memory_region_is_ram(section->mr) && > !memory_region_is_rom(section->mr))) { > return; > } > > if (try_merge(sync, section)) { > return; > } > > ...add new section... 
> } > > And the try_merge() helper function has the rest of the code: > > /* Returns true if the section was merged */ > static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section) > { > if (sync->n_mr_sections == 0) { > return false; > } > > ...most of the code... > } > > > + > > + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { > > 0 is a valid fd number, the comparison should probably be < 0? > > > + return; > > + } > > + > > + mrs_host = mrs_host & ~(mrs_page - 1); > > + mrs_gpa = mrs_gpa & ~(mrs_page - 1); > > + mrs_size = ROUND_UP(mrs_size, mrs_page); > > Why is it necessary to align to the RAM block's page size? > > Can mrs_host and mrs_size be misaligned to the RAM block's page size? > > Why round the *guest* physical address down using the *host* page size? That sounds like the type of magic we do for postcopy; where we can only 'place' pages atomically on a host page boundary. Dave > > + > > + if (sync->n_mr_sections) { > > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); > > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; > > + uint64_t prev_size = int128_get64(prev_sec->size); > > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); > > + uint64_t prev_host_start = > > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + > > + prev_sec->offset_within_region; > > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); > > Is it okay not to do the page alignment stuff for the previous > MemoryRegionSection? > > > +void deconfigure_memory_sync(RemoteMemSync *sync) > > +{ > > + memory_listener_unregister(&sync->listener); > > +} > > This function is unused? It must be tied into the mpqemu_link lifecycle. > It must be possible to hot plug/unplug proxy PCI devices without memory > leaks or use-after-frees. 
> > > diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h > > new file mode 100644 > > index 0000000000..d8329c9b52 > > --- /dev/null > > +++ b/include/hw/proxy/memory-sync.h > > @@ -0,0 +1,37 @@ > > +/* > > + * Copyright © 2018, 2020 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +#ifndef MEMORY_SYNC_H > > +#define MEMORY_SYNC_H > > + > > +#include <sys/types.h> > > + > > +#include "qemu/osdep.h" > > +#include "qom/object.h" > > +#include "exec/memory.h" > > +#include "io/mpqemu-link.h" > > + > > +#define TYPE_MEMORY_LISTENER "memory-listener" > > This name is too generic. There is already a C struct called > MemoryListener. Please call this class "remote-memory-sync". > > I'm not sure if a QOM object is needed here. Can this just be a plain C > struct? If you're not using QOM object-orientated features then there is > no need to define a QOM object. > > > @@ -39,8 +40,13 @@ typedef struct ProxyMemoryRegion { > > struct PCIProxyDev { > > PCIDevice parent_dev; > > > > + int n_mr_sections; > > + MemoryRegionSection *mr_sections; > > Is it necessary to duplicate these fields here since a RemoteMemSync > field is also being added and it contains these same fields? -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/MAINTAINERS b/MAINTAINERS index 3da3dcd311..9ebb46722a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2869,6 +2869,8 @@ F: remote/memory.c F: hw/proxy/Makefile.objs F: hw/proxy/qemu-proxy.c F: include/hw/proxy/qemu-proxy.h +F: include/hw/proxy/memory-sync.h +F: hw/proxy/memory-sync.c Build and test automation ------------------------- diff --git a/Makefile.target b/Makefile.target index 500fa07fda..c64d860895 100644 --- a/Makefile.target +++ b/Makefile.target @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o obj-y += target/$(TARGET_BASE_ARCH)/ obj-y += disas.o obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-) +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o +endif LIBS := $(libs_cpu) $(LIBS) obj-$(CONFIG_PLUGIN) += plugins/ diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c new file mode 100644 index 0000000000..b3f57747f3 --- /dev/null +++ b/hw/proxy/memory-sync.c @@ -0,0 +1,217 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> + +#include "qemu/osdep.h" +#include "qemu/compiler.h" +#include "qemu/int128.h" +#include "qemu/range.h" +#include "exec/memory.h" +#include "exec/cpu-common.h" +#include "cpu.h" +#include "exec/ram_addr.h" +#include "exec/address-spaces.h" +#include "io/mpqemu-link.h" +#include "hw/proxy/memory-sync.h" + +static const TypeInfo remote_mem_sync_type_info = { + .name = TYPE_MEMORY_LISTENER, + .parent = TYPE_OBJECT, + .instance_size = sizeof(RemoteMemSync), +}; + +static void remote_mem_sync_register_types(void) +{ + type_register_static(&remote_mem_sync_type_info); +} + +type_init(remote_mem_sync_register_types) + +static void proxy_ml_begin(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + int mrs; + + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) { + memory_region_unref(sync->mr_sections[mrs].mr); + } + + g_free(sync->mr_sections); + sync->mr_sections = NULL; + sync->n_mr_sections = 0; +} + +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset) +{ + MemoryRegion *mr; + ram_addr_t off; + + /** + * Assumes that the host address is a valid address as it's + * coming from the MemoryListener system. In the case host + * address is not valid, the following call would return + * the default subregion of "system_memory" region, and + * not NULL. So it's not possible to check for NULL here. 
+ */ + mr = memory_region_from_host((void *)(uintptr_t)host, &off); + + if (offset) { + *offset = off; + } + + return memory_region_get_fd(mr); +} + +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size) +{ + bool merge; + int fd1, fd2; + + fd1 = get_fd_from_hostaddr(host, NULL); + + fd2 = get_fd_from_hostaddr(prev_host, NULL); + + merge = (fd1 == fd2); + + merge &= ((prev_host + size) == host); + + return merge; +} + +static void proxy_ml_region_addnop(MemoryListener *listener, + MemoryRegionSection *section) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + bool need_add = true; + uint64_t mrs_size, mrs_gpa, mrs_page; + uintptr_t mrs_host; + RAMBlock *mrs_rb; + MemoryRegionSection *prev_sec; + + if (!(memory_region_is_ram(section->mr) && + !memory_region_is_rom(section->mr))) { + return; + } + + mrs_rb = section->mr->ram_block; + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb); + mrs_size = int128_get64(section->size); + mrs_gpa = section->offset_within_address_space; + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) { + return; + } + + mrs_host = mrs_host & ~(mrs_page - 1); + mrs_gpa = mrs_gpa & ~(mrs_page - 1); + mrs_size = ROUND_UP(mrs_size, mrs_page); + + if (sync->n_mr_sections) { + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1); + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; + uint64_t prev_size = int128_get64(prev_sec->size); + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); + uint64_t prev_host_start = + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + + prev_sec->offset_within_region; + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); + + if (mrs_gpa <= (prev_gpa_end + 1)) { + g_assert(mrs_gpa > prev_gpa_start); + + if ((section->mr == prev_sec->mr) && + proxy_mrs_can_merge(mrs_host, prev_host_start, + (mrs_gpa - prev_gpa_start))) { 
+ uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size); + need_add = false; + prev_sec->offset_within_address_space = + MIN(prev_gpa_start, mrs_gpa); + prev_sec->offset_within_region = + MIN(prev_host_start, mrs_host) - + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr); + prev_sec->size = int128_make64(max_end - MIN(prev_host_start, + mrs_host)); + } + } + } + + if (need_add) { + ++sync->n_mr_sections; + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections, + sync->n_mr_sections); + sync->mr_sections[sync->n_mr_sections - 1] = *section; + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL; + memory_region_ref(section->mr); + } +} + +static void proxy_ml_commit(MemoryListener *listener) +{ + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener); + MPQemuMsg msg; + MemoryRegionSection section; + ram_addr_t offset; + uintptr_t host_addr; + int region; + + memset(&msg, 0, sizeof(MPQemuMsg)); + + msg.cmd = SYNC_SYSMEM; + msg.bytestream = 0; + msg.num_fds = sync->n_mr_sections; + msg.size = sizeof(msg.data1); + assert(msg.num_fds <= REMOTE_MAX_FDS); + + for (region = 0; region < sync->n_mr_sections; region++) { + section = sync->mr_sections[region]; + msg.data1.sync_sysmem.gpas[region] = + section.offset_within_address_space; + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size); + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) + + section.offset_within_region; + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset); + msg.data1.sync_sysmem.offsets[region] = offset; + } + mpqemu_msg_send(&msg, sync->mpqemu_link->com); +} + +void deconfigure_memory_sync(RemoteMemSync *sync) +{ + memory_listener_unregister(&sync->listener); +} + +/* + * TODO: Memory Sync need not be instantiated once for every proxy device. + * All remote devices are going to get the exact same updates at the + * same time. It therefore makes sense to have a broadcast model. 
+ * + * Broadcast model would involve running the MemorySync object in a + * thread. MemorySync would contain a list of mpqemu-link objects + * that need notification. proxy_ml_commit() could send the same + * message to all the links at the same time. + */ +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link) +{ + sync->n_mr_sections = 0; + sync->mr_sections = NULL; + + sync->mpqemu_link = mpqemu_link; + + sync->listener.begin = proxy_ml_begin; + sync->listener.commit = proxy_ml_commit; + sync->listener.region_add = proxy_ml_region_addnop; + sync->listener.region_nop = proxy_ml_region_addnop; + sync->listener.priority = 10; + + memory_listener_register(&sync->listener, &address_space_memory); +} diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c index 7fd0a312a5..2ac4c1528a 100644 --- a/hw/proxy/qemu-proxy.c +++ b/hw/proxy/qemu-proxy.c @@ -13,6 +13,8 @@ #include "io/mpqemu-link.h" #include "hw/proxy/qemu-proxy.h" #include "hw/pci/pci.h" +#include "hw/proxy/memory-sync.h" +#include "qom/object.h" static int config_op_send(PCIProxyDev *dev, uint32_t addr, uint32_t *val, int l, unsigned int op) @@ -138,6 +140,10 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp) error_propagate(errp, local_err); } } + + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER)); + + configure_memory_sync(dev->sync, dev->mpqemu_link); } static void pci_proxy_dev_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h new file mode 100644 index 0000000000..d8329c9b52 --- /dev/null +++ b/include/hw/proxy/memory-sync.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2018, 2020 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef MEMORY_SYNC_H +#define MEMORY_SYNC_H + +#include <sys/types.h> + +#include "qemu/osdep.h" +#include "qom/object.h" +#include "exec/memory.h" +#include "io/mpqemu-link.h" + +#define TYPE_MEMORY_LISTENER "memory-listener" +#define REMOTE_MEM_SYNC(obj) \ + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER) + +typedef struct RemoteMemSync { + Object obj; + + MemoryListener listener; + + int n_mr_sections; + MemoryRegionSection *mr_sections; + + MPQemuLinkState *mpqemu_link; +} RemoteMemSync; + +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link); +void deconfigure_memory_sync(RemoteMemSync *sync); + +#endif diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h index 9e4127eccb..6d14876ba9 100644 --- a/include/hw/proxy/qemu-proxy.h +++ b/include/hw/proxy/qemu-proxy.h @@ -14,6 +14,7 @@ #include "io/mpqemu-link.h" #include "hw/pci/pci.h" +#include "hw/proxy/memory-sync.h" #define TYPE_PCI_PROXY_DEV "pci-proxy-dev" @@ -39,8 +40,13 @@ typedef struct ProxyMemoryRegion { struct PCIProxyDev { PCIDevice parent_dev; + int n_mr_sections; + MemoryRegionSection *mr_sections; + MPQemuLinkState *mpqemu_link; + RemoteMemSync *sync; + int socket; ProxyMemoryRegion region[PCI_NUM_REGIONS]; diff --git a/remote/remote-main.c b/remote/remote-main.c index 0990509f7a..90f241064f 100644 --- a/remote/remote-main.c +++ b/remote/remote-main.c @@ -35,6 +35,7 @@ #include "exec/ramlist.h" #include "remote/remote-common.h" #include "exec/memattrs.h" +#include "exec/address-spaces.h" static void process_msg(GIOCondition cond, MPQemuLinkState *link, MPQemuChannel *chan); @@ -231,6 +232,16 @@ static void process_msg(GIOCondition cond, MPQemuLinkState *link, goto finalize_loop; } break; + case SYNC_SYSMEM: + /* + * TODO: ensure no active DMA is happening when + * sysmem is being updated + */ + remote_sysmem_reconfig(msg, &err); + if (err) { + goto finalize_loop; + } + break; default: error_setg(&err, "Unknown command in %s", 
print_pid_exec(pid_exec)); goto finalize_loop;