Message ID | cb792b8d6f93d00c10790de8698c468b6ff4ab69.1587614626.git.elena.ufimtseva@oracle.com (mailing list archive)
---|---
State | New, archived
Series | [RESEND,v6,01/36] memory: alloc RAM from file at offset
On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> From: Jagannathan Raman <jag.raman@oracle.com>
> 
> Allow RAM MemoryRegion to be created from an offset in a file, instead
> of allocating at offset of 0 by default. This is needed to synchronize
> RAM between QEMU & remote process.
> 
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  exec.c                    | 11 +++++++----
>  include/exec/ram_addr.h   |  2 +-
>  include/qemu/mmap-alloc.h |  3 ++-
>  memory.c                  |  2 +-
>  util/mmap-alloc.c         |  7 ++++---
>  util/oslib-posix.c        |  2 +-
>  6 files changed, 16 insertions(+), 11 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> From: Jagannathan Raman <jag.raman@oracle.com>
> 
> Allow RAM MemoryRegion to be created from an offset in a file, instead
> of allocating at offset of 0 by default. This is needed to synchronize
> RAM between QEMU & remote process.

Can you elaborate on why remote processes require the RAM to be offset
from zero ?

NB, I'm not objecting - I'm just curious to understand more.

> 
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  exec.c                    | 11 +++++++----
>  include/exec/ram_addr.h   |  2 +-
>  include/qemu/mmap-alloc.h |  3 ++-
>  memory.c                  |  2 +-
>  util/mmap-alloc.c         |  7 ++++---
>  util/oslib-posix.c        |  2 +-
>  6 files changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/exec.c b/exec.c
> index 2874bb5088..d0ac9545f4 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1801,6 +1801,7 @@ static void *file_ram_alloc(RAMBlock *block,
>                              ram_addr_t memory,
>                              int fd,
>                              bool truncate,
> +                            off_t offset,
>                              Error **errp)
>  {
>      void *area;
> @@ -1851,7 +1852,8 @@ static void *file_ram_alloc(RAMBlock *block,
>      }
> 
>      area = qemu_ram_mmap(fd, memory, block->mr->align,
> -                         block->flags & RAM_SHARED, block->flags & RAM_PMEM);
> +                         block->flags & RAM_SHARED, block->flags & RAM_PMEM,
> +                         offset);
>      if (area == MAP_FAILED) {
>          error_setg_errno(errp, errno,
>                           "unable to map backing store for guest RAM");
> @@ -2283,7 +2285,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
>  #ifdef CONFIG_POSIX
>  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>                                   uint32_t ram_flags, int fd,
> -                                 Error **errp)
> +                                 off_t offset, Error **errp)
>  {
>      RAMBlock *new_block;
>      Error *local_err = NULL;
> @@ -2328,7 +2330,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>      new_block->used_length = size;
>      new_block->max_length = size;
>      new_block->flags = ram_flags;
> -    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
> +    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
> +                                     errp);
>      if (!new_block->host) {
>          g_free(new_block);
>          return NULL;
> @@ -2358,7 +2361,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>          return NULL;
>      }
> 
> -    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
> +    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, 0, errp);
>      if (!block) {
>          if (created) {
>              unlink(mem_path);
> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
> index 5e59a3d8d7..1b9f489ff0 100644
> --- a/include/exec/ram_addr.h
> +++ b/include/exec/ram_addr.h
> @@ -121,7 +121,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>                                     Error **errp);
>  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>                                   uint32_t ram_flags, int fd,
> -                                 Error **errp);
> +                                 off_t offset, Error **errp);
> 
>  RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>                                    MemoryRegion *mr, Error **errp);
> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> index e786266b92..4f579858bc 100644
> --- a/include/qemu/mmap-alloc.h
> +++ b/include/qemu/mmap-alloc.h
> @@ -25,7 +25,8 @@ void *qemu_ram_mmap(int fd,
>                      size_t size,
>                      size_t align,
>                      bool shared,
> -                    bool is_pmem);
> +                    bool is_pmem,
> +                    off_t start);
> 
>  void qemu_ram_munmap(int fd, void *ptr, size_t size);
> 
> diff --git a/memory.c b/memory.c
> index 601b749906..f5fec476b7 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1596,7 +1596,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
>      mr->destructor = memory_region_destructor_ram;
>      mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
>                                             share ? RAM_SHARED : 0,
> -                                           fd, &err);
> +                                           fd, 0, &err);
>      mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
>      if (err) {
>          mr->size = int128_zero();
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index 27dcccd8ec..a28f7025f0 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -86,7 +86,8 @@ void *qemu_ram_mmap(int fd,
>                      size_t size,
>                      size_t align,
>                      bool shared,
> -                    bool is_pmem)
> +                    bool is_pmem,
> +                    off_t start)
>  {
>      int flags;
>      int map_sync_flags = 0;
> @@ -147,7 +148,7 @@ void *qemu_ram_mmap(int fd,
>      offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
> 
>      ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
> -               flags | map_sync_flags, fd, 0);
> +               flags | map_sync_flags, fd, start);
> 
>      if (ptr == MAP_FAILED && map_sync_flags) {
>          if (errno == ENOTSUP) {
> @@ -172,7 +173,7 @@ void *qemu_ram_mmap(int fd,
>           * we will remove these flags to handle compatibility.
>           */
>          ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
> -                   flags, fd, 0);
> +                   flags, fd, start);
>      }
> 
>      if (ptr == MAP_FAILED) {
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 062236a1ab..4c6b9e90c6 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -209,7 +209,7 @@ void *qemu_memalign(size_t alignment, size_t size)
>  void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
>  {
>      size_t align = QEMU_VMALLOC_ALIGN;
> -    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
> +    void *ptr = qemu_ram_mmap(-1, size, align, shared, false, 0);
> 
>      if (ptr == MAP_FAILED) {
>          return NULL;
> --
> 2.25.GIT
> 

Regards,
Daniel
> On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> 
> On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
>> From: Jagannathan Raman <jag.raman@oracle.com>
>> 
>> Allow RAM MemoryRegion to be created from an offset in a file, instead
>> of allocating at offset of 0 by default. This is needed to synchronize
>> RAM between QEMU & remote process.
> 
> Can you elaborate on why remote processes require the RAM to be offset
> from zero ?

Hi Daniel,

As it turns out, the RAM is scattered across the physical address space
(system_memory) of QEMU. Therefore, the system memory is composed
of multiple sections of RAM, and some sections start at a non-zero RAM
offset.

As a result, the remote process needs the ability to map these RAM
sections into system_memory.

Thank you!
--
Jag

> 
> NB, I'm not objecting - I'm just curious to understand more.
> 
>> 
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>  exec.c                    | 11 +++++++----
>>  include/exec/ram_addr.h   |  2 +-
>>  include/qemu/mmap-alloc.h |  3 ++-
>>  memory.c                  |  2 +-
>>  util/mmap-alloc.c         |  7 ++++---
>>  util/oslib-posix.c        |  2 +-
>>  6 files changed, 16 insertions(+), 11 deletions(-)
>> 
>> diff --git a/exec.c b/exec.c
>> index 2874bb5088..d0ac9545f4 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1801,6 +1801,7 @@ static void *file_ram_alloc(RAMBlock *block,
>>                              ram_addr_t memory,
>>                              int fd,
>>                              bool truncate,
>> +                            off_t offset,
>>                              Error **errp)
>>  {
>>      void *area;
>> @@ -1851,7 +1852,8 @@ static void *file_ram_alloc(RAMBlock *block,
>>      }
>> 
>>      area = qemu_ram_mmap(fd, memory, block->mr->align,
>> -                         block->flags & RAM_SHARED, block->flags & RAM_PMEM);
>> +                         block->flags & RAM_SHARED, block->flags & RAM_PMEM,
>> +                         offset);
>>      if (area == MAP_FAILED) {
>>          error_setg_errno(errp, errno,
>>                           "unable to map backing store for guest RAM");
>> @@ -2283,7 +2285,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
>>  #ifdef CONFIG_POSIX
>>  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>>                                   uint32_t ram_flags, int fd,
>> -                                 Error **errp)
>> +                                 off_t offset, Error **errp)
>>  {
>>      RAMBlock *new_block;
>>      Error *local_err = NULL;
>> @@ -2328,7 +2330,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>>      new_block->used_length = size;
>>      new_block->max_length = size;
>>      new_block->flags = ram_flags;
>> -    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
>> +    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
>> +                                     errp);
>>      if (!new_block->host) {
>>          g_free(new_block);
>>          return NULL;
>> @@ -2358,7 +2361,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>>          return NULL;
>>      }
>> 
>> -    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
>> +    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, 0, errp);
>>      if (!block) {
>>          if (created) {
>>              unlink(mem_path);
>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
>> index 5e59a3d8d7..1b9f489ff0 100644
>> --- a/include/exec/ram_addr.h
>> +++ b/include/exec/ram_addr.h
>> @@ -121,7 +121,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
>>                                     Error **errp);
>>  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
>>                                   uint32_t ram_flags, int fd,
>> -                                 Error **errp);
>> +                                 off_t offset, Error **errp);
>> 
>>  RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
>>                                    MemoryRegion *mr, Error **errp);
>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
>> index e786266b92..4f579858bc 100644
>> --- a/include/qemu/mmap-alloc.h
>> +++ b/include/qemu/mmap-alloc.h
>> @@ -25,7 +25,8 @@ void *qemu_ram_mmap(int fd,
>>                      size_t size,
>>                      size_t align,
>>                      bool shared,
>> -                    bool is_pmem);
>> +                    bool is_pmem,
>> +                    off_t start);
>> 
>>  void qemu_ram_munmap(int fd, void *ptr, size_t size);
>> 
>> diff --git a/memory.c b/memory.c
>> index 601b749906..f5fec476b7 100644
>> --- a/memory.c
>> +++ b/memory.c
>> @@ -1596,7 +1596,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
>>      mr->destructor = memory_region_destructor_ram;
>>      mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
>>                                             share ? RAM_SHARED : 0,
>> -                                           fd, &err);
>> +                                           fd, 0, &err);
>>      mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
>>      if (err) {
>>          mr->size = int128_zero();
>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>> index 27dcccd8ec..a28f7025f0 100644
>> --- a/util/mmap-alloc.c
>> +++ b/util/mmap-alloc.c
>> @@ -86,7 +86,8 @@ void *qemu_ram_mmap(int fd,
>>                      size_t size,
>>                      size_t align,
>>                      bool shared,
>> -                    bool is_pmem)
>> +                    bool is_pmem,
>> +                    off_t start)
>>  {
>>      int flags;
>>      int map_sync_flags = 0;
>> @@ -147,7 +148,7 @@ void *qemu_ram_mmap(int fd,
>>      offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
>> 
>>      ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
>> -               flags | map_sync_flags, fd, 0);
>> +               flags | map_sync_flags, fd, start);
>> 
>>      if (ptr == MAP_FAILED && map_sync_flags) {
>>          if (errno == ENOTSUP) {
>> @@ -172,7 +173,7 @@ void *qemu_ram_mmap(int fd,
>>           * we will remove these flags to handle compatibility.
>>           */
>>          ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
>> -                   flags, fd, 0);
>> +                   flags, fd, start);
>>      }
>> 
>>      if (ptr == MAP_FAILED) {
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index 062236a1ab..4c6b9e90c6 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -209,7 +209,7 @@ void *qemu_memalign(size_t alignment, size_t size)
>>  void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
>>  {
>>      size_t align = QEMU_VMALLOC_ALIGN;
>> -    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
>> +    void *ptr = qemu_ram_mmap(-1, size, align, shared, false, 0);
>> 
>>      if (ptr == MAP_FAILED) {
>>          return NULL;
>> --
>> 2.25.GIT
>> 
> 
> Regards,
> Daniel
> --
> |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o- https://fstop138.berrange.com :|
> |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
> 
> 
On Tue, May 12, 2020 at 07:56:33AM -0400, Jag Raman wrote:
> 
> 
> > On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> > 
> > On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> >> From: Jagannathan Raman <jag.raman@oracle.com>
> >> 
> >> Allow RAM MemoryRegion to be created from an offset in a file, instead
> >> of allocating at offset of 0 by default. This is needed to synchronize
> >> RAM between QEMU & remote process.
> > 
> > Can you elaborate on why remote processes require the RAM to be offset
> > from zero ?
> 
> Hi Daniel,
> 
> As it turns out, the RAM is scattered across the physical address space
> (system_memory) of QEMU. Therefore, the system memory is composed
> of multiple sections of RAM, and some sections start at a non-zero RAM
> offset.
> 
> As a result, the remote process needs the ability to map these RAM
> sections into system_memory.

To explain a bit more, my understanding is that the offset is
specifically for mmap(2). As Jag alluded to, multiple sections can use a
single backing RAM file. These sections have different offsets in the
file.

Jag, maybe you can include a concrete explanation like the following in
the commit description:

Launch QEMU like this:

  qemu-system-x86_64 -mem-path /dev/shm -m 8G

There is only one RAM file descriptor:

  $ cat /proc/$(pidof qemu)/fd
  ...
  lrwx------. 1 stefanha stefanha 64 May 13 09:34 19 -> '/dev/shm/qemu_back_mem.pc.ram.7YAlqn (deleted)'

But the memory tree shows that single file is split into multiple ranges
of guest physical address space:

  (qemu) info mtree
  memory-region: system
    0000000000000000-ffffffffffffffff (prio 0, i/o): system
      0000000000000000-00000000bfffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-00000000bfffffff
      ...
      0000000100000000-000000023fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 00000000c0000000-00000001ffffffff

This means QEMU needs to send two regions to the remote device process.
They both mmap the same file but from different starting file offsets.

Stefan
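To make the mmap(2) side of this concrete, here is a minimal standalone sketch of what the remote process ultimately has to do: map one backing fd twice, at two different file offsets. This is not QEMU code; the file path and the sizes are invented to mirror the mtree output above, and it assumes a 64-bit Linux host where off_t is 64 bits.

    /*
     * Standalone illustration: one RAM backing file mapped twice at
     * different file offsets, roughly mirroring the ram-below-4g and
     * ram-above-4g aliases shown in the mtree output above.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/mman.h>

    int main(void)
    {
        const char *path = "/dev/shm/example-guest-ram"; /* hypothetical file */
        off_t below_4g = 0xc0000000;   /* 3 GiB section at file offset 0     */
        off_t above_4g = 0x140000000;  /* 5 GiB section at file offset 3 GiB */
        int fd = open(path, O_RDWR | O_CREAT, 0600);

        if (fd < 0 || ftruncate(fd, below_4g + above_4g) < 0) {
            perror("backing file");
            return EXIT_FAILURE;
        }

        /* Section starting at file offset 0 (ram-below-4g). */
        void *low = mmap(NULL, below_4g, PROT_READ | PROT_WRITE,
                         MAP_SHARED, fd, 0);
        /* Section starting at file offset 3 GiB (ram-above-4g). */
        void *high = mmap(NULL, above_4g, PROT_READ | PROT_WRITE,
                          MAP_SHARED, fd, below_4g);

        if (low == MAP_FAILED || high == MAP_FAILED) {
            perror("mmap");
            return EXIT_FAILURE;
        }
        printf("below-4g mapped at %p, above-4g mapped at %p\n", low, high);
        return EXIT_SUCCESS;
    }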
On Wed, 13 May 2020 09:40:42 +0100
Stefan Hajnoczi <stefanha@redhat.com> wrote:

> On Tue, May 12, 2020 at 07:56:33AM -0400, Jag Raman wrote:
> > 
> > 
> > > On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > 
> > > On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> > >> From: Jagannathan Raman <jag.raman@oracle.com>
> > >> 
> > >> Allow RAM MemoryRegion to be created from an offset in a file, instead
> > >> of allocating at offset of 0 by default. This is needed to synchronize
> > >> RAM between QEMU & remote process.
> > > 
> > > Can you elaborate on why remote processes require the RAM to be offset
> > > from zero ?
> > 
> > Hi Daniel,
> > 
> > As it turns out, the RAM is scattered across the physical address space
> > (system_memory) of QEMU. Therefore, the system memory is composed
> > of multiple sections of RAM, and some sections start at a non-zero RAM
> > offset.
> > 
> > As a result, the remote process needs the ability to map these RAM
> > sections into system_memory.
> 
> To explain a bit more, my understanding is that the offset is
> specifically for mmap(2). As Jag alluded to, multiple sections can use a
> single backing RAM file. These sections have different offsets in the
> file.
> 
> Jag, maybe you can include a concrete explanation like the following in
> the commit description:
> 
> Launch QEMU like this:
> 
>   qemu-system-x86_64 -mem-path /dev/shm -m 8G
> 
> There is only one RAM file descriptor:
> 
>   $ cat /proc/$(pidof qemu)/fd
>   ...
>   lrwx------. 1 stefanha stefanha 64 May 13 09:34 19 -> '/dev/shm/qemu_back_mem.pc.ram.7YAlqn (deleted)'
> 
> But the memory tree shows that single file is split into multiple ranges
> of guest physical address space:
> 
>   (qemu) info mtree
>   memory-region: system
>     0000000000000000-ffffffffffffffff (prio 0, i/o): system
>       0000000000000000-00000000bfffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-00000000bfffffff
>       ...
>       0000000100000000-000000023fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 00000000c0000000-00000001ffffffff
> 
> This means QEMU needs to send two regions to the remote device process.
> They both mmap the same file but from different starting file offsets.

Are we talking here about GPA offsets or about host offsets in the mmapped
host file? If it's the latter, then the above mtree doesn't show the true
picture (those entries are just aliases); main guest RAM is allocated as a
single contiguous chunk (so far) which belongs to a memory-backend.

> Stefan
> On May 13, 2020, at 11:25 AM, Igor Mammedov <imammedo@redhat.com> wrote:
> 
> On Wed, 13 May 2020 09:40:42 +0100
> Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
>> On Tue, May 12, 2020 at 07:56:33AM -0400, Jag Raman wrote:
>>> 
>>> 
>>>> On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
>>>> 
>>>> On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
>>>>> From: Jagannathan Raman <jag.raman@oracle.com>
>>>>> 
>>>>> Allow RAM MemoryRegion to be created from an offset in a file, instead
>>>>> of allocating at offset of 0 by default. This is needed to synchronize
>>>>> RAM between QEMU & remote process.
>>>> 
>>>> Can you elaborate on why remote processes require the RAM to be offset
>>>> from zero ?
>>> 
>>> Hi Daniel,
>>> 
>>> As it turns out, the RAM is scattered across the physical address space
>>> (system_memory) of QEMU. Therefore, the system memory is composed
>>> of multiple sections of RAM, and some sections start at a non-zero RAM
>>> offset.
>>> 
>>> As a result, the remote process needs the ability to map these RAM
>>> sections into system_memory.
>> 
>> To explain a bit more, my understanding is that the offset is
>> specifically for mmap(2). As Jag alluded to, multiple sections can use a
>> single backing RAM file. These sections have different offsets in the
>> file.
>> 
>> Jag, maybe you can include a concrete explanation like the following in
>> the commit description:
>> 
>> Launch QEMU like this:
>> 
>>   qemu-system-x86_64 -mem-path /dev/shm -m 8G
>> 
>> There is only one RAM file descriptor:
>> 
>>   $ cat /proc/$(pidof qemu)/fd
>>   ...
>>   lrwx------. 1 stefanha stefanha 64 May 13 09:34 19 -> '/dev/shm/qemu_back_mem.pc.ram.7YAlqn (deleted)'
>> 
>> But the memory tree shows that single file is split into multiple ranges
>> of guest physical address space:
>> 
>>   (qemu) info mtree
>>   memory-region: system
>>     0000000000000000-ffffffffffffffff (prio 0, i/o): system
>>       0000000000000000-00000000bfffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-00000000bfffffff
>>       ...
>>       0000000100000000-000000023fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 00000000c0000000-00000001ffffffff
>> 
>> This means QEMU needs to send two regions to the remote device process.
>> They both mmap the same file but from different starting file offsets.
> 
> Are we talking here about GPA offsets or about host offsets in the mmapped
> host file? If it's the latter, then the above mtree doesn't show the true
> picture (those entries are just aliases); main guest RAM is allocated as a
> single contiguous chunk (so far) which belongs to a memory-backend.

Thanks for the info about the ‘mtree’ monitor command. We’ll use that to better
explain the need for an offset during memory allocation.

As we can observe from the ‘mtree’ output, different sections of system memory
map into the RAM. Since we have only one file descriptor available for all of
RAM, we need to mmap() these sections at different offsets within the memory
file.

Hey Igor, the offset passed into the mmap() syscall is the offset within the
host file. Thanks for pointing out that these are aliases. I believe the mmap()
operation is equivalent to the “alias” operation within the MemoryRegion
framework. We are sending the ‘fd’, the ‘offset’ within the fd, and the size of
these RAM regions to the remote device over the unix socket. Hopefully, this
looks good to you.

Thanks!
--
Jag

> 
>> Stefan
> 
> 
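As an aside for readers who have not seen file-descriptor passing before, the sketch below shows the standard SCM_RIGHTS mechanism a QEMU-side process could use to hand the RAM fd plus an (offset, size, GPA) triple to the remote process over a unix socket. It is an illustration only; the struct layout and function are made up for this sketch and are not the actual protocol or message format used by this patch series.

    /*
     * Illustration of passing the RAM fd plus region geometry over a unix
     * socket with SCM_RIGHTS.  The message layout here is invented; it is
     * not the wire format of the multi-process QEMU series.
     */
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    struct ram_section_msg {
        uint64_t gpa;       /* guest physical address of the section */
        uint64_t size;      /* length of the section                 */
        uint64_t offset;    /* offset of the section in the RAM fd   */
    };

    static int send_ram_section(int sock, int ram_fd, struct ram_section_msg m)
    {
        struct iovec iov = { .iov_base = &m, .iov_len = sizeof(m) };
        union {
            struct cmsghdr align;
            char buf[CMSG_SPACE(sizeof(int))];
        } u;
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = u.buf, .msg_controllen = sizeof(u.buf),
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;      /* attach the fd to the message */
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &ram_fd, sizeof(int));

        return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
    }

The receiving side would recvmsg() the same message, pull the fd out of the SCM_RIGHTS control data, and then mmap() it at the carried offset.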
On Wed, 13 May 2020 16:08:06 -0400
Jag Raman <jag.raman@oracle.com> wrote:

> > On May 13, 2020, at 11:25 AM, Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > On Wed, 13 May 2020 09:40:42 +0100
> > Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> >> On Tue, May 12, 2020 at 07:56:33AM -0400, Jag Raman wrote:
> >>> 
> >>> 
> >>>> On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> >>>> 
> >>>> On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> >>>>> From: Jagannathan Raman <jag.raman@oracle.com>
> >>>>> 
> >>>>> Allow RAM MemoryRegion to be created from an offset in a file, instead
> >>>>> of allocating at offset of 0 by default. This is needed to synchronize
> >>>>> RAM between QEMU & remote process.
> >>>> 
> >>>> Can you elaborate on why remote processes require the RAM to be offset
> >>>> from zero ?
> >>> 
> >>> Hi Daniel,
> >>> 
> >>> As it turns out, the RAM is scattered across the physical address space
> >>> (system_memory) of QEMU. Therefore, the system memory is composed
> >>> of multiple sections of RAM, and some sections start at a non-zero RAM
> >>> offset.
> >>> 
> >>> As a result, the remote process needs the ability to map these RAM
> >>> sections into system_memory.
> >> 
> >> To explain a bit more, my understanding is that the offset is
> >> specifically for mmap(2). As Jag alluded to, multiple sections can use a
> >> single backing RAM file. These sections have different offsets in the
> >> file.
> >> 
> >> Jag, maybe you can include a concrete explanation like the following in
> >> the commit description:
> >> 
> >> Launch QEMU like this:
> >> 
> >>   qemu-system-x86_64 -mem-path /dev/shm -m 8G
> >> 
> >> There is only one RAM file descriptor:
> >> 
> >>   $ cat /proc/$(pidof qemu)/fd
> >>   ...
> >>   lrwx------. 1 stefanha stefanha 64 May 13 09:34 19 -> '/dev/shm/qemu_back_mem.pc.ram.7YAlqn (deleted)'
> >> 
> >> But the memory tree shows that single file is split into multiple ranges
> >> of guest physical address space:
> >> 
> >>   (qemu) info mtree
> >>   memory-region: system
> >>     0000000000000000-ffffffffffffffff (prio 0, i/o): system
> >>       0000000000000000-00000000bfffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-00000000bfffffff
> >>       ...
> >>       0000000100000000-000000023fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 00000000c0000000-00000001ffffffff
> >> 
> >> This means QEMU needs to send two regions to the remote device process.
> >> They both mmap the same file but from different starting file offsets.
> > 
> > Are we talking here about GPA offsets or about host offsets in the mmapped
> > host file? If it's the latter, then the above mtree doesn't show the true
> > picture (those entries are just aliases); main guest RAM is allocated as a
> > single contiguous chunk (so far) which belongs to a memory-backend.
> 
> Thanks for the info about the ‘mtree’ monitor command. We’ll use that to better
> explain the need for an offset during memory allocation.
> 
> As we can observe from the ‘mtree’ output, different sections of system memory
> map into the RAM. Since we have only one file descriptor available for all of
> RAM, we need to mmap() these sections at different offsets within the memory
> file.
> 
> Hey Igor, the offset passed into the mmap() syscall is the offset within the
> host file. Thanks for pointing out that these are aliases. I believe the mmap()
> operation is equivalent to the “alias” operation within the MemoryRegion
> framework. We are sending the ‘fd’, the ‘offset’ within the fd, and the size of
> these RAM regions to the remote device over the unix socket. Hopefully, this
> looks good to you.

Could you point to the specific patch where this is received and used on the
remote device?

> 
> Thanks!
> --
> Jag
> 
> > 
> >> Stefan
> > 
> > 
* Jag Raman (jag.raman@oracle.com) wrote:
> 
> 
> > On May 13, 2020, at 11:25 AM, Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > On Wed, 13 May 2020 09:40:42 +0100
> > Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > 
> >> On Tue, May 12, 2020 at 07:56:33AM -0400, Jag Raman wrote:
> >>> 
> >>> 
> >>>> On May 12, 2020, at 4:48 AM, Daniel P. Berrangé <berrange@redhat.com> wrote:
> >>>> 
> >>>> On Wed, Apr 22, 2020 at 09:13:36PM -0700, elena.ufimtseva@oracle.com wrote:
> >>>>> From: Jagannathan Raman <jag.raman@oracle.com>
> >>>>> 
> >>>>> Allow RAM MemoryRegion to be created from an offset in a file, instead
> >>>>> of allocating at offset of 0 by default. This is needed to synchronize
> >>>>> RAM between QEMU & remote process.
> >>>> 
> >>>> Can you elaborate on why remote processes require the RAM to be offset
> >>>> from zero ?
> >>> 
> >>> Hi Daniel,
> >>> 
> >>> As it turns out, the RAM is scattered across the physical address space
> >>> (system_memory) of QEMU. Therefore, the system memory is composed
> >>> of multiple sections of RAM, and some sections start at a non-zero RAM
> >>> offset.
> >>> 
> >>> As a result, the remote process needs the ability to map these RAM
> >>> sections into system_memory.
> >> 
> >> To explain a bit more, my understanding is that the offset is
> >> specifically for mmap(2). As Jag alluded to, multiple sections can use a
> >> single backing RAM file. These sections have different offsets in the
> >> file.
> >> 
> >> Jag, maybe you can include a concrete explanation like the following in
> >> the commit description:
> >> 
> >> Launch QEMU like this:
> >> 
> >>   qemu-system-x86_64 -mem-path /dev/shm -m 8G
> >> 
> >> There is only one RAM file descriptor:
> >> 
> >>   $ cat /proc/$(pidof qemu)/fd
> >>   ...
> >>   lrwx------. 1 stefanha stefanha 64 May 13 09:34 19 -> '/dev/shm/qemu_back_mem.pc.ram.7YAlqn (deleted)'
> >> 
> >> But the memory tree shows that single file is split into multiple ranges
> >> of guest physical address space:
> >> 
> >>   (qemu) info mtree
> >>   memory-region: system
> >>     0000000000000000-ffffffffffffffff (prio 0, i/o): system
> >>       0000000000000000-00000000bfffffff (prio 0, i/o): alias ram-below-4g @pc.ram 0000000000000000-00000000bfffffff
> >>       ...
> >>       0000000100000000-000000023fffffff (prio 0, i/o): alias ram-above-4g @pc.ram 00000000c0000000-00000001ffffffff
> >> 
> >> This means QEMU needs to send two regions to the remote device process.
> >> They both mmap the same file but from different starting file offsets.
> > 
> > Are we talking here about GPA offsets or about host offsets in the mmapped
> > host file? If it's the latter, then the above mtree doesn't show the true
> > picture (those entries are just aliases); main guest RAM is allocated as a
> > single contiguous chunk (so far) which belongs to a memory-backend.
> 
> Thanks for the info about the ‘mtree’ monitor command. We’ll use that to better
> explain the need for an offset during memory allocation.
> 
> As we can observe from the ‘mtree’ output, different sections of system memory
> map into the RAM. Since we have only one file descriptor available for all of
> RAM, we need to mmap() these sections at different offsets within the memory
> file.
> 
> Hey Igor, the offset passed into the mmap() syscall is the offset within the
> host file. Thanks for pointing out that these are aliases. I believe the mmap()
> operation is equivalent to the “alias” operation within the MemoryRegion
> framework. We are sending the ‘fd’, the ‘offset’ within the fd, and the size of
> these RAM regions to the remote device over the unix socket. Hopefully, this
> looks good to you.

Vhost-user already does something similar; see its
vhost_user_fill_set_mem_table_msg:

    msg->payload.memory.regions[*fd_num].userspace_addr = reg->userspace_addr;
    msg->payload.memory.regions[*fd_num].memory_size = reg->memory_size;
    msg->payload.memory.regions[*fd_num].guest_phys_addr = reg->guest_phys_addr;
    msg->payload.memory.regions[*fd_num].mmap_offset = offset;

Note that you don't need to map the fd with an offset on the QEMU side;
it's only needed on the remote.

Dave

> Thanks!
> --
> Jag
> 
> > 
> >> Stefan
> > 
> > 

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
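For illustration, the remote side of such an exchange might look like the sketch below. The field names are borrowed from the vhost-user message quoted above, but the struct itself is only a sketch, not the real vhost-user or multi-process QEMU wire format: the point is that the remote uses the carried offset as the mmap(2) file offset.

    /*
     * Sketch of the remote side: map one received RAM region using the
     * file offset carried in the message.  Illustrative only.
     */
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/mman.h>

    struct ram_region_desc {
        uint64_t guest_phys_addr;   /* where the section sits in guest memory */
        uint64_t memory_size;       /* length of the section                  */
        uint64_t mmap_offset;       /* starting offset within the shared fd   */
    };

    static void *map_ram_region(int ram_fd, const struct ram_region_desc *d)
    {
        /* The remote maps starting at mmap_offset rather than at offset 0. */
        return mmap(NULL, d->memory_size, PROT_READ | PROT_WRITE,
                    MAP_SHARED, ram_fd, (off_t)d->mmap_offset);
    }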
diff --git a/exec.c b/exec.c
index 2874bb5088..d0ac9545f4 100644
--- a/exec.c
+++ b/exec.c
@@ -1801,6 +1801,7 @@ static void *file_ram_alloc(RAMBlock *block,
                             ram_addr_t memory,
                             int fd,
                             bool truncate,
+                            off_t offset,
                             Error **errp)
 {
     void *area;
@@ -1851,7 +1852,8 @@ static void *file_ram_alloc(RAMBlock *block,
     }
 
     area = qemu_ram_mmap(fd, memory, block->mr->align,
-                         block->flags & RAM_SHARED, block->flags & RAM_PMEM);
+                         block->flags & RAM_SHARED, block->flags & RAM_PMEM,
+                         offset);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for guest RAM");
@@ -2283,7 +2285,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
 #ifdef CONFIG_POSIX
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd,
-                                 Error **errp)
+                                 off_t offset, Error **errp)
 {
     RAMBlock *new_block;
     Error *local_err = NULL;
@@ -2328,7 +2330,8 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     new_block->used_length = size;
     new_block->max_length = size;
     new_block->flags = ram_flags;
-    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp);
+    new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
+                                     errp);
     if (!new_block->host) {
         g_free(new_block);
         return NULL;
@@ -2358,7 +2361,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
         return NULL;
     }
 
-    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp);
+    block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, 0, errp);
     if (!block) {
         if (created) {
             unlink(mem_path);
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 5e59a3d8d7..1b9f489ff0 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -121,7 +121,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                    Error **errp);
 RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                  uint32_t ram_flags, int fd,
-                                 Error **errp);
+                                 off_t offset, Error **errp);
 
 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp);
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index e786266b92..4f579858bc 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -25,7 +25,8 @@ void *qemu_ram_mmap(int fd,
                     size_t size,
                     size_t align,
                     bool shared,
-                    bool is_pmem);
+                    bool is_pmem,
+                    off_t start);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
diff --git a/memory.c b/memory.c
index 601b749906..f5fec476b7 100644
--- a/memory.c
+++ b/memory.c
@@ -1596,7 +1596,7 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr,
     mr->destructor = memory_region_destructor_ram;
     mr->ram_block = qemu_ram_alloc_from_fd(size, mr,
                                            share ? RAM_SHARED : 0,
-                                           fd, &err);
+                                           fd, 0, &err);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
     if (err) {
         mr->size = int128_zero();
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 27dcccd8ec..a28f7025f0 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -86,7 +86,8 @@ void *qemu_ram_mmap(int fd,
                     size_t size,
                     size_t align,
                     bool shared,
-                    bool is_pmem)
+                    bool is_pmem,
+                    off_t start)
 {
     int flags;
     int map_sync_flags = 0;
@@ -147,7 +148,7 @@ void *qemu_ram_mmap(int fd,
     offset = QEMU_ALIGN_UP((uintptr_t)guardptr, align) - (uintptr_t)guardptr;
 
     ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
-               flags | map_sync_flags, fd, 0);
+               flags | map_sync_flags, fd, start);
 
     if (ptr == MAP_FAILED && map_sync_flags) {
         if (errno == ENOTSUP) {
@@ -172,7 +173,7 @@ void *qemu_ram_mmap(int fd,
          * we will remove these flags to handle compatibility.
          */
         ptr = mmap(guardptr + offset, size, PROT_READ | PROT_WRITE,
-                   flags, fd, 0);
+                   flags, fd, start);
     }
 
     if (ptr == MAP_FAILED) {
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 062236a1ab..4c6b9e90c6 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -209,7 +209,7 @@ void *qemu_memalign(size_t alignment, size_t size)
 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    void *ptr = qemu_ram_mmap(-1, size, align, shared, false);
+    void *ptr = qemu_ram_mmap(-1, size, align, shared, false, 0);
 
     if (ptr == MAP_FAILED) {
         return NULL;