diff mbox series

[v1,7/9] memory: introduce RAM_NORESERVE and wire it up in qemu_ram_mmap()

Message ID 20210209134939.13083-8-david@redhat.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

David Hildenbrand Feb. 9, 2021, 1:49 p.m. UTC
Let's introduce RAM_NORESERVE, allowing mmap'ing with MAP_NORESERVE. The
new flag has the following semantics:

  RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or
  huge pages on Linux) is skipped: will bail out if not supported. When not
  set, the OS might reserve swap space (or huge pages on Linux), depending
  on OS support.

Allow passing it into:
- memory_region_init_ram_nomigrate()
- memory_region_init_resizeable_ram()
- memory_region_init_ram_from_file()

... and teach qemu_ram_mmap() and qemu_anon_ram_alloc() about the flag.
Bail out if the flag is not supported, which is the case right now for
both POSIX and win32. We will add the POSIX mmap implementation next and
allow specifying RAM_NORESERVE via memory backends.

The target use case is virtio-mem, which dynamically exposes memory
inside a large, sparse memory area to the VM.

Cc: Juan Quintela <quintela@redhat.com>
Cc: Halil Pasic <pasic@linux.ibm.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>
Cc: kvm@vger.kernel.org
Cc: qemu-s390x@nongnu.org
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/exec/cpu-common.h |  1 +
 include/exec/memory.h     | 16 +++++++++++++---
 include/exec/ram_addr.h   |  3 ++-
 include/qemu/mmap-alloc.h |  4 +++-
 include/qemu/osdep.h      |  3 ++-
 include/sysemu/kvm.h      |  3 ++-
 migration/ram.c           |  3 +--
 softmmu/physmem.c         | 23 ++++++++++++++++-------
 target/s390x/kvm.c        |  6 +++++-
 util/mmap-alloc.c         |  9 ++++++++-
 util/oslib-posix.c        |  5 +++--
 util/oslib-win32.c        | 13 ++++++++++++-
 12 files changed, 68 insertions(+), 21 deletions(-)

Comments

Peter Xu March 2, 2021, 5:32 p.m. UTC | #1
On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
>   * to grow. We also have to use MAP parameters that avoid
>   * read-only mapping of guest pages.
>   */
> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
> +                               bool noreserve)
>  {
>      static void *mem;
>  
>      if (mem) {
>          /* we only support one allocation, which is enough for initial ram */
>          return NULL;
> +    } else if (noreserve) {
> +        error_report("Skipping reservation of swap space is not supported.");
> +        return NULL

Semicolon missing.

>      }
>  
>      mem = mmap((void *) 0x800000000ULL, size,
> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> index b50dc86a3c..bb99843106 100644
> --- a/util/mmap-alloc.c
> +++ b/util/mmap-alloc.c
> @@ -20,6 +20,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/mmap-alloc.h"
>  #include "qemu/host-utils.h"
> +#include "qemu/error-report.h"
>  
>  #define HUGETLBFS_MAGIC       0x958458f6
>  
> @@ -174,12 +175,18 @@ void *qemu_ram_mmap(int fd,
>                      size_t align,
>                      bool readonly,
>                      bool shared,
> -                    bool is_pmem)
> +                    bool is_pmem,
> +                    bool noreserve)

Maybe at some point we should use flags too here to cover all bools.

Thanks,
David Hildenbrand March 2, 2021, 7:02 p.m. UTC | #2
On 02.03.21 18:32, Peter Xu wrote:
> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
>>    * to grow. We also have to use MAP parameters that avoid
>>    * read-only mapping of guest pages.
>>    */
>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>> +                               bool noreserve)
>>   {
>>       static void *mem;
>>   
>>       if (mem) {
>>           /* we only support one allocation, which is enough for initial ram */
>>           return NULL;
>> +    } else if (noreserve) {
>> +        error_report("Skipping reservation of swap space is not supported.");
>> +        return NULL
> 
> Semicolon missing.

Thanks for catching that!

> 
>>       }
>>   
>>       mem = mmap((void *) 0x800000000ULL, size,
>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
>> index b50dc86a3c..bb99843106 100644
>> --- a/util/mmap-alloc.c
>> +++ b/util/mmap-alloc.c
>> @@ -20,6 +20,7 @@
>>   #include "qemu/osdep.h"
>>   #include "qemu/mmap-alloc.h"
>>   #include "qemu/host-utils.h"
>> +#include "qemu/error-report.h"
>>   
>>   #define HUGETLBFS_MAGIC       0x958458f6
>>   
>> @@ -174,12 +175,18 @@ void *qemu_ram_mmap(int fd,
>>                       size_t align,
>>                       bool readonly,
>>                       bool shared,
>> -                    bool is_pmem)
>> +                    bool is_pmem,
>> +                    bool noreserve)
> 
> Maybe at some point we should use flags too here to cover all bools.
> 

Right. I guess the main point was to not reuse RAM_XXX.

Should I introduce RAM_MMAP_XXX ?

Thanks!
Peter Xu March 2, 2021, 8:54 p.m. UTC | #3
On Tue, Mar 02, 2021 at 08:02:34PM +0100, David Hildenbrand wrote:
> > > @@ -174,12 +175,18 @@ void *qemu_ram_mmap(int fd,
> > >                       size_t align,
> > >                       bool readonly,
> > >                       bool shared,
> > > -                    bool is_pmem)
> > > +                    bool is_pmem,
> > > +                    bool noreserve)
> > 
> > Maybe at some point we should use flags too here to cover all bools.
> > 
> 
> Right. I guess the main point was to not reuse RAM_XXX.
> 
> Should I introduce RAM_MMAP_XXX ?

Maybe we can directly use MAP_*?  Since I see qemu_ram_mmap() should only exist
with CONFIG_POSIX.  However indeed I see no sign to extend more bools in the
near future either, so maybe also fine to keep it as is, as 4 bools still looks
okay - your call. :)
David Hildenbrand March 2, 2021, 8:58 p.m. UTC | #4
On 02.03.21 21:54, Peter Xu wrote:
> On Tue, Mar 02, 2021 at 08:02:34PM +0100, David Hildenbrand wrote:
>>>> @@ -174,12 +175,18 @@ void *qemu_ram_mmap(int fd,
>>>>                        size_t align,
>>>>                        bool readonly,
>>>>                        bool shared,
>>>> -                    bool is_pmem)
>>>> +                    bool is_pmem,
>>>> +                    bool noreserve)
>>>
>>> Maybe at some point we should use flags too here to cover all bools.
>>>
>>
>> Right. I guess the main point was to not reuse RAM_XXX.
>>
>> Should I introduce RAM_MMAP_XXX ?
> 
> Maybe we can directly use MAP_*?  Since I see qemu_ram_mmap() should only exist

I think the issue is that there is for example no flag that corresponds 
to "is_pmem" - and the fallback logic in our mmap code to make "is_pmem" 
still work is a little bit more involved. In addition, "readonly" 
translates to PROT_READ ...

> with CONFIG_POSIX.  However indeed I see no sign to extend more bools in the
> near future either, so maybe also fine to keep it as is, as 4 bools still looks
> okay - your call. :)

Well, I had the same idea when I added yet another bool :) But I guess 
we won't be adding a lot of additional flags in the near future. 
(MAP_POPULATE? ;) fortunately we use a different approach to populate 
memory)

I'll think about it, not sure yet if this is worth proper flags. Thanks!
Cornelia Huck March 3, 2021, 11:35 a.m. UTC | #5
On Tue, 2 Mar 2021 20:02:34 +0100
David Hildenbrand <david@redhat.com> wrote:

> On 02.03.21 18:32, Peter Xu wrote:
> > On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:  
> >> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
> >>    * to grow. We also have to use MAP parameters that avoid
> >>    * read-only mapping of guest pages.
> >>    */
> >> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
> >> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
> >> +                               bool noreserve)
> >>   {
> >>       static void *mem;
> >>   
> >>       if (mem) {
> >>           /* we only support one allocation, which is enough for initial ram */
> >>           return NULL;
> >> +    } else if (noreserve) {
> >> +        error_report("Skipping reservation of swap space is not supported.");
> >> +        return NULL  
> > 
> > Semicolon missing.  
> 
> Thanks for catching that!

Regardless of that (and this patch set), can we finally get rid of
legacy_s390_alloc? We already fence off running with a kernel prior to
3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
still relevant? This seems to be a generation 10 feature; do we
realistically expect anyone running this on e.g. a z/VM host that
doesn't provide ESOP?
David Hildenbrand March 3, 2021, 11:37 a.m. UTC | #6
On 03.03.21 12:35, Cornelia Huck wrote:
> On Tue, 2 Mar 2021 20:02:34 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 02.03.21 18:32, Peter Xu wrote:
>>> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>>>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
>>>>     * to grow. We also have to use MAP parameters that avoid
>>>>     * read-only mapping of guest pages.
>>>>     */
>>>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>>>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>>>> +                               bool noreserve)
>>>>    {
>>>>        static void *mem;
>>>>    
>>>>        if (mem) {
>>>>            /* we only support one allocation, which is enough for initial ram */
>>>>            return NULL;
>>>> +    } else if (noreserve) {
>>>> +        error_report("Skipping reservation of swap space is not supported.");
>>>> +        return NULL
>>>
>>> Semicolon missing.
>>
>> Thanks for catching that!
> 
> Regardless of that (and this patch set), can we finally get rid of
> legacy_s390_alloc? We already fence off running with a kernel prior to
> 3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
> still relevant? This seems to be a generation 10 feature; do we
> realistically expect anyone running this on e.g. a z/VM host that
> doesn't provide ESOP?

Good question - last time I asked that question (~2 years ago) I was 
told that such z/VM environments are still relevant.
Thomas Huth March 3, 2021, 11:39 a.m. UTC | #7
On 03/03/2021 12.35, Cornelia Huck wrote:
> On Tue, 2 Mar 2021 20:02:34 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
>> On 02.03.21 18:32, Peter Xu wrote:
>>> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>>>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
>>>>     * to grow. We also have to use MAP parameters that avoid
>>>>     * read-only mapping of guest pages.
>>>>     */
>>>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>>>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>>>> +                               bool noreserve)
>>>>    {
>>>>        static void *mem;
>>>>    
>>>>        if (mem) {
>>>>            /* we only support one allocation, which is enough for initial ram */
>>>>            return NULL;
>>>> +    } else if (noreserve) {
>>>> +        error_report("Skipping reservation of swap space is not supported.");
>>>> +        return NULL
>>>
>>> Semicolon missing.
>>
>> Thanks for catching that!
> 
> Regardless of that (and this patch set), can we finally get rid of
> legacy_s390_alloc? We already fence off running with a kernel prior to
> 3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
> still relevant? This seems to be a generation 10 feature; do we
> realistically expect anyone running this on e.g. a z/VM host that
> doesn't provide ESOP?

Looking at the support charts ( 
https://www.ibm.com/support/pages/ibm-mainframe-life-cycle-history ), the 
z10 is already unsupported. So if all newer mainframes have ESOP, I guess it 
should be fine to get rid of this code now.

  Thomas
David Hildenbrand March 3, 2021, 11:41 a.m. UTC | #8
On 03.03.21 12:39, Thomas Huth wrote:
> On 03/03/2021 12.35, Cornelia Huck wrote:
>> On Tue, 2 Mar 2021 20:02:34 +0100
>> David Hildenbrand <david@redhat.com> wrote:
>>
>>> On 02.03.21 18:32, Peter Xu wrote:
>>>> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>>>>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
>>>>>      * to grow. We also have to use MAP parameters that avoid
>>>>>      * read-only mapping of guest pages.
>>>>>      */
>>>>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>>>>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>>>>> +                               bool noreserve)
>>>>>     {
>>>>>         static void *mem;
>>>>>     
>>>>>         if (mem) {
>>>>>             /* we only support one allocation, which is enough for initial ram */
>>>>>             return NULL;
>>>>> +    } else if (noreserve) {
>>>>> +        error_report("Skipping reservation of swap space is not supported.");
>>>>> +        return NULL
>>>>
>>>> Semicolon missing.
>>>
>>> Thanks for catching that!
>>
>> Regardless of that (and this patch set), can we finally get rid of
>> legacy_s390_alloc? We already fence off running with a kernel prior to
>> 3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
>> still relevant? This seems to be a generation 10 feature; do we
>> realistically expect anyone running this on e.g. a z/VM host that
>> doesn't provide ESOP?
> 
> Looking at the support charts (
> https://www.ibm.com/support/pages/ibm-mainframe-life-cycle-history ), the
> z10 is already unsupported. So if all newer mainframes have ESOP, I guess it
> should be fine to get rid of this code now.

I remember this was a z/VM issue, which would not provide this facility 
to its guests.
Thomas Huth March 3, 2021, 12:12 p.m. UTC | #9
On 03/03/2021 12.37, David Hildenbrand wrote:
> On 03.03.21 12:35, Cornelia Huck wrote:
>> On Tue, 2 Mar 2021 20:02:34 +0100
>> David Hildenbrand <david@redhat.com> wrote:
>>
>>> On 02.03.21 18:32, Peter Xu wrote:
>>>> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>>>>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t 
>>>>> offset, void *hostbuf,
>>>>>     * to grow. We also have to use MAP parameters that avoid
>>>>>     * read-only mapping of guest pages.
>>>>>     */
>>>>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>>>>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>>>>> +                               bool noreserve)
>>>>>    {
>>>>>        static void *mem;
>>>>>        if (mem) {
>>>>>            /* we only support one allocation, which is enough for 
>>>>> initial ram */
>>>>>            return NULL;
>>>>> +    } else if (noreserve) {
>>>>> +        error_report("Skipping reservation of swap space is not 
>>>>> supported.");
>>>>> +        return NULL
>>>>
>>>> Semicolon missing.
>>>
>>> Thanks for catching that!
>>
>> Regardless of that (and this patch set), can we finally get rid of
>> legacy_s390_alloc? We already fence off running with a kernel prior to
>> 3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
>> still relevant? This seems to be a generation 10 feature; do we
>> realistically expect anyone running this on e.g. a z/VM host that
>> doesn't provide ESOP?
> 
> Good question - last time I asked that question (~2 years ago) I was told 
> that such z/VM environments are still relevant.

Now that you've mentioned it ... I even wrote a blog post about z/VM and 
ESOP some years ago:

 
http://people.redhat.com/~thuth/blog/qemu/2017/04/05/s390x-selinux-problem.html

So if I've got that right again, the z/VM ESOP problem only exists on 
versions older than 6.3. And according to 
https://www.ibm.com/support/lifecycle/search?q=z%2FVM those old versions are 
now unsupported since June 2017 ... thus I guess it's valid to assume that 
nobody is running such an old z/VM version anymore (at least not to use it 
as an environment to run nested KVM guests).

  Thomas
David Hildenbrand March 3, 2021, 12:24 p.m. UTC | #10
On 03.03.21 13:12, Thomas Huth wrote:
> On 03/03/2021 12.37, David Hildenbrand wrote:
>> On 03.03.21 12:35, Cornelia Huck wrote:
>>> On Tue, 2 Mar 2021 20:02:34 +0100
>>> David Hildenbrand <david@redhat.com> wrote:
>>>
>>>> On 02.03.21 18:32, Peter Xu wrote:
>>>>> On Tue, Feb 09, 2021 at 02:49:37PM +0100, David Hildenbrand wrote:
>>>>>> @@ -899,13 +899,17 @@ int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t
>>>>>> offset, void *hostbuf,
>>>>>>      * to grow. We also have to use MAP parameters that avoid
>>>>>>      * read-only mapping of guest pages.
>>>>>>      */
>>>>>> -static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
>>>>>> +static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
>>>>>> +                               bool noreserve)
>>>>>>     {
>>>>>>         static void *mem;
>>>>>>         if (mem) {
>>>>>>             /* we only support one allocation, which is enough for
>>>>>> initial ram */
>>>>>>             return NULL;
>>>>>> +    } else if (noreserve) {
>>>>>> +        error_report("Skipping reservation of swap space is not
>>>>>> supported.");
>>>>>> +        return NULL
>>>>>
>>>>> Semicolon missing.
>>>>
>>>> Thanks for catching that!
>>>
>>> Regardless of that (and this patch set), can we finally get rid of
>>> legacy_s390_alloc? We already fence off running with a kernel prior to
>>> 3.15, and KVM_CAP_S390_COW depends on ESOP -- are non-ESOP kvm hosts
>>> still relevant? This seems to be a generation 10 feature; do we
>>> realistically expect anyone running this on e.g. a z/VM host that
>>> doesn't provide ESOP?
>>
>> Good question - last time I asked that question (~2 years ago) I was told
>> that such z/VM environments are still relevant.
> 
> Now that you've mentioned it ... I even wrote a blog post about z/VM and
> ESOP some years ago:
> 
>   
> http://people.redhat.com/~thuth/blog/qemu/2017/04/05/s390x-selinux-problem.html
> 
> So if I've got that right again, the z/VM ESOP problem only exists on
> versions older than 6.3. And according to
> https://www.ibm.com/support/lifecycle/search?q=z%2FVM those old versions are
> now unsupported since June 2017 ... thus I guess it's valid to assume that
> nobody is running such an old z/VM version anymore (at least not to use it
> as an environment to run nested KVM guests).

Thanks for that info, I'll send a patch proposing to rip it out - that 
will make things nicer.
diff mbox series

Patch

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 5a0a2d93e0..38a47ad4ac 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -58,6 +58,7 @@  void *qemu_ram_get_host_addr(RAMBlock *rb);
 ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
 ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
+bool qemu_ram_is_noreserve(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
 bool qemu_ram_is_migratable(RAMBlock *rb);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 7d2db168c7..587d14257c 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -157,6 +157,14 @@  typedef struct IOMMUTLBEvent {
  */
 #define RAM_UF_WRITEPROTECT (1 << 6)
 
+/*
+ * RAM is mmap-ed with MAP_NORESERVE. When set, reserving swap space (or huge
+ * pages on Linux) is skipped: will bail out if not supported. When not set, the
+ * OS might reserve swap space (or huge pages on Linux), depending on OS
+ * support.
+ */
+#define RAM_NORESERVE (1 << 7)
+
 static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
                                        IOMMUNotifierFlag flags,
                                        hwaddr start, hwaddr end,
@@ -915,7 +923,7 @@  void memory_region_init_ram_nomigrate(MemoryRegion *mr,
  * @name: Region name, becomes part of RAMBlock name used in migration stream
  *        must be unique within any device
  * @size: size of the region.
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
  * @errp: pointer to Error*, to store an error if it happens.
  *
  * Note that this function does not do anything to cause the data in the
@@ -969,7 +977,8 @@  void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @size: size of the region.
  * @align: alignment of the region base address; if 0, the default alignment
  *         (getpagesize()) will be used.
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *             RAM_NORESERVE.
  * @path: the path in which to allocate the RAM.
  * @readonly: true to open @path for reading, false for read/write.
  * @errp: pointer to Error*, to store an error if it happens.
@@ -995,7 +1004,8 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
  * @owner: the object that tracks the region's reference count
  * @name: the name of the region.
  * @size: size of the region.
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *             RAM_NORESERVE.
  * @fd: the fd to mmap.
  * @errp: pointer to Error*, to store an error if it happens.
  *
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index ce9e140c54..1325c7760e 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -104,7 +104,8 @@  long qemu_maxrampagesize(void);
  * Parameters:
  *  @size: the size in bytes of the ram block
  *  @mr: the memory region where the ram block is
- *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM.
+ *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
+ *              RAM_NORESERVE.
  *  @mem_path or @fd: specify the backing file or device
  *  @readonly: true to open @path for reading, false for read/write.
  *  @errp: pointer to Error*, to store an error if it happens
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index 8b7a5c70f3..a996d9b15a 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -17,6 +17,7 @@  size_t qemu_mempath_getpagesize(const char *mem_path);
  *  @readonly: true for a read-only mapping, false for read/write.
  *  @shared: map has RAM_SHARED flag.
  *  @is_pmem: map has RAM_PMEM flag.
+ *  @noreserve: map has RAM_NORESERVE flag.
  *
  * Return:
  *  On success, return a pointer to the mapped area.
@@ -27,7 +28,8 @@  void *qemu_ram_mmap(int fd,
                     size_t align,
                     bool readonly,
                     bool shared,
-                    bool is_pmem);
+                    bool is_pmem,
+                    bool noreserve);
 
 void qemu_ram_munmap(int fd, void *ptr, size_t size);
 
diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index ba15be9c56..d6d8ef0999 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -343,7 +343,8 @@  extern int daemon(int, int);
 int qemu_daemon(int nochdir, int noclose);
 void *qemu_try_memalign(size_t alignment, size_t size);
 void *qemu_memalign(size_t alignment, size_t size);
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared);
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+                          bool noreserve);
 void qemu_vfree(void *ptr);
 void qemu_anon_ram_free(void *ptr, size_t size);
 
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index c5546bdecc..4a0a7a4e89 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -251,7 +251,8 @@  int kvm_on_sigbus(int code, void *addr);
 
 /* interface with exec.c */
 
-void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared));
+void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared,
+                                       bool noreserve));
 
 /* internal API */
 
diff --git a/migration/ram.c b/migration/ram.c
index 72143da0ac..dd8daad386 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3322,8 +3322,7 @@  int colo_init_ram_cache(void)
     WITH_RCU_READ_LOCK_GUARD() {
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             block->colo_cache = qemu_anon_ram_alloc(block->used_length,
-                                                    NULL,
-                                                    false);
+                                                    NULL, false, false);
             if (!block->colo_cache) {
                 error_report("%s: Can't alloc memory for COLO cache of block %s,"
                              "size 0x" RAM_ADDR_FMT, __func__, block->idstr,
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 2243e2a87a..9820d845c0 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1144,7 +1144,8 @@  static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
 static subpage_t *subpage_init(FlatView *fv, hwaddr base);
 
-static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) =
+static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared,
+                               bool noreserve) =
                                qemu_anon_ram_alloc;
 
 /*
@@ -1152,7 +1153,8 @@  static void *(*phys_mem_alloc)(size_t size, uint64_t *align, bool shared) =
  * Accelerators with unusual needs may need this.  Hopefully, we can
  * get rid of it eventually.
  */
-void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared))
+void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align,
+                        bool shared, bool noreserve))
 {
     phys_mem_alloc = alloc;
 }
@@ -1593,7 +1595,8 @@  static void *file_ram_alloc(RAMBlock *block,
     }
 
     area = qemu_ram_mmap(fd, memory, block->mr->align, readonly,
-                         block->flags & RAM_SHARED, block->flags & RAM_PMEM);
+                         block->flags & RAM_SHARED, block->flags & RAM_PMEM,
+                         block->flags & RAM_NORESERVE);
     if (area == MAP_FAILED) {
         error_setg_errno(errp, errno,
                          "unable to map backing store for guest RAM");
@@ -1713,6 +1716,11 @@  bool qemu_ram_is_shared(RAMBlock *rb)
     return rb->flags & RAM_SHARED;
 }
 
+bool qemu_ram_is_noreserve(RAMBlock *rb)
+{
+    return rb->flags & RAM_NORESERVE;
+}
+
 /* Note: Only set at the start of postcopy */
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb)
 {
@@ -1962,7 +1970,8 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
         } else {
             new_block->host = phys_mem_alloc(new_block->max_length,
                                              &new_block->mr->align,
-                                             qemu_ram_is_shared(new_block));
+                                             qemu_ram_is_shared(new_block),
+                                             qemu_ram_is_noreserve(new_block));
             if (!new_block->host) {
                 error_setg_errno(errp, errno,
                                  "cannot set up guest memory '%s'",
@@ -2033,7 +2042,7 @@  RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
     int64_t file_size, file_align;
 
     /* Just support these ram flags by now. */
-    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE)) == 0);
 
     if (xen_enabled()) {
         error_setg(errp, "-mem-path not supported with Xen");
@@ -2135,7 +2144,7 @@  RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
     RAMBlock *new_block;
     Error *local_err = NULL;
 
-    assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE)) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_NORESERVE)) == 0);
 
     size = HOST_PAGE_ALIGN(size);
     max_size = HOST_PAGE_ALIGN(max_size);
@@ -2170,7 +2179,7 @@  RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
 RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
                          MemoryRegion *mr, Error **errp)
 {
-    assert((ram_flags & ~RAM_SHARED) == 0);
+    assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
     return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
 }
 
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index dc27fa36c9..bd5178bf81 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -899,13 +899,17 @@  int kvm_s390_mem_op_pv(S390CPU *cpu, uint64_t offset, void *hostbuf,
  * to grow. We also have to use MAP parameters that avoid
  * read-only mapping of guest pages.
  */
-static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared)
+static void *legacy_s390_alloc(size_t size, uint64_t *align, bool shared,
+                               bool noreserve)
 {
     static void *mem;
 
     if (mem) {
         /* we only support one allocation, which is enough for initial ram */
         return NULL;
+    } else if (noreserve) {
+        error_report("Skipping reservation of swap space is not supported.");
+        return NULL
     }
 
     mem = mmap((void *) 0x800000000ULL, size,
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index b50dc86a3c..bb99843106 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -20,6 +20,7 @@ 
 #include "qemu/osdep.h"
 #include "qemu/mmap-alloc.h"
 #include "qemu/host-utils.h"
+#include "qemu/error-report.h"
 
 #define HUGETLBFS_MAGIC       0x958458f6
 
@@ -174,12 +175,18 @@  void *qemu_ram_mmap(int fd,
                     size_t align,
                     bool readonly,
                     bool shared,
-                    bool is_pmem)
+                    bool is_pmem,
+                    bool noreserve)
 {
     const size_t guard_pagesize = mmap_guard_pagesize(fd);
     size_t offset, total;
     void *ptr, *guardptr;
 
+    if (noreserve) {
+        error_report("Skipping reservation of swap space is not supported");
+        return MAP_FAILED;
+    }
+
     /*
      * Note: this always allocates at least one extra page of virtual address
      * space, even if size is already aligned.
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index bf57d3b030..7c9d870723 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -227,10 +227,11 @@  void *qemu_memalign(size_t alignment, size_t size)
 }
 
 /* alloc shared memory pages */
-void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
+                          bool noreserve)
 {
     size_t align = QEMU_VMALLOC_ALIGN;
-    void *ptr = qemu_ram_mmap(-1, size, align, false, shared, false);
+    void *ptr = qemu_ram_mmap(-1, size, align, false, shared, false, noreserve);
 
     if (ptr == MAP_FAILED) {
         return NULL;
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index f68b8012bb..8cafe44179 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -39,6 +39,7 @@ 
 #include "trace.h"
 #include "qemu/sockets.h"
 #include "qemu/cutils.h"
+#include "qemu/error-report.h"
 #include <malloc.h>
 
 /* this must come after including "trace.h" */
@@ -77,10 +78,20 @@  static int get_allocation_granularity(void)
     return system_info.dwAllocationGranularity;
 }
 
-void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared,
+                          bool noreserve)
 {
     void *ptr;
 
+    if (noreserve) {
+        /*
+         * We need a MEM_COMMIT before accessing any memory in a MEM_RESERVE
+         * area; we cannot easily mimic POSIX MAP_NORESERVE semantics.
+         */
+        error_report("Skipping reservation of swap space is not supported.");
+        return NULL;
+    }
+
     ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
     trace_qemu_anon_ram_alloc(size, ptr);