diff mbox

[RFC,1/4] pc-dimm: add 'reserved-size' to reserve address range after the ending address

Message ID 20170331084147.32716-2-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang March 31, 2017, 8:41 a.m. UTC
If option 'reserved-size=RSVD' is present, QEMU will reserve an
address range of size 'RSVD' after the ending address of pc-dimm
device.

For the following example,
    -object memory-backend-file,id=mem0,size=4G,...
    -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...
    -device pc-dimm,id=dimm1,...
if dimm0 is allocated to address N ~ N+4G, the address range of
dimm1 will start from N+4G+4K or higher.

Its current usage is to reserve spaces for flush hint addresses
of nvdimm devices.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 hw/mem/pc-dimm.c         | 48 +++++++++++++++++++++++++++++++++++++++++++++---
 include/hw/mem/pc-dimm.h |  2 ++
 2 files changed, 47 insertions(+), 3 deletions(-)

Comments

Stefan Hajnoczi April 6, 2017, 10:24 a.m. UTC | #1
On Fri, Mar 31, 2017 at 04:41:44PM +0800, Haozhong Zhang wrote:
> If option 'reserved-size=RSVD' is present, QEMU will reserve an
> address range of size 'RSVD' after the ending address of pc-dimm
> device.
> 
> For the following example,
>     -object memory-backend-file,id=mem0,size=4G,...
>     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...

"reserved-size" is not a clear name.  I suggest calling the property
"num-flush-hints" (default 0).  QEMU can calculate the actual size in
bytes.

  -device nvdimm,num-flush-hints=1

QEMU will use one flush hint and reserve enough space (e.g. 1 page) for
the MMIO region.

>     -device pc-dimm,id=dimm1,...
> if dimm0 is allocated to address N ~ N+4G, the address range of
> dimm1 will start from N+4G+4K or higher.
> 
> Its current usage is to reserve spaces for flush hint addresses
> of nvdimm devices.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  hw/mem/pc-dimm.c         | 48 +++++++++++++++++++++++++++++++++++++++++++++---
>  include/hw/mem/pc-dimm.h |  2 ++
>  2 files changed, 47 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 9e8dab0..13dcd71 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -28,6 +28,7 @@
>  #include "sysemu/kvm.h"
>  #include "trace.h"
>  #include "hw/virtio/vhost.h"
> +#include "exec/address-spaces.h"
>  
>  typedef struct pc_dimms_capacity {
>       uint64_t size;
> @@ -44,7 +45,12 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>      MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
>      Error *local_err = NULL;
>      uint64_t existing_dimms_capacity = 0;
> -    uint64_t addr;
> +    uint64_t addr, size = memory_region_size(mr);
> +
> +    size += object_property_get_int(OBJECT(dimm), PC_DIMM_RSVD_PROP, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
>  
>      addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
>      if (local_err) {
> @@ -54,7 +60,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>      addr = pc_dimm_get_free_addr(hpms->base,
>                                   memory_region_size(&hpms->mr),
>                                   !addr ? NULL : &addr, align,
> -                                 memory_region_size(mr), &local_err);
> +                                 size, &local_err);
>      if (local_err) {
>          goto out;
>      }
> @@ -64,7 +70,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>          goto out;
>      }
>  
> -    if (existing_dimms_capacity + memory_region_size(mr) >
> +    if (existing_dimms_capacity + size >
>          machine->maxram_size - machine->ram_size) {
>          error_setg(&local_err, "not enough space, currently 0x%" PRIx64
>                     " in use of total hot pluggable 0x" RAM_ADDR_FMT,
> @@ -315,6 +321,9 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
>          PCDIMMDevice *dimm = item->data;
>          uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
>                                                       PC_DIMM_SIZE_PROP,
> +                                                     errp) +
> +                             object_property_get_int(OBJECT(dimm),
> +                                                     PC_DIMM_RSVD_PROP,
>                                                       errp);
>          if (errp && *errp) {
>              goto out;
> @@ -382,6 +391,37 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
>      error_propagate(errp, local_err);
>  }
>  
> +static void pc_dimm_get_reserved_size(Object *obj, Visitor *v, const char *name,
> +                                      void *opaque, Error **errp)
> +{
> +    PCDIMMDevice *dimm = PC_DIMM(obj);
> +    uint64_t value = dimm->reserved_size;
> +
> +    visit_type_size(v, name, &value, errp);
> +}
> +
> +static void pc_dimm_set_reserved_size(Object *obj, Visitor *v, const char *name,
> +                                      void *opaque, Error **errp)
> +{
> +    PCDIMMDevice *dimm = PC_DIMM(obj);
> +    Error *local_err = NULL;
> +    uint64_t value;
> +
> +    if (dimm->reserved_size) {
> +        error_setg(&local_err, "cannot change 'reserved-size'");
> +        goto out;
> +    }
> +
> +    visit_type_size(v, name, &value, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
> +    dimm->reserved_size = value;
> +
> + out:
> +    error_propagate(errp, local_err);
> +}
> +
>  static void pc_dimm_init(Object *obj)
>  {
>      PCDIMMDevice *dimm = PC_DIMM(obj);
> @@ -393,6 +433,8 @@ static void pc_dimm_init(Object *obj)
>                               pc_dimm_check_memdev_is_busy,
>                               OBJ_PROP_LINK_UNREF_ON_RELEASE,
>                               &error_abort);
> +    object_property_add(obj, PC_DIMM_RSVD_PROP, "int", pc_dimm_get_reserved_size,
> +                        pc_dimm_set_reserved_size, NULL, NULL, &error_abort);
>  }
>  
>  static void pc_dimm_realize(DeviceState *dev, Error **errp)
> diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
> index 1e483f2..99c4dfd 100644
> --- a/include/hw/mem/pc-dimm.h
> +++ b/include/hw/mem/pc-dimm.h
> @@ -33,6 +33,7 @@
>  #define PC_DIMM_NODE_PROP "node"
>  #define PC_DIMM_SIZE_PROP "size"
>  #define PC_DIMM_MEMDEV_PROP "memdev"
> +#define PC_DIMM_RSVD_PROP "reserved-size"
>  
>  #define PC_DIMM_UNASSIGNED_SLOT -1
>  
> @@ -53,6 +54,7 @@ typedef struct PCDIMMDevice {
>      uint64_t addr;
>      uint32_t node;
>      int32_t slot;
> +    uint64_t reserved_size;
>      HostMemoryBackend *hostmem;
>  } PCDIMMDevice;
>  
> -- 
> 2.10.1
> 
>
Haozhong Zhang April 6, 2017, 10:46 a.m. UTC | #2
On 04/06/17 11:24 +0100, Stefan Hajnoczi wrote:
> On Fri, Mar 31, 2017 at 04:41:44PM +0800, Haozhong Zhang wrote:
> > If option 'reserved-size=RSVD' is present, QEMU will reserve an
> > address range of size 'RSVD' after the ending address of pc-dimm
> > device.
> > 
> > For the following example,
> >     -object memory-backend-file,id=mem0,size=4G,...
> >     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...
> 
> "reserved-size" is not a clear name.  I suggest calling the property
> "num-flush-hints" (default 0).  QEMU can calculate the actual size in
> bytes.
> 
>   -device nvdimm,num-flush-hints=1
> 
> QEMU will use one flush hint and reserve enough space (e.g. 1 page) for
> the MMIO region.
>

Each flush hint can be as small as one cache line size which is also
the size used in this patch series.

We need to calculate the size of all flush hints in pc_dimm_memory_plug(),
because when building the flush hint address structure we need to know
the address of flush hints.

IIUC, pc_dimm_memory_plug() is not specific to x86, so it's better
take a general way to get the vcpu cache line size in pc_dimm_memory_plug(),
which seemingly lacks in QEMU (though I believe it should be doable).

To make things simple, I leave the size decision to users, and check
whether it's large enough when building the flush hint address
structures in patch 4.


Haozhong

> >     -device pc-dimm,id=dimm1,...
> > if dimm0 is allocated to address N ~ N+4G, the address range of
> > dimm1 will start from N+4G+4K or higher.
> > 
> > Its current usage is to reserve spaces for flush hint addresses
> > of nvdimm devices.
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > ---
> >  hw/mem/pc-dimm.c         | 48 +++++++++++++++++++++++++++++++++++++++++++++---
> >  include/hw/mem/pc-dimm.h |  2 ++
> >  2 files changed, 47 insertions(+), 3 deletions(-)
> > 
> > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> > index 9e8dab0..13dcd71 100644
> > --- a/hw/mem/pc-dimm.c
> > +++ b/hw/mem/pc-dimm.c
> > @@ -28,6 +28,7 @@
> >  #include "sysemu/kvm.h"
> >  #include "trace.h"
> >  #include "hw/virtio/vhost.h"
> > +#include "exec/address-spaces.h"
> >  
> >  typedef struct pc_dimms_capacity {
> >       uint64_t size;
> > @@ -44,7 +45,12 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
> >      MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
> >      Error *local_err = NULL;
> >      uint64_t existing_dimms_capacity = 0;
> > -    uint64_t addr;
> > +    uint64_t addr, size = memory_region_size(mr);
> > +
> > +    size += object_property_get_int(OBJECT(dimm), PC_DIMM_RSVD_PROP, &local_err);
> > +    if (local_err) {
> > +        goto out;
> > +    }
> >  
> >      addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
> >      if (local_err) {
> > @@ -54,7 +60,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
> >      addr = pc_dimm_get_free_addr(hpms->base,
> >                                   memory_region_size(&hpms->mr),
> >                                   !addr ? NULL : &addr, align,
> > -                                 memory_region_size(mr), &local_err);
> > +                                 size, &local_err);
> >      if (local_err) {
> >          goto out;
> >      }
> > @@ -64,7 +70,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
> >          goto out;
> >      }
> >  
> > -    if (existing_dimms_capacity + memory_region_size(mr) >
> > +    if (existing_dimms_capacity + size >
> >          machine->maxram_size - machine->ram_size) {
> >          error_setg(&local_err, "not enough space, currently 0x%" PRIx64
> >                     " in use of total hot pluggable 0x" RAM_ADDR_FMT,
> > @@ -315,6 +321,9 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
> >          PCDIMMDevice *dimm = item->data;
> >          uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
> >                                                       PC_DIMM_SIZE_PROP,
> > +                                                     errp) +
> > +                             object_property_get_int(OBJECT(dimm),
> > +                                                     PC_DIMM_RSVD_PROP,
> >                                                       errp);
> >          if (errp && *errp) {
> >              goto out;
> > @@ -382,6 +391,37 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
> >      error_propagate(errp, local_err);
> >  }
> >  
> > +static void pc_dimm_get_reserved_size(Object *obj, Visitor *v, const char *name,
> > +                                      void *opaque, Error **errp)
> > +{
> > +    PCDIMMDevice *dimm = PC_DIMM(obj);
> > +    uint64_t value = dimm->reserved_size;
> > +
> > +    visit_type_size(v, name, &value, errp);
> > +}
> > +
> > +static void pc_dimm_set_reserved_size(Object *obj, Visitor *v, const char *name,
> > +                                      void *opaque, Error **errp)
> > +{
> > +    PCDIMMDevice *dimm = PC_DIMM(obj);
> > +    Error *local_err = NULL;
> > +    uint64_t value;
> > +
> > +    if (dimm->reserved_size) {
> > +        error_setg(&local_err, "cannot change 'reserved-size'");
> > +        goto out;
> > +    }
> > +
> > +    visit_type_size(v, name, &value, &local_err);
> > +    if (local_err) {
> > +        goto out;
> > +    }
> > +    dimm->reserved_size = value;
> > +
> > + out:
> > +    error_propagate(errp, local_err);
> > +}
> > +
> >  static void pc_dimm_init(Object *obj)
> >  {
> >      PCDIMMDevice *dimm = PC_DIMM(obj);
> > @@ -393,6 +433,8 @@ static void pc_dimm_init(Object *obj)
> >                               pc_dimm_check_memdev_is_busy,
> >                               OBJ_PROP_LINK_UNREF_ON_RELEASE,
> >                               &error_abort);
> > +    object_property_add(obj, PC_DIMM_RSVD_PROP, "int", pc_dimm_get_reserved_size,
> > +                        pc_dimm_set_reserved_size, NULL, NULL, &error_abort);
> >  }
> >  
> >  static void pc_dimm_realize(DeviceState *dev, Error **errp)
> > diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
> > index 1e483f2..99c4dfd 100644
> > --- a/include/hw/mem/pc-dimm.h
> > +++ b/include/hw/mem/pc-dimm.h
> > @@ -33,6 +33,7 @@
> >  #define PC_DIMM_NODE_PROP "node"
> >  #define PC_DIMM_SIZE_PROP "size"
> >  #define PC_DIMM_MEMDEV_PROP "memdev"
> > +#define PC_DIMM_RSVD_PROP "reserved-size"
> >  
> >  #define PC_DIMM_UNASSIGNED_SLOT -1
> >  
> > @@ -53,6 +54,7 @@ typedef struct PCDIMMDevice {
> >      uint64_t addr;
> >      uint32_t node;
> >      int32_t slot;
> > +    uint64_t reserved_size;
> >      HostMemoryBackend *hostmem;
> >  } PCDIMMDevice;
> >  
> > -- 
> > 2.10.1
> > 
> >
Xiao Guangrong April 6, 2017, 11:50 a.m. UTC | #3
On 03/31/2017 04:41 PM, Haozhong Zhang wrote:
> If option 'reserved-size=RSVD' is present, QEMU will reserve an
> address range of size 'RSVD' after the ending address of pc-dimm
> device.
>
> For the following example,
>     -object memory-backend-file,id=mem0,size=4G,...
>     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...
>     -device pc-dimm,id=dimm1,...
> if dimm0 is allocated to address N ~ N+4G, the address range of
> dimm1 will start from N+4G+4K or higher.
>
> Its current usage is to reserve spaces for flush hint addresses
> of nvdimm devices.
>
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  hw/mem/pc-dimm.c         | 48 +++++++++++++++++++++++++++++++++++++++++++++---
>  include/hw/mem/pc-dimm.h |  2 ++
>  2 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> index 9e8dab0..13dcd71 100644
> --- a/hw/mem/pc-dimm.c
> +++ b/hw/mem/pc-dimm.c
> @@ -28,6 +28,7 @@
>  #include "sysemu/kvm.h"
>  #include "trace.h"
>  #include "hw/virtio/vhost.h"
> +#include "exec/address-spaces.h"
>
>  typedef struct pc_dimms_capacity {
>       uint64_t size;
> @@ -44,7 +45,12 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>      MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
>      Error *local_err = NULL;
>      uint64_t existing_dimms_capacity = 0;
> -    uint64_t addr;
> +    uint64_t addr, size = memory_region_size(mr);
> +
> +    size += object_property_get_int(OBJECT(dimm), PC_DIMM_RSVD_PROP, &local_err);
> +    if (local_err) {
> +        goto out;
> +    }
>
>      addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
>      if (local_err) {
> @@ -54,7 +60,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>      addr = pc_dimm_get_free_addr(hpms->base,
>                                   memory_region_size(&hpms->mr),
>                                   !addr ? NULL : &addr, align,
> -                                 memory_region_size(mr), &local_err);
> +                                 size, &local_err);
>      if (local_err) {
>          goto out;
>      }
> @@ -64,7 +70,7 @@ void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
>          goto out;
>      }
>
> -    if (existing_dimms_capacity + memory_region_size(mr) >
> +    if (existing_dimms_capacity + size >
>          machine->maxram_size - machine->ram_size) {
>          error_setg(&local_err, "not enough space, currently 0x%" PRIx64
>                     " in use of total hot pluggable 0x" RAM_ADDR_FMT,
> @@ -315,6 +321,9 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
>          PCDIMMDevice *dimm = item->data;
>          uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
>                                                       PC_DIMM_SIZE_PROP,
> +                                                     errp) +
> +                             object_property_get_int(OBJECT(dimm),
> +                                                     PC_DIMM_RSVD_PROP,
>                                                       errp);
>          if (errp && *errp) {
>              goto out;
> @@ -382,6 +391,37 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
>      error_propagate(errp, local_err);
>  }
>

Should count the reserved size in pc_existing_dimms_capacity_internal()
too.
Stefan Hajnoczi April 7, 2017, 1:46 p.m. UTC | #4
On Thu, Apr 06, 2017 at 06:46:49PM +0800, Haozhong Zhang wrote:
> On 04/06/17 11:24 +0100, Stefan Hajnoczi wrote:
> > On Fri, Mar 31, 2017 at 04:41:44PM +0800, Haozhong Zhang wrote:
> > > If option 'reserved-size=RSVD' is present, QEMU will reserve an
> > > address range of size 'RSVD' after the ending address of pc-dimm
> > > device.
> > > 
> > > For the following example,
> > >     -object memory-backend-file,id=mem0,size=4G,...
> > >     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...
> > 
> > "reserved-size" is not a clear name.  I suggest calling the property
> > "num-flush-hints" (default 0).  QEMU can calculate the actual size in
> > bytes.
> > 
> >   -device nvdimm,num-flush-hints=1
> > 
> > QEMU will use one flush hint and reserve enough space (e.g. 1 page) for
> > the MMIO region.
> >
> 
> Each flush hint can be as small as one cache line size which is also
> the size used in this patch series.
> 
> We need to calculate the size of all flush hints in pc_dimm_memory_plug(),
> because when building the flush hint address structure we need to know
> the address of flush hints.
> 
> IIUC, pc_dimm_memory_plug() is not specific to x86, so it's better
> take a general way to get the vcpu cache line size in pc_dimm_memory_plug(),
> which seemingly lacks in QEMU (though I believe it should be doable).
> 
> To make things simple, I leave the size decision to users, and check
> whether it's large enough when building the flush hint address
> structures in patch 4.

Do you see my concern that "reserved-size" is not a good property?

 * How does the user choose a sensible value?

 * Why is it called "reserved-size" instead of "flush-hints-size"?

Stefan
Haozhong Zhang April 11, 2017, 8:57 a.m. UTC | #5
On 04/07/17 14:46 +0100, Stefan Hajnoczi wrote:
> On Thu, Apr 06, 2017 at 06:46:49PM +0800, Haozhong Zhang wrote:
> > On 04/06/17 11:24 +0100, Stefan Hajnoczi wrote:
> > > On Fri, Mar 31, 2017 at 04:41:44PM +0800, Haozhong Zhang wrote:
> > > > If option 'reserved-size=RSVD' is present, QEMU will reserve an
> > > > address range of size 'RSVD' after the ending address of pc-dimm
> > > > device.
> > > > 
> > > > For the following example,
> > > >     -object memory-backend-file,id=mem0,size=4G,...
> > > >     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...
> > > 
> > > "reserved-size" is not a clear name.  I suggest calling the property
> > > "num-flush-hints" (default 0).  QEMU can calculate the actual size in
> > > bytes.
> > > 
> > >   -device nvdimm,num-flush-hints=1
> > > 
> > > QEMU will use one flush hint and reserve enough space (e.g. 1 page) for
> > > the MMIO region.
> > >
> > 
> > Each flush hint can be as small as one cache line size which is also
> > the size used in this patch series.
> > 
> > We need to calculate the size of all flush hints in pc_dimm_memory_plug(),
> > because when building the flush hint address structure we need to know
> > the address of flush hints.
> > 
> > IIUC, pc_dimm_memory_plug() is not specific to x86, so it's better
> > take a general way to get the vcpu cache line size in pc_dimm_memory_plug(),
> > which seemingly lacks in QEMU (though I believe it should be doable).
> > 
> > To make things simple, I leave the size decision to users, and check
> > whether it's large enough when building the flush hint address
> > structures in patch 4.
> 
> Do you see my concern that "reserved-size" is not a good property?
> 
>  * How does the user choose a sensible value?

A user has to calculate the size by himself by multiplying the number
of flush hint address (currently it's hardcoded to 1) with the cache
line size (e.g. 64 bytes on x86) and properly align it, or simply uses
4KB or even a larger value.

In case the reserved-size is too small, QEMU will complain and abort,
so the user still has chance to know what goes wrong.

> 
>  * Why is it called "reserved-size" instead of "flush-hints-size"?
>

This property is made as a property of pc-dimm rather than nvdimm,
which could be used for other purposes, e.g. adjust the alignment the
base address of hotplugged pc-dimm (I remember old linux kernels on
x86-64 requires hotplugged memory chunk be 128MB aligned). Of course,
it can be limited to be a nvdimm-only property, and renamed to
'flush-hints-size' in that case.

Thanks,
Haozhong
Igor Mammedov April 20, 2017, 10:54 a.m. UTC | #6
On Tue, 11 Apr 2017 16:57:00 +0800
Haozhong Zhang <haozhong.zhang@intel.com> wrote:

> On 04/07/17 14:46 +0100, Stefan Hajnoczi wrote:
> > On Thu, Apr 06, 2017 at 06:46:49PM +0800, Haozhong Zhang wrote:  
> > > On 04/06/17 11:24 +0100, Stefan Hajnoczi wrote:  
> > > > On Fri, Mar 31, 2017 at 04:41:44PM +0800, Haozhong Zhang wrote:  
> > > > > If option 'reserved-size=RSVD' is present, QEMU will reserve an
> > > > > address range of size 'RSVD' after the ending address of pc-dimm
> > > > > device.
> > > > > 
> > > > > For the following example,
> > > > >     -object memory-backend-file,id=mem0,size=4G,...
> > > > >     -device nvdimm,id=dimm0,memdev=mem0,reserved-size=4K,...  
> > > > 
> > > > "reserved-size" is not a clear name.  I suggest calling the property
> > > > "num-flush-hints" (default 0).  QEMU can calculate the actual size in
> > > > bytes.
> > > > 
> > > >   -device nvdimm,num-flush-hints=1
I'm for this approach as well

I see that in patchset you hardcode cache line size to 64 for pc/q35
machines. What you could do is to introduce machine or cpu callback to
fetch cache line size and then you can calculate needed MMIO region size
using it and num-flush-hints in target independent way

> > > > 
> > > > QEMU will use one flush hint and reserve enough space (e.g. 1 page) for
> > > > the MMIO region.
> > > >  
> > > 
> > > Each flush hint can be as small as one cache line size which is also
> > > the size used in this patch series.
> > > 
> > > We need to calculate the size of all flush hints in pc_dimm_memory_plug(),
> > > because when building the flush hint address structure we need to know
> > > the address of flush hints.
> > > 
> > > IIUC, pc_dimm_memory_plug() is not specific to x86, so it's better
> > > take a general way to get the vcpu cache line size in pc_dimm_memory_plug(),
> > > which seemingly lacks in QEMU (though I believe it should be doable).
> > > 
> > > To make things simple, I leave the size decision to users, and check
> > > whether it's large enough when building the flush hint address
> > > structures in patch 4.  
> > 
> > Do you see my concern that "reserved-size" is not a good property?
> > 
> >  * How does the user choose a sensible value?  
> 
> A user has to calculate the size by himself by multiplying the number
> of flush hint address (currently it's hardcoded to 1) with the cache
> line size (e.g. 64 bytes on x86) and properly align it, or simply uses
> 4KB or even a larger value.
> 
> In case the reserved-size is too small, QEMU will complain and abort,
> so the user still has chance to know what goes wrong.
> 
> > 
> >  * Why is it called "reserved-size" instead of "flush-hints-size"?
> >  
> 
> This property is made as a property of pc-dimm rather than nvdimm,
> which could be used for other purposes, e.g. adjust the alignment the
> base address of hotplugged pc-dimm (I remember old linux kernels on
> x86-64 requires hotplugged memory chunk be 128MB aligned).
it would be rather awkward to use with pc-dimm:
 1: it would affect not the dimm for which it's specified but the following one
 2: to set it to correct value that would align base address of the next dimm
    to need value, the user first would have to know (set manually) a base address
    of dimm for which reserved-size is provided.

Users would be better off if he/she just provided properly aligned base address
instead of reserved-size.

> Of course,
> it can be limited to be a nvdimm-only property, and renamed to
> 'flush-hints-size' in that case.
pls move this property to nvdimm

> 
> Thanks,
> Haozhong
diff mbox

Patch

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 9e8dab0..13dcd71 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -28,6 +28,7 @@ 
 #include "sysemu/kvm.h"
 #include "trace.h"
 #include "hw/virtio/vhost.h"
+#include "exec/address-spaces.h"
 
 typedef struct pc_dimms_capacity {
      uint64_t size;
@@ -44,7 +45,12 @@  void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
     MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm);
     Error *local_err = NULL;
     uint64_t existing_dimms_capacity = 0;
-    uint64_t addr;
+    uint64_t addr, size = memory_region_size(mr);
+
+    size += object_property_get_int(OBJECT(dimm), PC_DIMM_RSVD_PROP, &local_err);
+    if (local_err) {
+        goto out;
+    }
 
     addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
     if (local_err) {
@@ -54,7 +60,7 @@  void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
     addr = pc_dimm_get_free_addr(hpms->base,
                                  memory_region_size(&hpms->mr),
                                  !addr ? NULL : &addr, align,
-                                 memory_region_size(mr), &local_err);
+                                 size, &local_err);
     if (local_err) {
         goto out;
     }
@@ -64,7 +70,7 @@  void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
         goto out;
     }
 
-    if (existing_dimms_capacity + memory_region_size(mr) >
+    if (existing_dimms_capacity + size >
         machine->maxram_size - machine->ram_size) {
         error_setg(&local_err, "not enough space, currently 0x%" PRIx64
                    " in use of total hot pluggable 0x" RAM_ADDR_FMT,
@@ -315,6 +321,9 @@  uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
         PCDIMMDevice *dimm = item->data;
         uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
                                                      PC_DIMM_SIZE_PROP,
+                                                     errp) +
+                             object_property_get_int(OBJECT(dimm),
+                                                     PC_DIMM_RSVD_PROP,
                                                      errp);
         if (errp && *errp) {
             goto out;
@@ -382,6 +391,37 @@  static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
     error_propagate(errp, local_err);
 }
 
+static void pc_dimm_get_reserved_size(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(obj);
+    uint64_t value = dimm->reserved_size;
+
+    visit_type_size(v, name, &value, errp);
+}
+
+static void pc_dimm_set_reserved_size(Object *obj, Visitor *v, const char *name,
+                                      void *opaque, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(obj);
+    Error *local_err = NULL;
+    uint64_t value;
+
+    if (dimm->reserved_size) {
+        error_setg(&local_err, "cannot change 'reserved-size'");
+        goto out;
+    }
+
+    visit_type_size(v, name, &value, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    dimm->reserved_size = value;
+
+ out:
+    error_propagate(errp, local_err);
+}
+
 static void pc_dimm_init(Object *obj)
 {
     PCDIMMDevice *dimm = PC_DIMM(obj);
@@ -393,6 +433,8 @@  static void pc_dimm_init(Object *obj)
                              pc_dimm_check_memdev_is_busy,
                              OBJ_PROP_LINK_UNREF_ON_RELEASE,
                              &error_abort);
+    object_property_add(obj, PC_DIMM_RSVD_PROP, "int", pc_dimm_get_reserved_size,
+                        pc_dimm_set_reserved_size, NULL, NULL, &error_abort);
 }
 
 static void pc_dimm_realize(DeviceState *dev, Error **errp)
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index 1e483f2..99c4dfd 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -33,6 +33,7 @@ 
 #define PC_DIMM_NODE_PROP "node"
 #define PC_DIMM_SIZE_PROP "size"
 #define PC_DIMM_MEMDEV_PROP "memdev"
+#define PC_DIMM_RSVD_PROP "reserved-size"
 
 #define PC_DIMM_UNASSIGNED_SLOT -1
 
@@ -53,6 +54,7 @@  typedef struct PCDIMMDevice {
     uint64_t addr;
     uint32_t node;
     int32_t slot;
+    uint64_t reserved_size;
     HostMemoryBackend *hostmem;
 } PCDIMMDevice;