diff mbox

[1/2] nvdimm: warn if the backend is not a DAX device

Message ID 20170526022438.17478-1-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang May 26, 2017, 2:24 a.m. UTC
Applications in a Linux guest that use device-dax never trigger a flush
that can be trapped by KVM/QEMU. Meanwhile, if the host backend is not
device-dax, QEMU cannot guarantee the persistence of guest writes.
Until this flushing problem is solved, QEMU should warn users if the
host backend is not device-dax.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
Message-id: CAPcyv4hV2-ZW8SMCRtD0P_86KgR3DHOvNe+6T5SY2u7wXg3gEg@mail.gmail.com
---
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Cc: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
---
 hw/mem/nvdimm.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

Comments

Stefan Hajnoczi May 30, 2017, 9:15 a.m. UTC | #1
On Fri, May 26, 2017 at 10:24:37AM +0800, Haozhong Zhang wrote:
> Applications in Linux guest that use device-dax never trigger flush
> that can be trapped by KVM/QEMU. Meanwhile, if the host backend is not
> device-dax, QEMU cannot guarantee the persistence of guest writes.
> Before solving this flushing problem, QEMU should warn users if the
> host backend is not device-dax.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> Message-id: CAPcyv4hV2-ZW8SMCRtD0P_86KgR3DHOvNe+6T5SY2u7wXg3gEg@mail.gmail.com
> ---
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: Igor Mammedov <imammedo@redhat.com>
> Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> ---
>  hw/mem/nvdimm.c | 37 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 37 insertions(+)
> 
> diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> index db896b0bb6..c7bb407f33 100644
> --- a/hw/mem/nvdimm.c
> +++ b/hw/mem/nvdimm.c
> @@ -26,6 +26,7 @@
>  #include "qapi/error.h"
>  #include "qapi/visitor.h"
>  #include "hw/mem/nvdimm.h"
> +#include "qemu/error-report.h"
>  
>  static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
>                                    void *opaque, Error **errp)
> @@ -78,12 +79,48 @@ static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm)
>      return &nvdimm->nvdimm_mr;
>  }
>  
> +static void nvdimm_check_dax(HostMemoryBackend *hostmem)
> +{
> +    char *mem_path =
> +        object_property_get_str(OBJECT(hostmem), "mem-path", NULL);
> +    char *dev_name = NULL, *sysfs_path = NULL;
> +    bool is_dax = false;
> +
> +    if (!mem_path) {
> +        goto out;
> +    }
> +
> +    if (!g_str_has_prefix(mem_path, "/dev/dax")) {

Does not work with relative paths, symlinks, or device nodes in
non-standard locations.

A more accurate check is to fstat the file descriptor for the char
device major/minor number and verify that
/sys/dev/char/MAJ:MIN/device/devtype is "nd_dax".

> +        goto out;
> +    }
> +
> +    dev_name = mem_path + strlen("/dev/");
> +    sysfs_path = g_strdup_printf("/sys/class/dax/%s", dev_name);
> +    if (access(sysfs_path, F_OK)) {
> +        goto out;
> +    }
> +
> +    is_dax = true;

This check is Linux-specific, please use #ifdef __linux__.  It's okay to
always print an error on other operating systems.

> +
> + out:
> +    if (!is_dax) {
> +        error_report("warning: nvdimm backend %s is not DAX device, "

The check is not 100% accurate so:

s/is not/does not look like a/

> +                     "unable to guarantee persistence of guest writes",
> +                     mem_path ?: "RAM");
> +    }
> +
> +    g_free(sysfs_path);
> +    g_free(mem_path);
> +}
> +
>  static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
>  {
>      MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
>      NVDIMMDevice *nvdimm = NVDIMM(dimm);
>      uint64_t align, pmem_size, size = memory_region_size(mr);
>  
> +    nvdimm_check_dax(dimm->hostmem);
> +
>      align = memory_region_get_alignment(mr);
>  
>      pmem_size = size - nvdimm->label_size;
> -- 
> 2.11.0
>
Haozhong Zhang May 31, 2017, 6:12 a.m. UTC | #2
On 05/30/17 10:15 +0100, Stefan Hajnoczi wrote:
> On Fri, May 26, 2017 at 10:24:37AM +0800, Haozhong Zhang wrote:
> > Applications in Linux guest that use device-dax never trigger flush
> > that can be trapped by KVM/QEMU. Meanwhile, if the host backend is not
> > device-dax, QEMU cannot guarantee the persistence of guest writes.
> > Before solving this flushing problem, QEMU should warn users if the
> > host backend is not device-dax.
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > Message-id: CAPcyv4hV2-ZW8SMCRtD0P_86KgR3DHOvNe+6T5SY2u7wXg3gEg@mail.gmail.com
> > ---
> > Cc: "Michael S. Tsirkin" <mst@redhat.com>
> > Cc: Igor Mammedov <imammedo@redhat.com>
> > Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> > Cc: Stefan Hajnoczi <stefanha@gmail.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  hw/mem/nvdimm.c | 37 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 37 insertions(+)
> > 
> > diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
> > index db896b0bb6..c7bb407f33 100644
> > --- a/hw/mem/nvdimm.c
> > +++ b/hw/mem/nvdimm.c
> > @@ -26,6 +26,7 @@
> >  #include "qapi/error.h"
> >  #include "qapi/visitor.h"
> >  #include "hw/mem/nvdimm.h"
> > +#include "qemu/error-report.h"
> >  
> >  static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
> >                                    void *opaque, Error **errp)
> > @@ -78,12 +79,48 @@ static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm)
> >      return &nvdimm->nvdimm_mr;
> >  }
> >  
> > +static void nvdimm_check_dax(HostMemoryBackend *hostmem)
> > +{
> > +    char *mem_path =
> > +        object_property_get_str(OBJECT(hostmem), "mem-path", NULL);
> > +    char *dev_name = NULL, *sysfs_path = NULL;
> > +    bool is_dax = false;
> > +
> > +    if (!mem_path) {
> > +        goto out;
> > +    }
> > +
> > +    if (!g_str_has_prefix(mem_path, "/dev/dax")) {
> 
> Does not work with relative paths, symlinks, or device nodes in
> non-standard locations.
> 
> A more accurate check is to fstat the file descriptor for the char
> device major/minor number and verify that
> /sys/dev/char/MAJ:MIN/device/devtype is "nd_dax".
>

Yes, Dan also suggested using this approach. I'll switch to it in v2.

> > +        goto out;
> > +    }
> > +
> > +    dev_name = mem_path + strlen("/dev/");
> > +    sysfs_path = g_strdup_printf("/sys/class/dax/%s", dev_name);
> > +    if (access(sysfs_path, F_OK)) {
> > +        goto out;
> > +    }
> > +
> > +    is_dax = true;
> 
> This check is Linux-specific, please use #ifdef __linux__.  It's okay to
> always print an error on other operating systems.
>

Will change.

> > +
> > + out:
> > +    if (!is_dax) {
> > +        error_report("warning: nvdimm backend %s is not DAX device, "
> 
> The check is not 100% accurate so:
> 
> s/is not/does not look like a/

Ditto, will change the wording as suggested.

Thanks,
Haozhong

> 
> > +                     "unable to guarantee persistence of guest writes",
> > +                     mem_path ?: "RAM");
> > +    }
> > +
> > +    g_free(sysfs_path);
> > +    g_free(mem_path);
> > +}
> > +
> >  static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
> >  {
> >      MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
> >      NVDIMMDevice *nvdimm = NVDIMM(dimm);
> >      uint64_t align, pmem_size, size = memory_region_size(mr);
> >  
> > +    nvdimm_check_dax(dimm->hostmem);
> > +
> >      align = memory_region_get_alignment(mr);
> >  
> >      pmem_size = size - nvdimm->label_size;
> > -- 
> > 2.11.0
> >
diff mbox

Patch

diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index db896b0bb6..c7bb407f33 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -26,6 +26,7 @@ 
 #include "qapi/error.h"
 #include "qapi/visitor.h"
 #include "hw/mem/nvdimm.h"
+#include "qemu/error-report.h"
 
 static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
                                   void *opaque, Error **errp)
@@ -78,12 +79,48 @@  static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm)
     return &nvdimm->nvdimm_mr;
 }
 
+static void nvdimm_check_dax(HostMemoryBackend *hostmem)
+{
+    char *mem_path =
+        object_property_get_str(OBJECT(hostmem), "mem-path", NULL);
+    char *dev_name = NULL, *sysfs_path = NULL;
+    bool is_dax = false;
+
+    if (!mem_path) {
+        goto out;
+    }
+
+    if (!g_str_has_prefix(mem_path, "/dev/dax")) {
+        goto out;
+    }
+
+    dev_name = mem_path + strlen("/dev/");
+    sysfs_path = g_strdup_printf("/sys/class/dax/%s", dev_name);
+    if (access(sysfs_path, F_OK)) {
+        goto out;
+    }
+
+    is_dax = true;
+
+ out:
+    if (!is_dax) {
+        error_report("warning: nvdimm backend %s is not DAX device, "
+                     "unable to guarantee persistence of guest writes",
+                     mem_path ?: "RAM");
+    }
+
+    g_free(sysfs_path);
+    g_free(mem_path);
+}
+
 static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
 {
     MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
     NVDIMMDevice *nvdimm = NVDIMM(dimm);
     uint64_t align, pmem_size, size = memory_region_size(mr);
 
+    nvdimm_check_dax(dimm->hostmem);
+
     align = memory_region_get_alignment(mr);
 
     pmem_size = size - nvdimm->label_size;