diff mbox

[2/2] hostmem-file: add an attribute 'align' to set its alignment

Message ID 20170526022438.17478-2-haozhong.zhang@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Haozhong Zhang May 26, 2017, 2:24 a.m. UTC
file_ram_alloc() currently maps the backend file via mmap to a virtual
address aligned to the value returned by qemu_fd_getpagesize(). When a
DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
mmap implementation may require an alignment larger than what
qemu_fd_getpagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.

This commit adds an attribute 'align' to hostmem-file, so that users
can specify a proper alignment that satisfies the kernel requirement.

If 'align' is not specified or is 0, the value returned by
qemu_fd_getpagesize() will be used as before.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Igor Mammedov <imammedo@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Cc: Stefan Hajnoczi <stefanha@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
---
 backends/hostmem-file.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 exec.c                  |  8 +++++++-
 include/exec/memory.h   |  2 ++
 memory.c                |  2 ++
 numa.c                  |  2 +-
 5 files changed, 52 insertions(+), 3 deletions(-)

Comments

Stefan Hajnoczi May 30, 2017, 9:16 a.m. UTC | #1
On Fri, May 26, 2017 at 10:24:38AM +0800, Haozhong Zhang wrote:
> file_ram_alloc() currently maps the backend file via mmap to a virtual
> address aligned to the value returned by qemu_fd_getpagesize(). When a
> DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> mmap implementation may require an alignment larger than what
> qemu_fd_getpagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
> 
> This commit adds an attribute 'align' to hostmem-file, so that users
> can specify a proper alignment that satisfies the kernel requirement.
> 
> If 'align' is not specified or is 0, the value returned by
> qemu_fd_getpagesize() will be used as before.

How are users supposed to determine alignment requirements?
Haozhong Zhang May 31, 2017, 6:24 a.m. UTC | #2
On 05/30/17 10:16 +0100, Stefan Hajnoczi wrote:
> On Fri, May 26, 2017 at 10:24:38AM +0800, Haozhong Zhang wrote:
> > file_ram_alloc() currently maps the backend file via mmap to a virtual
> > address aligned to the value returned by qemu_fd_getpagesize(). When a
> > DAX device (e.g. /dev/dax0.0) is used as the backend file, its kernel
> > mmap implementation may require an alignment larger than what
> > qemu_fd_getpagesize() returns (e.g. 2MB vs. 4KB), and mmap may fail.
> > 
> > This commit adds an attribute 'align' to hostmem-file, so that users
> > can specify a proper alignment that satisfies the kernel requirement.
> > 
> > If 'align' is not specified or is 0, the value returned by
> > qemu_fd_getpagesize() will be used as before.
> 
> How are users supposed to determine alignment requirements?

As discussed in another thread [1,2], the alignment of a device-dax
device can be obtained from /sys/dev/char/$major:$minor/device/align,
so I'll let QEMU read the value from there instead of relying on user
input.

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg06235.html
[2] https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg06189.html

Haozhong
diff mbox

Patch

diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index fc4ef46d11..d44fb41b55 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -33,6 +33,7 @@  struct HostMemoryBackendFile {
 
     bool share;
     char *mem_path;
+    uint64_t align;
 };
 
 static void
@@ -57,7 +58,7 @@  file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         path = object_get_canonical_path(OBJECT(backend));
         memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
                                  path,
-                                 backend->size, fb->share,
+                                 backend->size, fb->align, fb->share,
                                  fb->mem_path, errp);
         g_free(path);
     }
@@ -104,6 +105,40 @@  static void file_memory_backend_set_share(Object *o, bool value, Error **errp)
 }
 
 static void
+file_memory_backend_get_align(Object *o, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+    uint64_t val = fb->align;
+
+    visit_type_size(v, name, &val, errp);
+}
+
+static void
+file_memory_backend_set_align(Object *o, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    HostMemoryBackend *backend = MEMORY_BACKEND(o);
+    HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+    Error *local_err = NULL;
+    uint64_t val;
+
+    if (host_memory_backend_mr_inited(backend)) {
+        error_setg(&local_err, "cannot change property value");
+        goto out;
+    }
+
+    visit_type_size(v, name, &val, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    fb->align = val;
+
+ out:
+    error_propagate(errp, local_err);
+}
+
+static void
 file_backend_class_init(ObjectClass *oc, void *data)
 {
     HostMemoryBackendClass *bc = MEMORY_BACKEND_CLASS(oc);
@@ -116,6 +151,10 @@  file_backend_class_init(ObjectClass *oc, void *data)
     object_class_property_add_str(oc, "mem-path",
         get_mem_path, set_mem_path,
         &error_abort);
+    object_class_property_add(oc, "align", "int",
+        file_memory_backend_get_align,
+        file_memory_backend_set_align,
+        NULL, NULL, &error_abort);
 }
 
 static void file_backend_instance_finalize(Object *o)
diff --git a/exec.c b/exec.c
index ff16f04f2b..5bb62e2e98 100644
--- a/exec.c
+++ b/exec.c
@@ -1549,7 +1549,13 @@  static void *file_ram_alloc(RAMBlock *block,
     }
 
     block->page_size = qemu_fd_getpagesize(fd);
-    block->mr->align = block->page_size;
+    if (block->mr->align % block->page_size) {
+        error_setg(errp, "alignment 0x%" PRIx64 " must be "
+                   "multiple of page size 0x%" PRIx64,
+                   block->mr->align, block->page_size);
+        goto error;
+    }
+    block->mr->align = MAX(block->page_size, block->mr->align);
 #if defined(__s390x__)
     if (kvm_enabled()) {
         block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 99e0f54d86..05d3d0da3b 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -441,6 +441,7 @@  void memory_region_init_resizeable_ram(MemoryRegion *mr,
  * @name: Region name, becomes part of RAMBlock name used in migration stream
  *        must be unique within any device
  * @size: size of the region.
+ * @align: alignment of the region.
  * @share: %true if memory must be mmaped with the MAP_SHARED flag
  * @path: the path in which to allocate the RAM.
  * @errp: pointer to Error*, to store an error if it happens.
@@ -449,6 +450,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
                                       const char *name,
                                       uint64_t size,
+                                      uint64_t align,
                                       bool share,
                                       const char *path,
                                       Error **errp);
diff --git a/memory.c b/memory.c
index b727f5ec0e..5165b9aa08 100644
--- a/memory.c
+++ b/memory.c
@@ -1386,6 +1386,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
                                       struct Object *owner,
                                       const char *name,
                                       uint64_t size,
+                                      uint64_t align,
                                       bool share,
                                       const char *path,
                                       Error **errp)
@@ -1394,6 +1395,7 @@  void memory_region_init_ram_from_file(MemoryRegion *mr,
     mr->ram = true;
     mr->terminates = true;
     mr->destructor = memory_region_destructor_ram;
+    mr->align = align;
     mr->ram_block = qemu_ram_alloc_from_file(size, mr, share, path, errp);
     mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0;
 }
diff --git a/numa.c b/numa.c
index ca731455e9..39a25aa1d2 100644
--- a/numa.c
+++ b/numa.c
@@ -541,7 +541,7 @@  static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner,
     if (mem_path) {
 #ifdef __linux__
         Error *err = NULL;
-        memory_region_init_ram_from_file(mr, owner, name, ram_size, false,
+        memory_region_init_ram_from_file(mr, owner, name, ram_size, 0, false,
                                          mem_path, &err);
         if (err) {
             error_report_err(err);