@@ -1064,6 +1064,12 @@ FIFO-based event channel ABI support up to 131,071 event channels.
Other guests are limited to 4095 (64-bit x86 and ARM) or 1023 (32-bit
x86).
+=item B<vnvdimms=[ 'PATH0', 'PATH1', ... ]>
+
+Specify the virtual NVDIMM devices provided to the guest. B<PATH0>,
+B<PATH1>, ... specify the host NVDIMM pmem devices used as the backing
+store of each virtual NVDIMM device.
+
=back
=head2 Paravirtualised (PV) Guest Specific Options
@@ -24,6 +24,10 @@
#include <sys/types.h>
#include <pwd.h>
+#if defined(__linux__)
+#include <linux/fs.h> /* for ioctl(BLKGETSIZE64) */
+#endif
+
static const char *libxl_tapif_script(libxl__gc *gc)
{
#if defined(__linux__) || defined(__FreeBSD__)
@@ -910,6 +914,82 @@ static char *qemu_disk_ide_drive_string(libxl__gc *gc, const char *target_path,
return drive;
}
+#if defined(__linux__)
+
+static uint64_t libxl__build_dm_vnvdimm_args(libxl__gc *gc, flexarray_t *dm_args,
+ struct libxl_device_vnvdimm *dev,
+ int dev_no)
+{
+ int fd, rc;
+ struct stat st;
+ uint64_t size = 0;
+ char *arg;
+
+ fd = open(dev->file, O_RDONLY);
+ if (fd < 0) {
+ LOG(ERROR, "failed to open file %s: %s",
+ dev->file, strerror(errno));
+ goto out;
+ }
+
+    if (fstat(fd, &st)) {
+        LOG(ERROR, "failed to get status of file %s: %s",
+            dev->file, strerror(errno));
+        goto out_close;
+    }
+
+ switch (st.st_mode & S_IFMT) {
+ case S_IFBLK:
+ rc = ioctl(fd, BLKGETSIZE64, &size);
+ if (rc == -1) {
+ LOG(ERROR, "failed to get size of block device %s: %s",
+ dev->file, strerror(errno));
+ size = 0;
+ }
+ break;
+
+ default:
+ LOG(ERROR, "%s not block device", dev->file);
+ break;
+ }
+
+    if (!size)
+        goto out_close;
+
+ flexarray_append(dm_args, "-object");
+ arg = GCSPRINTF("memory-backend-xen,id=mem%d,size=%"PRIu64",mem-path=%s",
+ dev_no + 1, size, dev->file);
+ flexarray_append(dm_args, arg);
+
+ flexarray_append(dm_args, "-device");
+ arg = GCSPRINTF("nvdimm,id=nvdimm%d,memdev=mem%d", dev_no + 1, dev_no + 1);
+ flexarray_append(dm_args, arg);
+
+ out_close:
+ close(fd);
+ out:
+ return size;
+}
+
+static uint64_t libxl__build_dm_vnvdimms_args(
+ libxl__gc *gc, flexarray_t *dm_args,
+ struct libxl_device_vnvdimm *vnvdimms, int num_vnvdimms)
+{
+ uint64_t total_size = 0, size;
+    int i;
+
+ for (i = 0; i < num_vnvdimms; i++) {
+ size = libxl__build_dm_vnvdimm_args(gc, dm_args, &vnvdimms[i], i);
+ if (!size)
+ break;
+ total_size += size;
+ }
+
+ return total_size;
+}
+
+#endif /* __linux__ */
+
static int libxl__build_device_model_args_new(libxl__gc *gc,
const char *dm, int guest_domid,
const libxl_domain_config *guest_config,
@@ -923,13 +1003,18 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
const libxl_device_nic *nics = guest_config->nics;
const int num_disks = guest_config->num_disks;
const int num_nics = guest_config->num_nics;
+#if defined(__linux__)
+ const int num_vnvdimms = guest_config->num_vnvdimms;
+#else
+ const int num_vnvdimms = 0;
+#endif
const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
const libxl_sdl_info *sdl = dm_sdl(guest_config);
const char *keymap = dm_keymap(guest_config);
char *machinearg;
flexarray_t *dm_args, *dm_envs;
int i, connection, devid, ret;
- uint64_t ram_size;
+    uint64_t ram_size, ram_size_in_bytes, vnvdimms_size = 0;
const char *path, *chardev;
char *user = NULL;
@@ -1313,6 +1398,9 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
}
}
+    if (num_vnvdimms)
+        machinearg = GCSPRINTF("%s,nvdimm", machinearg);
+
flexarray_append(dm_args, machinearg);
for (i = 0; b_info->extra_hvm && b_info->extra_hvm[i] != NULL; i++)
flexarray_append(dm_args, b_info->extra_hvm[i]);
@@ -1322,8 +1410,25 @@ static int libxl__build_device_model_args_new(libxl__gc *gc,
}
ram_size = libxl__sizekb_to_mb(b_info->max_memkb - b_info->video_memkb);
+    ram_size_in_bytes = ram_size * 1024 * 1024;
+    if (num_vnvdimms) {
+        vnvdimms_size = libxl__build_dm_vnvdimms_args(gc, dm_args,
+                                                      guest_config->vnvdimms,
+                                                      num_vnvdimms);
+        if (ram_size_in_bytes + vnvdimms_size < ram_size_in_bytes) {
+            LOG(ERROR,
+                "total size of RAM (%"PRIu64") and vNVDIMMs (%"PRIu64") overflows",
+                ram_size_in_bytes, vnvdimms_size);
+            return ERROR_INVAL;
+        }
+    }
flexarray_append(dm_args, "-m");
- flexarray_append(dm_args, GCSPRINTF("%"PRId64, ram_size));
+    flexarray_append(dm_args,
+                     vnvdimms_size ?
+                     GCSPRINTF("%"PRIu64",slots=%d,maxmem=%"PRIu64,
+                               ram_size, num_vnvdimms + 1,
+                               ROUNDUP(ram_size_in_bytes + vnvdimms_size, 12)) :
+                     GCSPRINTF("%"PRIu64, ram_size));
if (b_info->type == LIBXL_DOMAIN_TYPE_HVM) {
if (b_info->u.hvm.hdtype == LIBXL_HDTYPE_AHCI)
@@ -704,6 +704,13 @@ libxl_device_channel = Struct("device_channel", [
])),
])
+libxl_device_vnvdimm = Struct("device_vnvdimm", [
+ ("backend_domid", libxl_domid),
+ ("backend_domname", string),
+ ("devid", libxl_devid),
+ ("file", string),
+])
+
libxl_domain_config = Struct("domain_config", [
("c_info", libxl_domain_create_info),
("b_info", libxl_domain_build_info),
@@ -721,6 +728,7 @@ libxl_domain_config = Struct("domain_config", [
("channels", Array(libxl_device_channel, "num_channels")),
("usbctrls", Array(libxl_device_usbctrl, "num_usbctrls")),
("usbdevs", Array(libxl_device_usbdev, "num_usbdevs")),
+ ("vnvdimms", Array(libxl_device_vnvdimm, "num_vnvdimms")),
("on_poweroff", libxl_action_on_shutdown),
("on_reboot", libxl_action_on_shutdown),
@@ -718,6 +718,7 @@ void parse_config_data(const char *config_source,
XLU_ConfigList *cpus, *vbds, *nics, *pcis, *cvfbs, *cpuids, *vtpms,
*usbctrls, *usbdevs;
XLU_ConfigList *channels, *ioports, *irqs, *iomem, *viridian, *dtdevs;
+ XLU_ConfigList *vnvdimms;
int num_ioports, num_irqs, num_iomem, num_cpus, num_viridian;
int pci_power_mgmt = 0;
int pci_msitranslate = 0;
@@ -1902,6 +1903,21 @@ skip_usbdev:
}
}
+ if (!xlu_cfg_get_list (config, "vnvdimms", &vnvdimms, 0, 0)) {
+#if defined(__linux__)
+ while ((buf = xlu_cfg_get_listitem(vnvdimms,
+ d_config->num_vnvdimms)) != NULL) {
+ libxl_device_vnvdimm *vnvdimm =
+ ARRAY_EXTEND_INIT(d_config->vnvdimms, d_config->num_vnvdimms,
+ libxl_device_vnvdimm_init);
+ vnvdimm->file = strdup(buf);
+ }
+#else
+ fprintf(stderr, "ERROR: vnvdimms is only supported on Linux\n");
+ exit(-ERROR_FAIL);
+#endif /* __linux__ */
+ }
+
xlu_cfg_destroy(config);
}
For xl configs

    vnvdimms = [ '/path/to/pmem0', '/path/to/pmem1', ... ]

the following qemu options are built

    -machine <existing options>,nvdimm
    -m <existing options>,slots=$NR_SLOTS,maxmem=$MEM_SIZE
    -object memory-backend-xen,id=mem1,size=$PMEM0_SIZE,mem-path=/path/to/pmem0
    -device nvdimm,id=nvdimm1,memdev=mem1
    -object memory-backend-xen,id=mem2,size=$PMEM1_SIZE,mem-path=/path/to/pmem1
    -device nvdimm,id=nvdimm2,memdev=mem2
    ...

where
 - NR_SLOTS is the number of entries in vnvdimms + 1,
 - MEM_SIZE is the total size of all RAM and NVDIMM devices,
 - PMEM#_SIZE is the size of the host pmem device/file '/path/to/pmem#'.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>

The qemu option "-object memory-backend-xen" is added by the QEMU patch
"hostmem: add a host memory backend for Xen". Other qemu options have been
implemented since QEMU 2.6.0.

Changes in v2:
 * Update the manpage of xl.cfg for the new option "vnvdimms".
---
 docs/man/xl.cfg.pod.5.in    |   6 +++
 tools/libxl/libxl_dm.c      | 109 +++++++++++++++++++++++++++++++++++++++++++-
 tools/libxl/libxl_types.idl |   8 ++++
 tools/xl/xl_parse.c         |  16 +++++++
 4 files changed, 137 insertions(+), 2 deletions(-)
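
As a concrete illustration (hypothetical sizes and device paths, for
illustration only): with ram_size = 2048 MiB and two 4 GiB pmem devices
/dev/pmem0 and /dev/pmem1, the options above would expand to

    -machine <existing options>,nvdimm
    -m 2048,slots=3,maxmem=10737418240
    -object memory-backend-xen,id=mem1,size=4294967296,mem-path=/dev/pmem0
    -device nvdimm,id=nvdimm1,memdev=mem1
    -object memory-backend-xen,id=mem2,size=4294967296,mem-path=/dev/pmem1
    -device nvdimm,id=nvdimm2,memdev=mem2

since slots = 2 (vNVDIMMs) + 1 = 3, and 2048 MiB + 2 * 4 GiB =
10737418240 bytes, which is already a multiple of 2^12 (ROUNDUP(x, 12)
in libxl rounds up to a 4 KiB boundary).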
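
For reviewers who want to exercise the size probe on its own, below is a
minimal standalone sketch of the BLKGETSIZE64 query that
libxl__build_dm_vnvdimm_args() performs (a hypothetical test program, not
part of this patch):

    /* Hypothetical standalone test, not part of this patch: query the
     * size of a pmem block device with BLKGETSIZE64, the same way
     * libxl__build_dm_vnvdimm_args() does. */
    #include <errno.h>
    #include <fcntl.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/stat.h>
    #include <linux/fs.h>   /* BLKGETSIZE64 */

    int main(int argc, char **argv)
    {
        uint64_t size = 0;
        struct stat st;
        int fd;

        if (argc != 2) {
            fprintf(stderr, "usage: %s <pmem-device>\n", argv[0]);
            return 1;
        }

        fd = open(argv[1], O_RDONLY);
        if (fd < 0 || fstat(fd, &st)) {
            fprintf(stderr, "%s: %s\n", argv[1], strerror(errno));
            return 1;
        }

        if (!S_ISBLK(st.st_mode)) {
            fprintf(stderr, "%s is not a block device\n", argv[1]);
        } else if (ioctl(fd, BLKGETSIZE64, &size) == -1) {
            fprintf(stderr, "BLKGETSIZE64 %s: %s\n", argv[1], strerror(errno));
        } else {
            printf("%s: %" PRIu64 " bytes\n", argv[1], size);
        }

        close(fd);
        return size ? 0 : 1;
    }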