Message ID | 20161022070041.23763-1-rafael.tinoco@canonical.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Oct 22, 2016 at 07:00:41AM +0000, Rafael David Tinoco wrote: > Commit 31190ed7 added a migration blocker in vhost_dev_init() to > check if memfd would succeed. It is better if this blocker first > checks if vhost backend requires shared log. This will avoid a > situation where a blocker is added inappropriately (e.g. shared > log allocation fails when vhost backend doesn't support it). Sounds like a bugfix but I'm not sure. Can this part be split out in a patch by itself? > Commit: 35f9b6e added a fallback mechanism for systems not supporting > memfd_create syscall (started being supported since 3.17). > > Backporting memfd_create might not be accepted for distros relying > on older kernels. Nowadays there is no way for security driver > to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. > > Also, because vhost log file descriptors can be passed to other > processes, after discussion, we thought it is best to back mmap by > using files that can be placed into a specific directory: this commit > creates "vhostlog" argv parameter for such purpose. This will allow > security drivers to operate on those files appropriately. > > Argv examples: > > -netdev tap,id=net0,vhost=on > -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log > -netdev tap,id=net0,vhost=on,vhostlog=/tmp > > For vhost backends supporting shared logs, if vhostlog is non-existent, > or a directory, random files are going to be created in the specified > directory (or, for non-existent, in tmpdir). If vhostlog is specified, > the filepath is always used when allocating vhost log files. When vhostlog is not specified, can we just use memfd as we did? > Signed-off-by: Rafael David Tinoco <rafael.tinoco@canonical.com> > --- > hw/net/vhost_net.c | 4 +- > hw/scsi/vhost-scsi.c | 2 +- > hw/virtio/vhost-vsock.c | 2 +- > hw/virtio/vhost.c | 41 +++++++------ > include/hw/virtio/vhost.h | 4 +- > include/net/vhost_net.h | 1 + > include/qemu/mmap-file.h | 10 +++ > net/tap.c | 6 ++ > qapi-schema.json | 3 + > qemu-options.hx | 3 +- > util/Makefile.objs | 1 + > util/mmap-file.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++ > 12 files changed, 207 insertions(+), 23 deletions(-) > create mode 100644 include/qemu/mmap-file.h > create mode 100644 util/mmap-file.c > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c > index f2d49ad..d650c92 100644 > --- a/hw/net/vhost_net.c > +++ b/hw/net/vhost_net.c > @@ -171,8 +171,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) > net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; > } > > - r = vhost_dev_init(&net->dev, options->opaque, > - options->backend_type, options->busyloop_timeout); > + r = vhost_dev_init(&net->dev, options->opaque, options->backend_type, > + options->busyloop_timeout, options->vhostlog); > if (r < 0) { > goto fail; > } > diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c > index 5b26946..5dc3d30 100644 > --- a/hw/scsi/vhost-scsi.c > +++ b/hw/scsi/vhost-scsi.c > @@ -248,7 +248,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) > s->dev.backend_features = 0; > > ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd, > - VHOST_BACKEND_TYPE_KERNEL, 0); > + VHOST_BACKEND_TYPE_KERNEL, 0, NULL); > if (ret < 0) { > error_setg(errp, "vhost-scsi: vhost initialization failed: %s", > strerror(-ret)); > diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c > index b481562..6cf6081 100644 > --- a/hw/virtio/vhost-vsock.c > +++ b/hw/virtio/vhost-vsock.c > @@ -342,7 +342,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) > vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); > vsock->vhost_dev.vqs = vsock->vhost_vqs; > ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, > - VHOST_BACKEND_TYPE_KERNEL, 0); > + VHOST_BACKEND_TYPE_KERNEL, 0, NULL); > if (ret < 0) { > error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); > goto err_virtio; > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c > index bd051ab..d874ebb 100644 > --- a/hw/virtio/vhost.c > +++ b/hw/virtio/vhost.c > @@ -20,7 +20,7 @@ > #include "qemu/atomic.h" > #include "qemu/range.h" > #include "qemu/error-report.h" > -#include "qemu/memfd.h" > +#include "qemu/mmap-file.h" > #include <linux/vhost.h> > #include "exec/address-spaces.h" > #include "hw/virtio/virtio-bus.h" > @@ -326,7 +326,7 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) > return log_size; > } > > -static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) > +static struct vhost_log *vhost_log_alloc(char *path, uint64_t size, bool share) > { > struct vhost_log *log; > uint64_t logsize = size * sizeof(*(log->log)); > @@ -334,9 +334,7 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) > > log = g_new0(struct vhost_log, 1); > if (share) { > - log->log = qemu_memfd_alloc("vhost-log", logsize, > - F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, > - &fd); > + log->log = qemu_mmap_alloc(path, logsize, &fd); > memset(log->log, 0, logsize); > } else { > log->log = g_malloc0(logsize); > @@ -349,12 +347,12 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) > return log; > } > > -static struct vhost_log *vhost_log_get(uint64_t size, bool share) > +static struct vhost_log *vhost_log_get(char *path, uint64_t size, bool share) > { > struct vhost_log *log = share ? vhost_log_shm : vhost_log; > > if (!log || log->size != size) { > - log = vhost_log_alloc(size, share); > + log = vhost_log_alloc(path, size, share); > if (share) { > vhost_log_shm = log; > } else { > @@ -388,8 +386,7 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) > g_free(log->log); > vhost_log = NULL; > } else if (vhost_log_shm == log) { > - qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), > - log->fd); > + qemu_mmap_free(log->log, log->size * sizeof(*(log->log)), log->fd); > vhost_log_shm = NULL; > } > > @@ -405,9 +402,12 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) > > static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) > { > - struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); > - uint64_t log_base = (uintptr_t)log->log; > int r; > + struct vhost_log *log; > + uint64_t log_base; > + > + log = vhost_log_get(dev->log_filename, size, vhost_dev_log_is_shared(dev)); > + log_base = (uintptr_t)log->log; > > /* inform backend of log switching, this must be done before > releasing the current log, to ensure no logging is lost */ > @@ -1049,7 +1049,8 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) > } > > int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > - VhostBackendType backend_type, uint32_t busyloop_timeout) > + VhostBackendType backend_type, > + uint32_t busyloop_timeout, char *vhostlog) > { > uint64_t features; > int i, r, n_initialized_vqs = 0; > @@ -1118,11 +1119,18 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > .priority = 10 > }; > > + hdev->log = NULL; > + hdev->log_size = 0; > + hdev->log_enabled = false; > + hdev->log_filename = vhostlog ? g_strdup(vhostlog) : NULL; > + g_free(vhostlog); > + > if (hdev->migration_blocker == NULL) { > if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { > error_setg(&hdev->migration_blocker, > "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); > - } else if (!qemu_memfd_check()) { > + } else if (vhost_dev_log_is_shared(hdev) && > + !qemu_mmap_check(hdev->log_filename)) { > error_setg(&hdev->migration_blocker, > "Migration disabled: failed to allocate shared memory"); > } > @@ -1135,9 +1143,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); > hdev->n_mem_sections = 0; > hdev->mem_sections = NULL; > - hdev->log = NULL; > - hdev->log_size = 0; > - hdev->log_enabled = false; > hdev->started = false; > hdev->memory_changed = false; > memory_listener_register(&hdev->memory_listener, &address_space_memory); > @@ -1175,6 +1180,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) > if (hdev->vhost_ops) { > hdev->vhost_ops->vhost_backend_cleanup(hdev); > } > + g_free(hdev->log_filename); > assert(!hdev->log); > > memset(hdev, 0, sizeof(struct vhost_dev)); > @@ -1335,7 +1341,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) > uint64_t log_base; > > hdev->log_size = vhost_get_log_size(hdev); > - hdev->log = vhost_log_get(hdev->log_size, > + hdev->log = vhost_log_get(hdev->log_filename, > + hdev->log_size, > vhost_dev_log_is_shared(hdev)); > log_base = (uintptr_t)hdev->log->log; > r = hdev->vhost_ops->vhost_set_log_base(hdev, > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h > index e433089..1ea4f3a 100644 > --- a/include/hw/virtio/vhost.h > +++ b/include/hw/virtio/vhost.h > @@ -52,6 +52,7 @@ struct vhost_dev { > uint64_t max_queues; > bool started; > bool log_enabled; > + char *log_filename; > uint64_t log_size; > Error *migration_blocker; > bool memory_changed; > @@ -65,7 +66,8 @@ struct vhost_dev { > > int vhost_dev_init(struct vhost_dev *hdev, void *opaque, > VhostBackendType backend_type, > - uint32_t busyloop_timeout); > + uint32_t busyloop_timeout, > + char *vhostlog); > void vhost_dev_cleanup(struct vhost_dev *hdev); > int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); > void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); > diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h > index 5a08eff..94161b2 100644 > --- a/include/net/vhost_net.h > +++ b/include/net/vhost_net.h > @@ -12,6 +12,7 @@ typedef struct VhostNetOptions { > NetClientState *net_backend; > uint32_t busyloop_timeout; > void *opaque; > + char *vhostlog; > } VhostNetOptions; > > uint64_t vhost_net_get_max_queues(VHostNetState *net); > diff --git a/include/qemu/mmap-file.h b/include/qemu/mmap-file.h > new file mode 100644 > index 0000000..427612a > --- /dev/null > +++ b/include/qemu/mmap-file.h > @@ -0,0 +1,10 @@ > +#ifndef QEMU_MMAP_FILE_H > +#define QEMU_MMAP_FILE_H > + > +#include "qemu-common.h" > + > +void *qemu_mmap_alloc(const char *path, size_t size, int *fd); > +void qemu_mmap_free(void *ptr, size_t size, int fd); > +bool qemu_mmap_check(const char *path); > + > +#endif > diff --git a/net/tap.c b/net/tap.c > index b6896a7..7b242cd 100644 > --- a/net/tap.c > +++ b/net/tap.c > @@ -699,6 +699,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, > } > options.opaque = (void *)(uintptr_t)vhostfd; > > + if (tap->has_vhostlog) { > + options.vhostlog = g_strdup(tap->vhostlog); > + } else { > + options.vhostlog = NULL; > + } > + > s->vhost_net = vhost_net_init(&options); > if (!s->vhost_net) { > error_setg(errp, > diff --git a/qapi-schema.json b/qapi-schema.json > index 5a8ec38..72608bd 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -2640,6 +2640,8 @@ > # > # @vhostforce: #optional vhost on for non-MSIX virtio guests > # > +# @vhostlog: #optional file or directory for vhost backend log > +# > # @queues: #optional number of queues to be created for multiqueue capable tap > # > # @poll-us: #optional maximum number of microseconds that could > @@ -2662,6 +2664,7 @@ > '*vhostfd': 'str', > '*vhostfds': 'str', > '*vhostforce': 'bool', > + '*vhostlog': 'str', > '*queues': 'uint32', > '*poll-us': 'uint32'} } > > diff --git a/qemu-options.hx b/qemu-options.hx > index b1fbdb0..5c09c09 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -1599,7 +1599,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, > #else > "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" > " [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" > - " [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" > + " [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,vhostlog=file|dir][,queues=n]\n" > " [,poll-us=n]\n" > " configure a host TAP network backend with ID 'str'\n" > " connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n" > @@ -1618,6 +1618,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, > " use vhost=on to enable experimental in kernel accelerator\n" > " (only has effect for virtio guests which use MSIX)\n" > " use vhostforce=on to force vhost on for non-MSIX virtio guests\n" > + " use 'vhostlog=file|dir' file or directory for vhost backend log\n" > " use 'vhostfd=h' to connect to an already opened vhost net device\n" > " use 'vhostfds=x:y:...:z to connect to multiple already opened vhost net devices\n" > " use 'queues=n' to specify the number of queues to be created for multiqueue TAP\n" > diff --git a/util/Makefile.objs b/util/Makefile.objs > index 36c7dcc..69bb27a 100644 > --- a/util/Makefile.objs > +++ b/util/Makefile.objs > @@ -3,6 +3,7 @@ util-obj-y += bufferiszero.o > util-obj-$(CONFIG_POSIX) += compatfd.o > util-obj-$(CONFIG_POSIX) += event_notifier-posix.o > util-obj-$(CONFIG_POSIX) += mmap-alloc.o > +util-obj-$(CONFIG_POSIX) += mmap-file.o > util-obj-$(CONFIG_POSIX) += oslib-posix.o > util-obj-$(CONFIG_POSIX) += qemu-openpty.o > util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o > diff --git a/util/mmap-file.c b/util/mmap-file.c > new file mode 100644 > index 0000000..ce778cf > --- /dev/null > +++ b/util/mmap-file.c > @@ -0,0 +1,153 @@ > +/* > + * Support for file backed by mmaped host memory. > + * > + * Authors: > + * Rafael David Tinoco <rafael.tinoco@canonical.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * later. See the COPYING file in the top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/mmap-file.h" > + > +static char *qemu_mmap_rand_name(void) > +{ > + char *name; > + GRand *rsufix; > + guint32 sufix; > + > + rsufix = g_rand_new(); > + sufix = g_rand_int(rsufix); > + g_free(rsufix); > + name = g_strdup_printf("mmap-%u", sufix); > + > + return name; > +} > + > +static inline void qemu_mmap_rand_name_free(char *str) > +{ > + g_free(str); > +} > + > +static bool qemu_mmap_is(const char *path, mode_t what) > +{ > + struct stat s; > + > + memset(&s, 0, sizeof(struct stat)); > + if (stat(path, &s)) { > + perror("stat"); > + goto negative; > + } > + > + if ((s.st_mode & S_IFMT) == what) { > + return true; > + } > + > +negative: > + return false; > +} > + > +static inline bool qemu_mmap_is_file(const char *path) > +{ > + return qemu_mmap_is(path, S_IFREG); > +} > + > +static inline bool qemu_mmap_is_dir(const char *path) > +{ > + return qemu_mmap_is(path, S_IFDIR); > +} > + > +static void *qemu_mmap_alloc_file(const char *filepath, size_t size, int *fd) > +{ > + void *ptr; > + int mfd = -1; > + > + *fd = -1; > + > + mfd = open(filepath, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); > + if (mfd == -1) { > + perror("open"); > + return NULL; > + } > + > + unlink(filepath); > + > + if (ftruncate(mfd, size) == -1) { > + perror("ftruncate"); > + close(mfd); > + return NULL; > + } > + > + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); > + if (ptr == MAP_FAILED) { > + perror("mmap"); > + close(mfd); > + return NULL; > + } > + > + *fd = mfd; > + return ptr; > +} > + > +static void *qemu_mmap_alloc_dir(const char *dirpath, size_t size, int *fd) > +{ > + void *ptr; > + char *file, *rand, *tmp, *dir2use = NULL; > + > + if (dirpath && !qemu_mmap_is_dir(dirpath)) { > + return NULL; > + } > + > + tmp = (char *) g_get_tmp_dir(); > + dir2use = dirpath ? (char *) dirpath : tmp; > + rand = qemu_mmap_rand_name(); > + file = g_strdup_printf("%s/%s", dir2use, rand); > + ptr = qemu_mmap_alloc_file(file, size, fd); > + g_free(tmp); > + qemu_mmap_rand_name_free(rand); > + > + return ptr; > +} > + > +/* > + * "path" can be: > + * > + * filename = full path for the file to back mmap > + * dir path = full dir path where to create random file for mmap > + * null = will use <tmpdir> to create random file for mmap > + */ > +void *qemu_mmap_alloc(const char *path, size_t size, int *fd) > +{ > + if (!path || qemu_mmap_is_dir(path)) { > + return qemu_mmap_alloc_dir(path, size, fd); > + } > + > + return qemu_mmap_alloc_file(path, size, fd); > +} > + > +void qemu_mmap_free(void *ptr, size_t size, int fd) > +{ > + if (ptr) { > + munmap(ptr, size); > + } > + > + if (fd != -1) { > + close(fd); > + } > +} > + > +bool qemu_mmap_check(const char *path) > +{ > + void *ptr; > + int fd = -1; > + bool r = true; > + > + ptr = qemu_mmap_alloc(path, 4096, &fd); > + if (!ptr) { > + r = false; > + } > + qemu_mmap_free(ptr, 4096, fd); > + > + return r == true ? true : false; > +} > -- > 2.9.3
On Sun, Oct 30, 2016 at 5:26 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > On Sat, Oct 22, 2016 at 07:00:41AM +0000, Rafael David Tinoco wrote: > > Commit 31190ed7 added a migration blocker in vhost_dev_init() to > > check if memfd would succeed. It is better if this blocker first > > checks if vhost backend requires shared log. This will avoid a > > situation where a blocker is added inappropriately (e.g. shared > > log allocation fails when vhost backend doesn't support it). > > Sounds like a bugfix but I'm not sure. Can this part be split > out in a patch by itself? Already sent some days ago (and pointed by Marc today). > > Commit: 35f9b6e added a fallback mechanism for systems not supporting > > memfd_create syscall (started being supported since 3.17). > > > > Backporting memfd_create might not be accepted for distros relying > > on older kernels. Nowadays there is no way for security driver > > to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. > > > > Also, because vhost log file descriptors can be passed to other > > processes, after discussion, we thought it is best to back mmap by > > using files that can be placed into a specific directory: this commit > > creates "vhostlog" argv parameter for such purpose. This will allow > > security drivers to operate on those files appropriately. > > > > Argv examples: > > > > -netdev tap,id=net0,vhost=on > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp > > > > For vhost backends supporting shared logs, if vhostlog is non-existent, > > or a directory, random files are going to be created in the specified > > directory (or, for non-existent, in tmpdir). If vhostlog is specified, > > the filepath is always used when allocating vhost log files. > > When vhostlog is not specified, can we just use memfd as we did? > This was my approach on a "pastebin" example before this patch (in the discussion thread we had). Problem goes back to when vhost log file descriptor is shared with some vhost-user implementation - like the interface allows to - and the security driver labelling issue. IMO, yes, we could let vhostlog to specify a log file, and, if not specified, assume memfd is ok to be used. Please let me know if you - and Marc - want me to keep using memfd. I'll create the mmap-file tests and files in a different commit, like Marc has asked for, and will propose the patch again by the end of this week.
On Mon, Oct 31, 2016 at 08:35:33AM -0200, Rafael David Tinoco wrote: > On Sun, Oct 30, 2016 at 5:26 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Sat, Oct 22, 2016 at 07:00:41AM +0000, Rafael David Tinoco wrote: > > > Commit 31190ed7 added a migration blocker in vhost_dev_init() to > > > check if memfd would succeed. It is better if this blocker first > > > checks if vhost backend requires shared log. This will avoid a > > > situation where a blocker is added inappropriately (e.g. shared > > > log allocation fails when vhost backend doesn't support it). > > > > Sounds like a bugfix but I'm not sure. Can this part be split > > out in a patch by itself? > > Already sent some days ago (and pointed by Marc today). > > > > Commit: 35f9b6e added a fallback mechanism for systems not supporting > > > memfd_create syscall (started being supported since 3.17). > > > > > > Backporting memfd_create might not be accepted for distros relying > > > on older kernels. Nowadays there is no way for security driver > > > to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. > > > > > > Also, because vhost log file descriptors can be passed to other > > > processes, after discussion, we thought it is best to back mmap by > > > using files that can be placed into a specific directory: this commit > > > creates "vhostlog" argv parameter for such purpose. This will allow > > > security drivers to operate on those files appropriately. > > > > > > Argv examples: > > > > > > -netdev tap,id=net0,vhost=on > > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log > > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp > > > > > > For vhost backends supporting shared logs, if vhostlog is non-existent, > > > or a directory, random files are going to be created in the specified > > > directory (or, for non-existent, in tmpdir). If vhostlog is specified, > > > the filepath is always used when allocating vhost log files. > > > > When vhostlog is not specified, can we just use memfd as we did? > > > > This was my approach on a "pastebin" example before this patch (in the > discussion thread we had). Problem goes back to when vhost log file > descriptor is shared with some vhost-user implementation - like the > interface allows to - and the security driver labelling issue. IMO, > yes, we could let vhostlog to specify a log file, and, if not > specified, assume memfd is ok to be used. > > Please let me know if you - and Marc - want me to keep using memfd. > I'll create the mmap-file tests and files in a different commit, like > Marc has asked for, and will propose the patch again by the end of > this week. I think that the best approach is to allow passing in the fd, not the file path. If not passed, use memfd.
Hello Michael, André, Could you do a quick review before a final submission ? http://paste.ubuntu.com/23446279/ - I split the commits into 1) bugfix, 2) new util with test, 3) vhostlog The unit test is testing passing fds between 2 processes and asserting contents of mmap buffer coming from the "vhostlog" util (mmap-file). Your final comment on the "vhostlog" was: >> Argv examples: >> >> -netdev tap,id=net0,vhost=on >> -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log >> -netdev tap,id=net0,vhost=on,vhostlog=/tmp (André) > Could it be only a filename? This would simplify testing. (Michael) > When vhostlog is not specified, can we just use memfd as we did? I'm going to change this to: 1 - if vhostlog is not provided shared log can't be used. Use memfd. 2 - for shared logs, vhostlog has to be provided as a "file" ? Should i keep vhostlog being a directory also ? (i know we are unlinking the file so might not be needed BUT a static file might have a race condition in between different instances and providing a directory - that creates random files on it - might be better approach). Is there anything else ? Thank you Rafael Tinoco On Mon, Oct 31, 2016 at 8:30 PM, Michael S. Tsirkin <mst@redhat.com> wrote: > On Mon, Oct 31, 2016 at 08:35:33AM -0200, Rafael David Tinoco wrote: >> On Sun, Oct 30, 2016 at 5:26 PM, Michael S. Tsirkin <mst@redhat.com> wrote: >> > >> > On Sat, Oct 22, 2016 at 07:00:41AM +0000, Rafael David Tinoco wrote: >> > > Commit 31190ed7 added a migration blocker in vhost_dev_init() to >> > > check if memfd would succeed. It is better if this blocker first >> > > checks if vhost backend requires shared log. This will avoid a >> > > situation where a blocker is added inappropriately (e.g. shared >> > > log allocation fails when vhost backend doesn't support it). >> > >> > Sounds like a bugfix but I'm not sure. Can this part be split >> > out in a patch by itself? >> >> Already sent some days ago (and pointed by Marc today). >> >> > > Commit: 35f9b6e added a fallback mechanism for systems not supporting >> > > memfd_create syscall (started being supported since 3.17). >> > > >> > > Backporting memfd_create might not be accepted for distros relying >> > > on older kernels. Nowadays there is no way for security driver >> > > to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. >> > > >> > > Also, because vhost log file descriptors can be passed to other >> > > processes, after discussion, we thought it is best to back mmap by >> > > using files that can be placed into a specific directory: this commit >> > > creates "vhostlog" argv parameter for such purpose. This will allow >> > > security drivers to operate on those files appropriately. >> > > >> > > Argv examples: >> > > >> > > -netdev tap,id=net0,vhost=on >> > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log >> > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp >> > > >> > > For vhost backends supporting shared logs, if vhostlog is non-existent, >> > > or a directory, random files are going to be created in the specified >> > > directory (or, for non-existent, in tmpdir). If vhostlog is specified, >> > > the filepath is always used when allocating vhost log files. >> > >> > When vhostlog is not specified, can we just use memfd as we did? >> > >> >> This was my approach on a "pastebin" example before this patch (in the >> discussion thread we had). Problem goes back to when vhost log file >> descriptor is shared with some vhost-user implementation - like the >> interface allows to - and the security driver labelling issue. IMO, >> yes, we could let vhostlog to specify a log file, and, if not >> specified, assume memfd is ok to be used. >> >> Please let me know if you - and Marc - want me to keep using memfd. >> I'll create the mmap-file tests and files in a different commit, like >> Marc has asked for, and will propose the patch again by the end of >> this week. > > I think that the best approach is to allow passing in the fd, > not the file path. If not passed, use memfd. > > -- > MST
Hi On Tue, Nov 8, 2016 at 4:49 PM Rafael David Tinoco < rafael.tinoco@canonical.com> wrote: > Hello Michael, André, > > Could you do a quick review before a final submission ? > > http://paste.ubuntu.com/23446279/ > > - I split the commits into 1) bugfix, 2) new util with test, 3) vhostlog > > The unit test is testing passing fds between 2 processes and asserting > contents of mmap buffer coming from the "vhostlog" util (mmap-file). > > Your final comment on the "vhostlog" was: > > >> Argv examples: > >> > >> -netdev tap,id=net0,vhost=on > >> -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log > >> -netdev tap,id=net0,vhost=on,vhostlog=/tmp > > (André) > Could it be only a filename? This would simplify testing. > (Michael) > When vhostlog is not specified, can we just use memfd as we > did? > > Michael said: https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg08197.html I think that the best approach is to allow passing in the fd, not the file path. If not passed, use memfd. I do agree :) I'm going to change this to: > > 1 - if vhostlog is not provided shared log can't be used. Use memfd. > > 2 - for shared logs, vhostlog has to be provided as a "file" ? > > Should i keep vhostlog being a directory also ? (i know we are unlinking > the > file so might not be needed BUT a static file might have a race condition > in > between different instances and providing a directory - that creates random > files on it - might be better approach). > > Is there anything else ? > Do we really need to give a path? (pass fd with -add-fd/qmp add-fd) Thank you > > Rafael Tinoco > > On Mon, Oct 31, 2016 at 8:30 PM, Michael S. Tsirkin <mst@redhat.com> > wrote: > > On Mon, Oct 31, 2016 at 08:35:33AM -0200, Rafael David Tinoco wrote: > >> On Sun, Oct 30, 2016 at 5:26 PM, Michael S. Tsirkin <mst@redhat.com> > wrote: > >> > > >> > On Sat, Oct 22, 2016 at 07:00:41AM +0000, Rafael David Tinoco wrote: > >> > > Commit 31190ed7 added a migration blocker in vhost_dev_init() to > >> > > check if memfd would succeed. It is better if this blocker first > >> > > checks if vhost backend requires shared log. This will avoid a > >> > > situation where a blocker is added inappropriately (e.g. shared > >> > > log allocation fails when vhost backend doesn't support it). > >> > > >> > Sounds like a bugfix but I'm not sure. Can this part be split > >> > out in a patch by itself? > >> > >> Already sent some days ago (and pointed by Marc today). > >> > >> > > Commit: 35f9b6e added a fallback mechanism for systems not > supporting > >> > > memfd_create syscall (started being supported since 3.17). > >> > > > >> > > Backporting memfd_create might not be accepted for distros relying > >> > > on older kernels. Nowadays there is no way for security driver > >> > > to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. > >> > > > >> > > Also, because vhost log file descriptors can be passed to other > >> > > processes, after discussion, we thought it is best to back mmap by > >> > > using files that can be placed into a specific directory: this > commit > >> > > creates "vhostlog" argv parameter for such purpose. This will allow > >> > > security drivers to operate on those files appropriately. > >> > > > >> > > Argv examples: > >> > > > >> > > -netdev tap,id=net0,vhost=on > >> > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log > >> > > -netdev tap,id=net0,vhost=on,vhostlog=/tmp > >> > > > >> > > For vhost backends supporting shared logs, if vhostlog is > non-existent, > >> > > or a directory, random files are going to be created in the > specified > >> > > directory (or, for non-existent, in tmpdir). If vhostlog is > specified, > >> > > the filepath is always used when allocating vhost log files. > >> > > >> > When vhostlog is not specified, can we just use memfd as we did? > >> > > >> > >> This was my approach on a "pastebin" example before this patch (in the > >> discussion thread we had). Problem goes back to when vhost log file > >> descriptor is shared with some vhost-user implementation - like the > >> interface allows to - and the security driver labelling issue. IMO, > >> yes, we could let vhostlog to specify a log file, and, if not > >> specified, assume memfd is ok to be used. > >> > >> Please let me know if you - and Marc - want me to keep using memfd. > >> I'll create the mmap-file tests and files in a different commit, like > >> Marc has asked for, and will propose the patch again by the end of > >> this week. > > > > I think that the best approach is to allow passing in the fd, > > not the file path. If not passed, use memfd. > > > > -- > > MST > > -- Marc-André Lureau
Hello, > On Tue, Nov 8, 2016 at 4:49 PM Rafael David Tinoco <rafael.tinoco@canonical.com> wrote: > Hello Michael, André, > > Could you do a quick review before a final submission ? > > http://paste.ubuntu.com/23446279/ > ... > (André) > Could it be only a filename? This would simplify testing. > (Michael) > When vhostlog is not specified, can we just use memfd as we did? > > Michael said: https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg08197.html > I think that the best approach is to allow passing in the fd, not the file path. If not passed, use memfd. Missed this one. > I do agree :) Sounds good. I see that the new approach is to let the managing library to create the files and just pass the file descriptors, this way security rules are applied to library itself and not to qemu processes. > Do we really need to give a path? (pass fd with -add-fd/qmp add-fd) I guess not. So, for shared logs: - vhostlogfd has to be provided. - if vhostlogfd is not provided, use memfd. (we don't want writes in /tmp, should i remove fallback mechanism from memfd logic) - if memfd fails, log can't be shared/created and there is a migration blocker. André, Michael, I'll work on that and get the patches soon, meanwhile, could u push: - "vhost: migration blocker only if shared log is use" so I can backport it to Debian ? Thank you, -Rafael Tinoco
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index f2d49ad..d650c92 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -171,8 +171,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) net->dev.vq_index = net->nc->queue_index * net->dev.nvqs; } - r = vhost_dev_init(&net->dev, options->opaque, - options->backend_type, options->busyloop_timeout); + r = vhost_dev_init(&net->dev, options->opaque, options->backend_type, + options->busyloop_timeout, options->vhostlog); if (r < 0) { goto fail; } diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 5b26946..5dc3d30 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -248,7 +248,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) s->dev.backend_features = 0; ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, NULL); if (ret < 0) { error_setg(errp, "vhost-scsi: vhost initialization failed: %s", strerror(-ret)); diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index b481562..6cf6081 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -342,7 +342,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp) vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs); vsock->vhost_dev.vqs = vsock->vhost_vqs; ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd, - VHOST_BACKEND_TYPE_KERNEL, 0); + VHOST_BACKEND_TYPE_KERNEL, 0, NULL); if (ret < 0) { error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed"); goto err_virtio; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bd051ab..d874ebb 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -20,7 +20,7 @@ #include "qemu/atomic.h" #include "qemu/range.h" #include "qemu/error-report.h" -#include "qemu/memfd.h" +#include "qemu/mmap-file.h" #include <linux/vhost.h> #include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" @@ -326,7 +326,7 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) return log_size; } -static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) +static struct vhost_log *vhost_log_alloc(char *path, uint64_t size, bool share) { struct vhost_log *log; uint64_t logsize = size * sizeof(*(log->log)); @@ -334,9 +334,7 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) log = g_new0(struct vhost_log, 1); if (share) { - log->log = qemu_memfd_alloc("vhost-log", logsize, - F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, - &fd); + log->log = qemu_mmap_alloc(path, logsize, &fd); memset(log->log, 0, logsize); } else { log->log = g_malloc0(logsize); @@ -349,12 +347,12 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) return log; } -static struct vhost_log *vhost_log_get(uint64_t size, bool share) +static struct vhost_log *vhost_log_get(char *path, uint64_t size, bool share) { struct vhost_log *log = share ? vhost_log_shm : vhost_log; if (!log || log->size != size) { - log = vhost_log_alloc(size, share); + log = vhost_log_alloc(path, size, share); if (share) { vhost_log_shm = log; } else { @@ -388,8 +386,7 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) g_free(log->log); vhost_log = NULL; } else if (vhost_log_shm == log) { - qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), - log->fd); + qemu_mmap_free(log->log, log->size * sizeof(*(log->log)), log->fd); vhost_log_shm = NULL; } @@ -405,9 +402,12 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) { - struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); - uint64_t log_base = (uintptr_t)log->log; int r; + struct vhost_log *log; + uint64_t log_base; + + log = vhost_log_get(dev->log_filename, size, vhost_dev_log_is_shared(dev)); + log_base = (uintptr_t)log->log; /* inform backend of log switching, this must be done before releasing the current log, to ensure no logging is lost */ @@ -1049,7 +1049,8 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) } int vhost_dev_init(struct vhost_dev *hdev, void *opaque, - VhostBackendType backend_type, uint32_t busyloop_timeout) + VhostBackendType backend_type, + uint32_t busyloop_timeout, char *vhostlog) { uint64_t features; int i, r, n_initialized_vqs = 0; @@ -1118,11 +1119,18 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .priority = 10 }; + hdev->log = NULL; + hdev->log_size = 0; + hdev->log_enabled = false; + hdev->log_filename = vhostlog ? g_strdup(vhostlog) : NULL; + g_free(vhostlog); + if (hdev->migration_blocker == NULL) { if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); - } else if (!qemu_memfd_check()) { + } else if (vhost_dev_log_is_shared(hdev) && + !qemu_mmap_check(hdev->log_filename)) { error_setg(&hdev->migration_blocker, "Migration disabled: failed to allocate shared memory"); } @@ -1135,9 +1143,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); hdev->n_mem_sections = 0; hdev->mem_sections = NULL; - hdev->log = NULL; - hdev->log_size = 0; - hdev->log_enabled = false; hdev->started = false; hdev->memory_changed = false; memory_listener_register(&hdev->memory_listener, &address_space_memory); @@ -1175,6 +1180,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) if (hdev->vhost_ops) { hdev->vhost_ops->vhost_backend_cleanup(hdev); } + g_free(hdev->log_filename); assert(!hdev->log); memset(hdev, 0, sizeof(struct vhost_dev)); @@ -1335,7 +1341,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size, + hdev->log = vhost_log_get(hdev->log_filename, + hdev->log_size, vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; r = hdev->vhost_ops->vhost_set_log_base(hdev, diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index e433089..1ea4f3a 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -52,6 +52,7 @@ struct vhost_dev { uint64_t max_queues; bool started; bool log_enabled; + char *log_filename; uint64_t log_size; Error *migration_blocker; bool memory_changed; @@ -65,7 +66,8 @@ struct vhost_dev { int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, - uint32_t busyloop_timeout); + uint32_t busyloop_timeout, + char *vhostlog); void vhost_dev_cleanup(struct vhost_dev *hdev); int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev); diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h index 5a08eff..94161b2 100644 --- a/include/net/vhost_net.h +++ b/include/net/vhost_net.h @@ -12,6 +12,7 @@ typedef struct VhostNetOptions { NetClientState *net_backend; uint32_t busyloop_timeout; void *opaque; + char *vhostlog; } VhostNetOptions; uint64_t vhost_net_get_max_queues(VHostNetState *net); diff --git a/include/qemu/mmap-file.h b/include/qemu/mmap-file.h new file mode 100644 index 0000000..427612a --- /dev/null +++ b/include/qemu/mmap-file.h @@ -0,0 +1,10 @@ +#ifndef QEMU_MMAP_FILE_H +#define QEMU_MMAP_FILE_H + +#include "qemu-common.h" + +void *qemu_mmap_alloc(const char *path, size_t size, int *fd); +void qemu_mmap_free(void *ptr, size_t size, int fd); +bool qemu_mmap_check(const char *path); + +#endif diff --git a/net/tap.c b/net/tap.c index b6896a7..7b242cd 100644 --- a/net/tap.c +++ b/net/tap.c @@ -699,6 +699,12 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } options.opaque = (void *)(uintptr_t)vhostfd; + if (tap->has_vhostlog) { + options.vhostlog = g_strdup(tap->vhostlog); + } else { + options.vhostlog = NULL; + } + s->vhost_net = vhost_net_init(&options); if (!s->vhost_net) { error_setg(errp, diff --git a/qapi-schema.json b/qapi-schema.json index 5a8ec38..72608bd 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2640,6 +2640,8 @@ # # @vhostforce: #optional vhost on for non-MSIX virtio guests # +# @vhostlog: #optional file or directory for vhost backend log +# # @queues: #optional number of queues to be created for multiqueue capable tap # # @poll-us: #optional maximum number of microseconds that could @@ -2662,6 +2664,7 @@ '*vhostfd': 'str', '*vhostfds': 'str', '*vhostforce': 'bool', + '*vhostlog': 'str', '*queues': 'uint32', '*poll-us': 'uint32'} } diff --git a/qemu-options.hx b/qemu-options.hx index b1fbdb0..5c09c09 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1599,7 +1599,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, #else "-netdev tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n" " [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n" - " [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n" + " [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,vhostlog=file|dir][,queues=n]\n" " [,poll-us=n]\n" " configure a host TAP network backend with ID 'str'\n" " connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE ")\n" @@ -1618,6 +1618,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, " use vhost=on to enable experimental in kernel accelerator\n" " (only has effect for virtio guests which use MSIX)\n" " use vhostforce=on to force vhost on for non-MSIX virtio guests\n" + " use 'vhostlog=file|dir' file or directory for vhost backend log\n" " use 'vhostfd=h' to connect to an already opened vhost net device\n" " use 'vhostfds=x:y:...:z to connect to multiple already opened vhost net devices\n" " use 'queues=n' to specify the number of queues to be created for multiqueue TAP\n" diff --git a/util/Makefile.objs b/util/Makefile.objs index 36c7dcc..69bb27a 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -3,6 +3,7 @@ util-obj-y += bufferiszero.o util-obj-$(CONFIG_POSIX) += compatfd.o util-obj-$(CONFIG_POSIX) += event_notifier-posix.o util-obj-$(CONFIG_POSIX) += mmap-alloc.o +util-obj-$(CONFIG_POSIX) += mmap-file.o util-obj-$(CONFIG_POSIX) += oslib-posix.o util-obj-$(CONFIG_POSIX) += qemu-openpty.o util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o diff --git a/util/mmap-file.c b/util/mmap-file.c new file mode 100644 index 0000000..ce778cf --- /dev/null +++ b/util/mmap-file.c @@ -0,0 +1,153 @@ +/* + * Support for file backed by mmaped host memory. + * + * Authors: + * Rafael David Tinoco <rafael.tinoco@canonical.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/mmap-file.h" + +static char *qemu_mmap_rand_name(void) +{ + char *name; + GRand *rsufix; + guint32 sufix; + + rsufix = g_rand_new(); + sufix = g_rand_int(rsufix); + g_free(rsufix); + name = g_strdup_printf("mmap-%u", sufix); + + return name; +} + +static inline void qemu_mmap_rand_name_free(char *str) +{ + g_free(str); +} + +static bool qemu_mmap_is(const char *path, mode_t what) +{ + struct stat s; + + memset(&s, 0, sizeof(struct stat)); + if (stat(path, &s)) { + perror("stat"); + goto negative; + } + + if ((s.st_mode & S_IFMT) == what) { + return true; + } + +negative: + return false; +} + +static inline bool qemu_mmap_is_file(const char *path) +{ + return qemu_mmap_is(path, S_IFREG); +} + +static inline bool qemu_mmap_is_dir(const char *path) +{ + return qemu_mmap_is(path, S_IFDIR); +} + +static void *qemu_mmap_alloc_file(const char *filepath, size_t size, int *fd) +{ + void *ptr; + int mfd = -1; + + *fd = -1; + + mfd = open(filepath, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR); + if (mfd == -1) { + perror("open"); + return NULL; + } + + unlink(filepath); + + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +static void *qemu_mmap_alloc_dir(const char *dirpath, size_t size, int *fd) +{ + void *ptr; + char *file, *rand, *tmp, *dir2use = NULL; + + if (dirpath && !qemu_mmap_is_dir(dirpath)) { + return NULL; + } + + tmp = (char *) g_get_tmp_dir(); + dir2use = dirpath ? (char *) dirpath : tmp; + rand = qemu_mmap_rand_name(); + file = g_strdup_printf("%s/%s", dir2use, rand); + ptr = qemu_mmap_alloc_file(file, size, fd); + g_free(tmp); + qemu_mmap_rand_name_free(rand); + + return ptr; +} + +/* + * "path" can be: + * + * filename = full path for the file to back mmap + * dir path = full dir path where to create random file for mmap + * null = will use <tmpdir> to create random file for mmap + */ +void *qemu_mmap_alloc(const char *path, size_t size, int *fd) +{ + if (!path || qemu_mmap_is_dir(path)) { + return qemu_mmap_alloc_dir(path, size, fd); + } + + return qemu_mmap_alloc_file(path, size, fd); +} + +void qemu_mmap_free(void *ptr, size_t size, int fd) +{ + if (ptr) { + munmap(ptr, size); + } + + if (fd != -1) { + close(fd); + } +} + +bool qemu_mmap_check(const char *path) +{ + void *ptr; + int fd = -1; + bool r = true; + + ptr = qemu_mmap_alloc(path, 4096, &fd); + if (!ptr) { + r = false; + } + qemu_mmap_free(ptr, 4096, fd); + + return r == true ? true : false; +}
Commit 31190ed7 added a migration blocker in vhost_dev_init() to check if memfd would succeed. It is better if this blocker first checks if vhost backend requires shared log. This will avoid a situation where a blocker is added inappropriately (e.g. shared log allocation fails when vhost backend doesn't support it). Commit: 35f9b6e added a fallback mechanism for systems not supporting memfd_create syscall (started being supported since 3.17). Backporting memfd_create might not be accepted for distros relying on older kernels. Nowadays there is no way for security driver to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX. Also, because vhost log file descriptors can be passed to other processes, after discussion, we thought it is best to back mmap by using files that can be placed into a specific directory: this commit creates "vhostlog" argv parameter for such purpose. This will allow security drivers to operate on those files appropriately. Argv examples: -netdev tap,id=net0,vhost=on -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log -netdev tap,id=net0,vhost=on,vhostlog=/tmp For vhost backends supporting shared logs, if vhostlog is non-existent, or a directory, random files are going to be created in the specified directory (or, for non-existent, in tmpdir). If vhostlog is specified, the filepath is always used when allocating vhost log files. Signed-off-by: Rafael David Tinoco <rafael.tinoco@canonical.com> --- hw/net/vhost_net.c | 4 +- hw/scsi/vhost-scsi.c | 2 +- hw/virtio/vhost-vsock.c | 2 +- hw/virtio/vhost.c | 41 +++++++------ include/hw/virtio/vhost.h | 4 +- include/net/vhost_net.h | 1 + include/qemu/mmap-file.h | 10 +++ net/tap.c | 6 ++ qapi-schema.json | 3 + qemu-options.hx | 3 +- util/Makefile.objs | 1 + util/mmap-file.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 include/qemu/mmap-file.h create mode 100644 util/mmap-file.c