diff mbox

vhost: secure vhost shared log files using argv paremeter

Message ID CAJ+F1C+M7N+5bzQ+md226gU3Vh7Z2O9wBqxK_Ln_nKvLfOfGeg@mail.gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marc-André Lureau Oct. 22, 2016, 7:18 a.m. UTC
Hi

On Sat, Oct 22, 2016 at 10:01 AM Rafael David Tinoco <
rafael.tinoco@canonical.com> wrote:

Commit 31190ed7 added a migration blocker in vhost_dev_init() to
check if memfd would succeed. It is better if this blocker first
checks if vhost backend requires shared log. This will avoid a
situation where a blocker is added inappropriately (e.g. shared
log allocation fails when vhost backend doesn't support it).

Could you make this a seperate patch?


Commit: 35f9b6e added a fallback mechanism for systems not supporting
memfd_create syscall (started being supported since 3.17).

Backporting memfd_create might not be accepted for distros relying
on older kernels. Nowadays there is no way for security driver
to discover memfd filename to be created: <tmpdir>/memfd-XXXXXX.

Also, because vhost log file descriptors can be passed to other
processes, after discussion, we thought it is best to back mmap by
using files that can be placed into a specific directory: this commit
creates "vhostlog" argv parameter for such purpose. This will allow
security drivers to operate on those files appropriately.

Argv examples:

    -netdev tap,id=net0,vhost=on
    -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log
    -netdev tap,id=net0,vhost=on,vhostlog=/tmp


Could it be only a filename? This would simplify testing.



For vhost backends supporting shared logs, if vhostlog is non-existent,
or a directory, random files are going to be created in the specified
directory (or, for non-existent, in tmpdir). If vhostlog is specified,
the filepath is always used when allocating vhost log files.


Regarding testing, you add utility code mmap-file, could you make this a
seperate commit, with unit tests?

thanks

Signed-off-by: Rafael David Tinoco <rafael.tinoco@canonical.com>
---
 hw/net/vhost_net.c        |   4 +-
 hw/scsi/vhost-scsi.c      |   2 +-
 hw/virtio/vhost-vsock.c   |   2 +-
 hw/virtio/vhost.c         |  41 +++++++------
 include/hw/virtio/vhost.h |   4 +-
 include/net/vhost_net.h   |   1 +
 include/qemu/mmap-file.h  |  10 +++
 net/tap.c                 |   6 ++
 qapi-schema.json          |   3 +
 qemu-options.hx           |   3 +-
 util/Makefile.objs        |   1 +
 util/mmap-file.c          | 153
++++++++++++++++++++++++++++++++++++++++++++++
 12 files changed, 207 insertions(+), 23 deletions(-)
 create mode 100644 include/qemu/mmap-file.h
 create mode 100644 util/mmap-file.c

+}
--
2.9.3

Comments

Rafael Tinoco Oct. 22, 2016, 9:52 p.m. UTC | #1
Hello,

> On Oct 22, 2016, at 05:18, Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> 
> Hi
> 
> On Sat, Oct 22, 2016 at 10:01 AM Rafael David Tinoco <rafael.tinoco@canonical.com> wrote:
> Commit 31190ed7 added a migration blocker in vhost_dev_init() to
> check if memfd would succeed. It is better if this blocker first
> checks if vhost backend requires shared log. This will avoid a
> situation where a blocker is added inappropriately (e.g. shared
> log allocation fails when vhost backend doesn't support it).
> 
> Could you make this a seperate patch?

Just did, in another e-mail, cc'ing you.

> Argv examples:
> 
>     -netdev tap,id=net0,vhost=on
>     -netdev tap,id=net0,vhost=on,vhostlog=/tmp/guest.log
>     -netdev tap,id=net0,vhost=on,vhostlog=/tmp
> 
> Could it be only a filename? This would simplify testing.

It could. Should I keep the /tmp/<random> logic if no vhostlog arg is present ? Or you think it should fail if no arg is given ? I'm afraid of backward compatibility when back-porting this to older qemu versions on stable releases (like my case: I'll backport this to ~3 different versions). 

> For vhost backends supporting shared logs, if vhostlog is non-existent,
> or a directory, random files are going to be created in the specified
> directory (or, for non-existent, in tmpdir). If vhostlog is specified,
> the filepath is always used when allocating vhost log files.
> 
> 
> Regarding testing, you add utility code mmap-file, could you make this a seperate commit, with unit tests?
> 

Sure, I'll work on it.

> thanks

Thank u!

-Rafael Tinoco
diff mbox

Patch

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index f2d49ad..d650c92 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -171,8 +171,8 @@  struct vhost_net *vhost_net_init(VhostNetOptions
*options)
         net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
     }

-    r = vhost_dev_init(&net->dev, options->opaque,
-                       options->backend_type, options->busyloop_timeout);
+    r = vhost_dev_init(&net->dev, options->opaque, options->backend_type,
+                       options->busyloop_timeout, options->vhostlog);
     if (r < 0) {
         goto fail;
     }
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 5b26946..5dc3d30 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -248,7 +248,7 @@  static void vhost_scsi_realize(DeviceState *dev, Error
**errp)
     s->dev.backend_features = 0;

     ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
-                         VHOST_BACKEND_TYPE_KERNEL, 0);
+                         VHOST_BACKEND_TYPE_KERNEL, 0, NULL);
     if (ret < 0) {
         error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
                    strerror(-ret));
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index b481562..6cf6081 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -342,7 +342,7 @@  static void vhost_vsock_device_realize(DeviceState
*dev, Error **errp)
     vsock->vhost_dev.nvqs = ARRAY_SIZE(vsock->vhost_vqs);
     vsock->vhost_dev.vqs = vsock->vhost_vqs;
     ret = vhost_dev_init(&vsock->vhost_dev, (void *)(uintptr_t)vhostfd,
-                         VHOST_BACKEND_TYPE_KERNEL, 0);
+                         VHOST_BACKEND_TYPE_KERNEL, 0, NULL);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "vhost-vsock: vhost_dev_init failed");
         goto err_virtio;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index bd051ab..d874ebb 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -20,7 +20,7 @@ 
 #include "qemu/atomic.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
-#include "qemu/memfd.h"
+#include "qemu/mmap-file.h"
 #include <linux/vhost.h>
 #include "exec/address-spaces.h"
 #include "hw/virtio/virtio-bus.h"
@@ -326,7 +326,7 @@  static uint64_t vhost_get_log_size(struct vhost_dev
*dev)
     return log_size;
 }

-static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
+static struct vhost_log *vhost_log_alloc(char *path, uint64_t size, bool
share)
 {
     struct vhost_log *log;
     uint64_t logsize = size * sizeof(*(log->log));
@@ -334,9 +334,7 @@  static struct vhost_log *vhost_log_alloc(uint64_t size,
bool share)

     log = g_new0(struct vhost_log, 1);
     if (share) {
-        log->log = qemu_memfd_alloc("vhost-log", logsize,
-                                    F_SEAL_GROW | F_SEAL_SHRINK |
F_SEAL_SEAL,
-                                    &fd);
+        log->log = qemu_mmap_alloc(path, logsize, &fd);
         memset(log->log, 0, logsize);
     } else {
         log->log = g_malloc0(logsize);
@@ -349,12 +347,12 @@  static struct vhost_log *vhost_log_alloc(uint64_t
size, bool share)
     return log;
 }

-static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+static struct vhost_log *vhost_log_get(char *path, uint64_t size, bool
share)
 {
     struct vhost_log *log = share ? vhost_log_shm : vhost_log;

     if (!log || log->size != size) {
-        log = vhost_log_alloc(size, share);
+        log = vhost_log_alloc(path, size, share);
         if (share) {
             vhost_log_shm = log;
         } else {
@@ -388,8 +386,7 @@  static void vhost_log_put(struct vhost_dev *dev, bool
sync)
             g_free(log->log);
             vhost_log = NULL;
         } else if (vhost_log_shm == log) {
-            qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
-                            log->fd);
+            qemu_mmap_free(log->log, log->size * sizeof(*(log->log)),
log->fd);
             vhost_log_shm = NULL;
         }

@@ -405,9 +402,12 @@  static bool vhost_dev_log_is_shared(struct vhost_dev
*dev)

 static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t
size)
 {
-    struct vhost_log *log = vhost_log_get(size,
vhost_dev_log_is_shared(dev));
-    uint64_t log_base = (uintptr_t)log->log;
     int r;
+    struct vhost_log *log;
+    uint64_t log_base;
+
+    log = vhost_log_get(dev->log_filename, size,
vhost_dev_log_is_shared(dev));
+    log_base = (uintptr_t)log->log;

     /* inform backend of log switching, this must be done before
        releasing the current log, to ensure no logging is lost */
@@ -1049,7 +1049,8 @@  static void vhost_virtqueue_cleanup(struct
vhost_virtqueue *vq)
 }

 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
-                   VhostBackendType backend_type, uint32_t
busyloop_timeout)
+                   VhostBackendType backend_type,
+                   uint32_t busyloop_timeout, char *vhostlog)
 {
     uint64_t features;
     int i, r, n_initialized_vqs = 0;
@@ -1118,11 +1119,18 @@  int vhost_dev_init(struct vhost_dev *hdev, void
*opaque,
         .priority = 10
     };

+    hdev->log = NULL;
+    hdev->log_size = 0;
+    hdev->log_enabled = false;
+    hdev->log_filename = vhostlog ? g_strdup(vhostlog) : NULL;
+    g_free(vhostlog);
+
     if (hdev->migration_blocker == NULL) {
         if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
             error_setg(&hdev->migration_blocker,
                        "Migration disabled: vhost lacks VHOST_F_LOG_ALL
feature.");
-        } else if (!qemu_memfd_check()) {
+        } else if (vhost_dev_log_is_shared(hdev) &&
+                !qemu_mmap_check(hdev->log_filename)) {
             error_setg(&hdev->migration_blocker,
                        "Migration disabled: failed to allocate shared
memory");
         }
@@ -1135,9 +1143,6 @@  int vhost_dev_init(struct vhost_dev *hdev, void
*opaque,
     hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
     hdev->n_mem_sections = 0;
     hdev->mem_sections = NULL;
-    hdev->log = NULL;
-    hdev->log_size = 0;
-    hdev->log_enabled = false;
     hdev->started = false;
     hdev->memory_changed = false;
     memory_listener_register(&hdev->memory_listener,
&address_space_memory);
@@ -1175,6 +1180,7 @@  void vhost_dev_cleanup(struct vhost_dev *hdev)
     if (hdev->vhost_ops) {
         hdev->vhost_ops->vhost_backend_cleanup(hdev);
     }
+    g_free(hdev->log_filename);
     assert(!hdev->log);

     memset(hdev, 0, sizeof(struct vhost_dev));
@@ -1335,7 +1341,8 @@  int vhost_dev_start(struct vhost_dev *hdev,
VirtIODevice *vdev)
         uint64_t log_base;

         hdev->log_size = vhost_get_log_size(hdev);
-        hdev->log = vhost_log_get(hdev->log_size,
+        hdev->log = vhost_log_get(hdev->log_filename,
+                                  hdev->log_size,
                                   vhost_dev_log_is_shared(hdev));
         log_base = (uintptr_t)hdev->log->log;
         r = hdev->vhost_ops->vhost_set_log_base(hdev,
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index e433089..1ea4f3a 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -52,6 +52,7 @@  struct vhost_dev {
     uint64_t max_queues;
     bool started;
     bool log_enabled;
+    char *log_filename;
     uint64_t log_size;
     Error *migration_blocker;
     bool memory_changed;
@@ -65,7 +66,8 @@  struct vhost_dev {

 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
                    VhostBackendType backend_type,
-                   uint32_t busyloop_timeout);
+                   uint32_t busyloop_timeout,
+                   char *vhostlog);
 void vhost_dev_cleanup(struct vhost_dev *hdev);
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 5a08eff..94161b2 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -12,6 +12,7 @@  typedef struct VhostNetOptions {
     NetClientState *net_backend;
     uint32_t busyloop_timeout;
     void *opaque;
+    char *vhostlog;
 } VhostNetOptions;

 uint64_t vhost_net_get_max_queues(VHostNetState *net);
diff --git a/include/qemu/mmap-file.h b/include/qemu/mmap-file.h
new file mode 100644
index 0000000..427612a
--- /dev/null
+++ b/include/qemu/mmap-file.h
@@ -0,0 +1,10 @@ 
+#ifndef QEMU_MMAP_FILE_H
+#define QEMU_MMAP_FILE_H
+
+#include "qemu-common.h"
+
+void *qemu_mmap_alloc(const char *path, size_t size, int *fd);
+void qemu_mmap_free(void *ptr, size_t size, int fd);
+bool qemu_mmap_check(const char *path);
+
+#endif
diff --git a/net/tap.c b/net/tap.c
index b6896a7..7b242cd 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -699,6 +699,12 @@  static void net_init_tap_one(const NetdevTapOptions
*tap, NetClientState *peer,
         }
         options.opaque = (void *)(uintptr_t)vhostfd;

+        if (tap->has_vhostlog) {
+            options.vhostlog = g_strdup(tap->vhostlog);
+        } else {
+            options.vhostlog = NULL;
+        }
+
         s->vhost_net = vhost_net_init(&options);
         if (!s->vhost_net) {
             error_setg(errp,
diff --git a/qapi-schema.json b/qapi-schema.json
index 5a8ec38..72608bd 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2640,6 +2640,8 @@ 
 #
 # @vhostforce: #optional vhost on for non-MSIX virtio guests
 #
+# @vhostlog: #optional file or directory for vhost backend log
+#
 # @queues: #optional number of queues to be created for multiqueue capable
tap
 #
 # @poll-us: #optional maximum number of microseconds that could
@@ -2662,6 +2664,7 @@ 
     '*vhostfd':    'str',
     '*vhostfds':   'str',
     '*vhostforce': 'bool',
+    '*vhostlog':   'str',
     '*queues':     'uint32',
     '*poll-us':    'uint32'} }

diff --git a/qemu-options.hx b/qemu-options.hx
index b1fbdb0..5c09c09 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1599,7 +1599,7 @@  DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 #else
     "-netdev
tap,id=str[,fd=h][,fds=x:y:...:z][,ifname=name][,script=file][,downscript=dfile]\n"
     "
 [,br=bridge][,helper=helper][,sndbuf=nbytes][,vnet_hdr=on|off][,vhost=on|off]\n"
-    "
 [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,queues=n]\n"
+    "
 [,vhostfd=h][,vhostfds=x:y:...:z][,vhostforce=on|off][,vhostlog=file|dir][,queues=n]\n"
     "         [,poll-us=n]\n"
     "                configure a host TAP network backend with ID 'str'\n"
     "                connected to a bridge (default="
DEFAULT_BRIDGE_INTERFACE ")\n"
@@ -1618,6 +1618,7 @@  DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                use vhost=on to enable experimental in kernel
accelerator\n"
     "                    (only has effect for virtio guests which use
MSIX)\n"
     "                use vhostforce=on to force vhost on for non-MSIX
virtio guests\n"
+    "                use 'vhostlog=file|dir' file or directory for vhost
backend log\n"
     "                use 'vhostfd=h' to connect to an already opened vhost
net device\n"
     "                use 'vhostfds=x:y:...:z to connect to multiple
already opened vhost net devices\n"
     "                use 'queues=n' to specify the number of queues to be
created for multiqueue TAP\n"
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 36c7dcc..69bb27a 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -3,6 +3,7 @@  util-obj-y += bufferiszero.o
 util-obj-$(CONFIG_POSIX) += compatfd.o
 util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
 util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += mmap-file.o
 util-obj-$(CONFIG_POSIX) += oslib-posix.o
 util-obj-$(CONFIG_POSIX) += qemu-openpty.o
 util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
diff --git a/util/mmap-file.c b/util/mmap-file.c
new file mode 100644
index 0000000..ce778cf
--- /dev/null
+++ b/util/mmap-file.c
@@ -0,0 +1,153 @@ 
+/*
+ * Support for file backed by mmaped host memory.
+ *
+ * Authors:
+ *  Rafael David Tinoco <rafael.tinoco@canonical.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/mmap-file.h"
+
+static char *qemu_mmap_rand_name(void)
+{
+    char *name;
+    GRand *rsufix;
+    guint32 sufix;
+
+    rsufix = g_rand_new();
+    sufix = g_rand_int(rsufix);
+    g_free(rsufix);
+    name = g_strdup_printf("mmap-%u", sufix);
+
+    return name;
+}
+
+static inline void qemu_mmap_rand_name_free(char *str)
+{
+    g_free(str);
+}
+
+static bool qemu_mmap_is(const char *path, mode_t what)
+{
+    struct stat s;
+
+    memset(&s,  0, sizeof(struct stat));
+    if (stat(path, &s)) {
+        perror("stat");
+        goto negative;
+    }
+
+    if ((s.st_mode & S_IFMT) == what) {
+        return true;
+    }
+
+negative:
+    return false;
+}
+
+static inline bool qemu_mmap_is_file(const char *path)
+{
+    return qemu_mmap_is(path, S_IFREG);
+}
+
+static inline bool qemu_mmap_is_dir(const char *path)
+{
+    return qemu_mmap_is(path, S_IFDIR);
+}
+
+static void *qemu_mmap_alloc_file(const char *filepath, size_t size, int
*fd)
+{
+    void *ptr;
+    int mfd = -1;
+
+    *fd = -1;
+
+    mfd = open(filepath, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
+    if (mfd == -1) {
+        perror("open");
+        return NULL;
+    }
+
+    unlink(filepath);
+
+    if (ftruncate(mfd, size) == -1) {
+        perror("ftruncate");
+        close(mfd);
+        return NULL;
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+    if (ptr == MAP_FAILED) {
+        perror("mmap");
+        close(mfd);
+        return NULL;
+    }
+
+    *fd = mfd;
+    return ptr;
+}
+
+static void *qemu_mmap_alloc_dir(const char *dirpath, size_t size, int *fd)
+{
+    void *ptr;
+    char *file, *rand, *tmp, *dir2use = NULL;
+
+    if (dirpath && !qemu_mmap_is_dir(dirpath)) {
+        return NULL;
+    }
+
+    tmp = (char *) g_get_tmp_dir();
+    dir2use = dirpath ? (char *) dirpath : tmp;
+    rand = qemu_mmap_rand_name();
+    file = g_strdup_printf("%s/%s", dir2use, rand);
+    ptr = qemu_mmap_alloc_file(file, size, fd);
+    g_free(tmp);
+    qemu_mmap_rand_name_free(rand);
+
+    return ptr;
+}
+
+/*
+ * "path" can be:
+ *
+ *   filename = full path for the file to back mmap
+ *   dir path = full dir path where to create random file for mmap
+ *   null     = will use <tmpdir>  to create random file for mmap
+ */
+void *qemu_mmap_alloc(const char *path, size_t size, int *fd)
+{
+    if (!path || qemu_mmap_is_dir(path)) {
+        return qemu_mmap_alloc_dir(path, size, fd);
+    }
+
+    return qemu_mmap_alloc_file(path, size, fd);
+}
+
+void qemu_mmap_free(void *ptr, size_t size, int fd)
+{
+    if (ptr) {
+        munmap(ptr, size);
+    }
+
+    if (fd != -1) {
+        close(fd);
+    }
+}
+
+bool qemu_mmap_check(const char *path)
+{
+    void *ptr;
+    int fd = -1;
+    bool r = true;
+
+    ptr = qemu_mmap_alloc(path, 4096, &fd);
+    if (!ptr) {
+        r = false;
+    }
+    qemu_mmap_free(ptr, 4096, fd);
+
+    return r == true ? true : false;