@@ -59,6 +59,7 @@ typedef struct ExtendedOps {
#define V9FS_RDONLY 0x00000040
#define V9FS_PROXY_SOCK_FD 0x00000080
#define V9FS_PROXY_SOCK_NAME 0x00000100
+#define V9FS_REMAP_INODES 0x00000200
#define V9FS_SEC_MASK 0x0000003C
@@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = {
}, {
.name = "readonly",
.type = QEMU_OPT_BOOL,
-
+ }, {
+ .name = "remap_inodes",
+ .type = QEMU_OPT_BOOL,
}, {
.name = "socket",
.type = QEMU_OPT_STRING,
@@ -76,6 +78,9 @@ static QemuOptsList qemu_virtfs_opts = {
.name = "readonly",
.type = QEMU_OPT_BOOL,
}, {
+ .name = "remap_inodes",
+ .type = QEMU_OPT_BOOL,
+ }, {
.name = "socket",
.type = QEMU_OPT_STRING,
}, {
@@ -121,6 +121,7 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp)
const char *fsdev_id = qemu_opts_id(opts);
const char *fsdriver = qemu_opt_get(opts, "fsdriver");
const char *writeout = qemu_opt_get(opts, "writeout");
+ bool remap_inodes = qemu_opt_get_bool(opts, "remap_inodes", 0);
bool ro = qemu_opt_get_bool(opts, "readonly", 0);
if (!fsdev_id) {
@@ -161,6 +162,11 @@ int qemu_fsdev_add(QemuOpts *opts, Error **errp)
} else {
fsle->fse.export_flags &= ~V9FS_RDONLY;
}
+ if (remap_inodes) {
+ fsle->fse.export_flags |= V9FS_REMAP_INODES;
+ } else {
+ fsle->fse.export_flags &= ~V9FS_REMAP_INODES;
+ }
if (fsle->fse.ops->parse_opts) {
if (fsle->fse.ops->parse_opts(opts, &fsle->fse, errp)) {
@@ -25,6 +25,7 @@
#include "trace.h"
#include "migration/blocker.h"
#include "sysemu/qtest.h"
+#include "qemu/xxhash.h"
int open_fd_hw;
int total_open_fd;
@@ -571,22 +572,98 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
P9_STAT_MODE_NAMED_PIPE | \
P9_STAT_MODE_SOCKET)
-/* This is the algorithm from ufs in spfs */
+
+/* creative abuse of tb_hash_func7, which is based on xxhash */
+static uint32_t qpp_hash(QppEntry e)
+{
+ return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
+}
+
+static bool qpp_lookup_func(const void *obj, const void *userp)
+{
+ const QppEntry *e1 = obj, *e2 = userp;
+ return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
+}
+
+static void qpp_table_remove(void *p, uint32_t h, void *up)
+{
+ g_free(p);
+}
+
+static void qpp_table_destroy(struct qht *ht)
+{
+ qht_iter(ht, qpp_table_remove, NULL);
+ qht_destroy(ht);
+}
+
+/* stat_to_qid needs to map inode number (64 bits) and device id (32 bits)
+ * to a unique QID path (64 bits). To avoid having to map and keep track
+ * of up to 2^64 objects, we map only the 16 highest bits of the inode plus
+ * the device id to the 16 highest bits of the QID path. The 48 lowest bits
+ * of the QID path equal to the lowest bits of the inode number.
+ *
+ * This takes advantage of the fact that inode number are usually not
+ * random but allocated sequentially, so we have fewer items to keep
+ * track of.
+ */
+static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf,
+ uint64_t *path)
+{
+ QppEntry lookup = {
+ .dev = stbuf->st_dev,
+ .ino_prefix = (uint16_t) (stbuf->st_ino >> 48)
+ }, *val;
+ uint32_t hash = qpp_hash(lookup);
+
+ val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
+
+ if (!val) {
+ if (pdu->s->qp_prefix_next == 0) {
+ /* we ran out of prefixes */
+ error_report_once(
+ "9p: No more prefixes available for remapping inodes from "
+ "host to guest."
+ );
+ return -ENFILE;
+ }
+
+ val = g_malloc0(sizeof(QppEntry));
+ *val = lookup;
+
+ /* new unique inode prefix and device combo */
+ val->qp_prefix = pdu->s->qp_prefix_next++;
+ qht_insert(&pdu->s->qpp_table, val, hash, NULL);
+ }
+
+ *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK);
+ return 0;
+}
+
static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
{
+ int err;
size_t size;
- if (pdu->s->dev_id != stbuf->st_dev) {
- error_report_once(
- "9p: Multiple devices detected in same VirtFS export. "
- "You must use a separate export for each device."
- );
- return -ENODEV;
+ if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
+ /* map inode+device to qid path (fast path) */
+ err = qid_path_prefixmap(pdu, stbuf, &qidp->path);
+ if (err) {
+ return err;
+ }
+ } else {
+ if (pdu->s->dev_id != stbuf->st_dev) {
+ error_report_once(
+ "9p: Multiple devices detected in same VirtFS export. "
+ "You must either use a separate export for each device "
+ "shared from host or enable virtfs option 'remap_inodes'."
+ );
+ return -ENODEV;
+ }
+ memset(&qidp->path, 0, sizeof(qidp->path));
+ size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
+ memcpy(&qidp->path, &stbuf->st_ino, size);
}
- memset(&qidp->path, 0, sizeof(qidp->path));
- size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
- memcpy(&qidp->path, &stbuf->st_ino, size);
qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
qidp->type = 0;
if (S_ISDIR(stbuf->st_mode)) {
@@ -616,6 +693,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
return 0;
}
+static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
+ struct dirent *dent, V9fsQID *qidp)
+{
+ struct stat stbuf;
+ V9fsPath path;
+ int err;
+
+ v9fs_path_init(&path);
+
+ err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
+ if (err < 0) {
+ goto out;
+ }
+ err = v9fs_co_lstat(pdu, &path, &stbuf);
+ if (err < 0) {
+ goto out;
+ }
+ err = stat_to_qid(pdu, &stbuf, qidp);
+
+out:
+ v9fs_path_free(&path);
+ return err;
+}
+
V9fsPDU *pdu_alloc(V9fsState *s)
{
V9fsPDU *pdu = NULL;
@@ -1940,6 +2041,19 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
int32_t count = 0;
off_t saved_dir_pos;
struct dirent *dent;
+ struct stat stbuf;
+ bool fidIsExportRoot;
+
+ /*
+ * determine if fidp is the export root, which is required for safe
+ * handling of ".." below
+ */
+ err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
+ if (err < 0) {
+ return err;
+ }
+ fidIsExportRoot = pdu->s->dev_id == stbuf.st_dev &&
+ pdu->s->root_ino == stbuf.st_ino;
/* save the directory position */
saved_dir_pos = v9fs_co_telldir(pdu, fidp);
@@ -1964,16 +2078,51 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
v9fs_string_free(&name);
return count;
}
- /*
- * Fill up just the path field of qid because the client uses
- * only that. To fill the entire qid structure we will have
- * to stat each dirent found, which is expensive
- */
- size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
- memcpy(&qid.path, &dent->d_ino, size);
- /* Fill the other fields with dummy values */
- qid.type = 0;
- qid.version = 0;
+
+ if (fidIsExportRoot && !strcmp("..", dent->d_name)) {
+ /*
+ * if "." is export root, then return qid of export root for
+ * ".." to avoid exposing anything outside the export
+ */
+ err = fid_to_qid(pdu, fidp, &qid);
+ if (err < 0) {
+ v9fs_readdir_unlock(&fidp->fs.dir);
+ v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
+ v9fs_string_free(&name);
+ return err;
+ }
+ } else if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
+ /*
+ * dirent_to_qid() implies expensive stat call for each entry,
+ * we must do that here though since inode remapping requires
+ * the device id, which in turn might be different for
+ * different entries; we cannot make any assumption to avoid
+ * that here.
+ */
+ err = dirent_to_qid(pdu, fidp, dent, &qid);
+ if (err < 0) {
+ v9fs_readdir_unlock(&fidp->fs.dir);
+ v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
+ v9fs_string_free(&name);
+ return err;
+ }
+ } else {
+ /*
+ * Fill up just the path field of qid because the client uses
+ * only that. To fill the entire qid structure we will have
+ * to stat each dirent found, which is expensive. For the
+ * latter reason we don't call dirent_to_qid() here. Only drawback
+ * is that no multi-device export detection of stat_to_qid()
+ * would be done and provided as error to the user here. But
+ * user would get that error anyway when accessing those
+ * files/dirs through other ways.
+ */
+ size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
+ memcpy(&qid.path, &dent->d_ino, size);
+ /* Fill the other fields with dummy values */
+ qid.type = 0;
+ qid.version = 0;
+ }
/* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
len = pdu_marshal(pdu, 11 + count, "Qqbs",
@@ -3672,8 +3821,13 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
goto out;
}
+ s->root_ino = stat.st_ino;
s->dev_id = stat.st_dev;
+ /* QID path hash table. 1 entry ought to be enough for anybody ;) */
+ qht_init(&s->qpp_table, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE);
+ s->qp_prefix_next = 1; /* reserve 0 to detect overflow */
+
s->ctx.fst = &fse->fst;
fsdev_throttle_init(s->ctx.fst);
@@ -3687,6 +3841,7 @@ out:
}
g_free(s->tag);
g_free(s->ctx.fs_root);
+ qpp_table_destroy(&s->qpp_table);
v9fs_path_free(&path);
}
return rc;
@@ -3699,6 +3854,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
}
fsdev_throttle_cleanup(s->ctx.fst);
g_free(s->tag);
+ qpp_table_destroy(&s->qpp_table);
g_free(s->ctx.fs_root);
}
@@ -8,6 +8,7 @@
#include "fsdev/9p-iov-marshal.h"
#include "qemu/thread.h"
#include "qemu/coroutine.h"
+#include "qemu/qht.h"
enum {
P9_TLERROR = 6,
@@ -235,6 +236,15 @@ struct V9fsFidState
V9fsFidState *rclm_lst;
};
+#define QPATH_INO_MASK ((1ULL << 48) - 1)
+
+/* QID path prefix entry, see stat_to_qid */
+typedef struct {
+ dev_t dev;
+ uint16_t ino_prefix;
+ uint16_t qp_prefix;
+} QppEntry;
+
struct V9fsState
{
QLIST_HEAD(, V9fsPDU) free_list;
@@ -256,7 +266,10 @@ struct V9fsState
Error *migration_blocker;
V9fsConf fsconf;
V9fsQID root_qid;
+ ino_t root_ino;
dev_t dev_id;
+ struct qht qpp_table;
+ uint16_t qp_prefix_next;
};
/* 9p2000.L open flags */
@@ -1334,17 +1334,17 @@ ETEXI
DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
"-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n"
- " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n"
- "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n"
- "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n"
+ " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,remap_inodes]\n"
+ "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly][,remap_inodes]\n"
+ "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly][,remap_inodes]\n"
"-virtfs synth,mount_tag=tag[,id=id][,readonly]\n",
QEMU_ARCH_ALL)
STEXI
-@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}]
-@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
-@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
+@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,remap_inodes]
+@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,remap_inodes]
+@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly][,remap_inodes]
@itemx -virtfs synth,mount_tag=@var{mount_tag}
@findex -virtfs
@@ -1398,6 +1398,19 @@ Specifies the default mode for newly created directories on the host. Works
only with security models "mapped-xattr" and "mapped-file".
@item mount_tag=@var{mount_tag}
Specifies the tag name to be used by the guest to mount this export point.
+@item remap_inodes
+By default virtfs 9p supports only one device per export in order to avoid
+file ID collisions on guest which may otherwise happen because the original
+device IDs from host are not passed and exposed on guest. Instead all files
+of an export shared with virtfs do have the same device id on guest. So two
+files with identical inode numbers but from actually different devices on
+host would otherwise cause a file ID collision and hence potential
+misbehaviours on guest. For that reason it is recommended to create a
+separate virtfs export for each device to be shared with guests. However
+you may also enable "remap_inodes" which allows you to share multiple
+devices with only one export instead, which is achieved by remapping the
+original inode numbers from host to guest in a way that would prevent such
+collisions.
@end table
ETEXI
@@ -3457,6 +3457,9 @@ int main(int argc, char **argv, char **envp)
qemu_opt_set_bool(fsdev, "readonly",
qemu_opt_get_bool(opts, "readonly", 0),
&error_abort);
+ qemu_opt_set_bool(fsdev, "remap_inodes",
+ qemu_opt_get_bool(opts, "remap_inodes", 0),
+ &error_abort);
device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);