Message ID | 366a157b3f3f9f94892eb90eee80a7dbf5d8ad12.1667542066.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Tue, Nov 08, 2022 at 03:13:31PM -0800, John Johnson wrote: > Also negotiate protocol version with remote server LGTM Reviewed-by: John Levon <john.levon@nutanix.com> regards john
On 11/9/22 00:13, John Johnson wrote: > Also negotiate protocol version with remote server > > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > --- > hw/vfio/pci.c | 15 ++ > hw/vfio/pci.h | 1 + > hw/vfio/user-protocol.h | 62 ++++++ > hw/vfio/user.c | 508 ++++++++++++++++++++++++++++++++++++++++++++++++ > hw/vfio/user.h | 9 + > 5 files changed, 595 insertions(+) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index f086235..b2534b3 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3489,11 +3489,25 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > vbasedev->proxy = proxy; > vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); > > + if (udev->send_queued) { > + proxy->flags |= VFIO_PROXY_FORCE_QUEUED; > + } > + > + vfio_user_validate_version(proxy, &err); > + if (err != NULL) { > + error_propagate(errp, err); > + goto error; > + } > + > vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name); > vbasedev->ops = &vfio_user_pci_ops; > vbasedev->type = VFIO_DEVICE_TYPE_PCI; > vbasedev->dev = DEVICE(vdev); > > + return; > + > +error: > + error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); > } > > static void vfio_user_instance_finalize(Object *obj) > @@ -3510,6 +3524,7 @@ static void vfio_user_instance_finalize(Object *obj) > > static Property vfio_user_pci_dev_properties[] = { > DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), > + DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), > DEFINE_PROP_END_OF_LIST(), > }; > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 27db931..c47d2f8 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -195,6 +195,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) > struct VFIOUserPCIDevice { > VFIOPCIDevice device; > char *sock_name; > + bool send_queued; /* all sends are queued */ > }; > > /* Use 
uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ > diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h > index d23877c..5de5b20 100644 > --- a/hw/vfio/user-protocol.h > +++ b/hw/vfio/user-protocol.h > @@ -51,4 +51,66 @@ enum vfio_user_command { > #define VFIO_USER_NO_REPLY 0x10 > #define VFIO_USER_ERROR 0x20 > + > +/* > + * VFIO_USER_VERSION > + */ > +typedef struct { > + VFIOUserHdr hdr; > + uint16_t major; > + uint16_t minor; > + char capabilities[]; > +} VFIOUserVersion; > + > +#define VFIO_USER_MAJOR_VER 0 > +#define VFIO_USER_MINOR_VER 0 > + > +#define VFIO_USER_CAP "capabilities" > + > +/* "capabilities" members */ > +#define VFIO_USER_CAP_MAX_FDS "max_msg_fds" > +#define VFIO_USER_CAP_MAX_XFER "max_data_xfer_size" > +#define VFIO_USER_CAP_PGSIZES "pgsizes" > +#define VFIO_USER_CAP_MAP_MAX "max_dma_maps" > +#define VFIO_USER_CAP_MIGR "migration" > + > +/* "migration" members */ > +#define VFIO_USER_CAP_PGSIZE "pgsize" > +#define VFIO_USER_CAP_MAX_BITMAP "max_bitmap_size" > + > +/* > + * Max FDs mainly comes into play when a device supports multiple interrupts > + * where each one uses an eventfd to inject it into the guest. > + * It is clamped by the number of FDs the qio channel supports in a > + * single message. > + */ > +#define VFIO_USER_DEF_MAX_FDS 8 > +#define VFIO_USER_MAX_MAX_FDS 16 > + > +/* > + * Max transfer limits the amount of data in region and DMA messages. > + * Region R/W will be very small (limited by how much a single instruction > + * can process) so just use a reasonable limit here. > + */ > +#define VFIO_USER_DEF_MAX_XFER (1024 * 1024) > +#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024) > + > +/* > + * Default pagesizes supported is 4k. 
> + */ > +#define VFIO_USER_DEF_PGSIZE 4096 > + > +/* > + * Default max number of DMA mappings is stolen from the > + * linux kernel "dma_entry_limit" > + */ > +#define VFIO_USER_DEF_MAP_MAX 65535 > + > +/* > + * Default max bitmap size is also taken from the linux kernel, > + * where usage of signed ints limits the VA range to 2^31 bytes. > + * Dividing that by the number of bits per byte yields 256MB > + */ > +#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024) > + > #endif /* VFIO_USER_PROTOCOL_H */ > diff --git a/hw/vfio/user.c b/hw/vfio/user.c > index ffd69b9..31bcc93 100644 > --- a/hw/vfio/user.c > +++ b/hw/vfio/user.c > @@ -23,11 +23,19 @@ > #include "io/channel-socket.h" > #include "io/channel-util.h" > #include "sysemu/iothread.h" > +#include "qapi/qmp/qdict.h" > +#include "qapi/qmp/qjson.h" > +#include "qapi/qmp/qnull.h" > +#include "qapi/qmp/qstring.h" > +#include "qapi/qmp/qnum.h" > +#include "qapi/qmp/qbool.h" > #include "user.h" > > +static int wait_time = 5000; /* wait up to 5 sec for busy servers */ > static IOThread *vfio_user_iothread; > > static void vfio_user_shutdown(VFIOProxy *proxy); > +static int vfio_user_send_qio(VFIOProxy *proxy, VFIOUserMsg *msg); > static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr, > VFIOUserFDs *fds); > static VFIOUserFDs *vfio_user_getfds(int numfds); > @@ -35,9 +43,16 @@ static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg); > > static void vfio_user_recv(void *opaque); > static int vfio_user_recv_one(VFIOProxy *proxy); > +static void vfio_user_send(void *opaque); > +static int vfio_user_send_one(VFIOProxy *proxy); > static void vfio_user_cb(void *opaque); > > static void vfio_user_request(void *opaque); > +static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg); > +static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, > + VFIOUserFDs *fds, int rsize, bool nobql); > +static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, > + uint32_t size, 
uint32_t flags); > > static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) > { > @@ -55,6 +70,33 @@ static void vfio_user_shutdown(VFIOProxy *proxy) > qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, NULL, NULL); > } > > +static int vfio_user_send_qio(VFIOProxy *proxy, VFIOUserMsg *msg) > +{ > + VFIOUserFDs *fds = msg->fds; > + struct iovec iov = { > + .iov_base = msg->hdr, > + .iov_len = msg->hdr->size, > + }; > + size_t numfds = 0; > + int ret, *fdp = NULL; > + Error *local_err = NULL; > + > + if (fds != NULL && fds->send_fds != 0) { > + numfds = fds->send_fds; > + fdp = fds->fds; > + } > + > + ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, > + &local_err); > + > + if (ret == -1) { > + vfio_user_set_error(msg->hdr, EIO); > + vfio_user_shutdown(proxy); > + error_report_err(local_err); > + } > + return ret; > +} > + > static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr, > VFIOUserFDs *fds) > { > @@ -95,6 +137,7 @@ static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg) > msg->hdr = NULL; > msg->fds = NULL; > msg->complete = false; > + msg->pending = false; > QTAILQ_INSERT_HEAD(&proxy->free, msg, next); > } > > @@ -383,6 +426,54 @@ err: > return -1; > } > > +/* > + * Send messages from outgoing queue when the socket buffer has space. > + * If we deplete 'outgoing', remove ourselves from the poll list. > + */ > +static void vfio_user_send(void *opaque) > +{ > + VFIOProxy *proxy = opaque; > + > + QEMU_LOCK_GUARD(&proxy->lock); > + > + if (proxy->state == VFIO_PROXY_CONNECTED) { > + while (!QTAILQ_EMPTY(&proxy->outgoing)) { > + if (vfio_user_send_one(proxy) < 0) { > + return; > + } > + } > + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, > + vfio_user_recv, NULL, proxy); > + } > +} > + > +/* > + * Send a single message. > + * > + * Sent async messages are freed, others are moved to pending queue. 
> + */ > +static int vfio_user_send_one(VFIOProxy *proxy) > +{ > + VFIOUserMsg *msg; > + int ret; > + > + msg = QTAILQ_FIRST(&proxy->outgoing); > + ret = vfio_user_send_qio(proxy, msg); > + if (ret < 0) { > + return ret; > + } > + > + QTAILQ_REMOVE(&proxy->outgoing, msg, next); > + if (msg->type == VFIO_MSG_ASYNC) { > + vfio_user_recycle(proxy, msg); > + } else { > + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); > + msg->pending = true; > + } > + > + return 0; > +} > + > static void vfio_user_cb(void *opaque) > { > VFIOProxy *proxy = opaque; > @@ -443,6 +534,134 @@ static void vfio_user_request(void *opaque) > } > } > > +/* > + * Messages are queued onto the proxy's outgoing list. > + * > + * It handles 3 types of messages: > + * > + * async messages - replies and posted writes > + * > + * There will be no reply from the server, so message > + * buffers are freed after they're sent. > + * > + * nowait messages - map/unmap during address space transactions > + * > + * These are also sent async, but a reply is expected so that > + * vfio_wait_reqs() can wait for the youngest nowait request. > + * They transition from the outgoing list to the pending list > + * when sent, and are freed when the reply is received. > + * > + * wait messages - all other requests > + * > + * The reply to these messages is waited for by their caller. > + * They also transition from outgoing to pending when sent, but > + * the message buffer is returned to the caller with the reply > + * contents. The caller is responsible for freeing these messages. > + * > + * As an optimization, if the outgoing list and the socket send > + * buffer are empty, the message is sent inline instead of being > + * added to the outgoing list. The rest of the transitions are > + * unchanged. 
> + * > + * returns 0 if the message was sent or queued > + * returns -1 on send error > + */ > +static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) > +{ > + int ret; > + > + /* > + * Unsent outgoing msgs - add to tail > + */ > + if (!QTAILQ_EMPTY(&proxy->outgoing)) { > + QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); > + return 0; > + } > + > + /* > + * Try inline - if blocked, queue it and kick send poller > + */ > + if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { > + ret = QIO_CHANNEL_ERR_BLOCK; > + } else { > + ret = vfio_user_send_qio(proxy, msg); > + } > + if (ret == QIO_CHANNEL_ERR_BLOCK) { > + QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); > + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, > + vfio_user_recv, vfio_user_send, > + proxy); > + return 0; > + } > + if (ret == -1) { > + return ret; > + } > + > + /* > + * Sent - free async, add others to pending > + */ > + if (msg->type == VFIO_MSG_ASYNC) { > + vfio_user_recycle(proxy, msg); > + } else { > + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); > + msg->pending = true; > + } > + > + return 0; > +} > + > +static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, > + VFIOUserFDs *fds, int rsize, bool nobql) > +{ > + VFIOUserMsg *msg; > + bool iolock = false; > + int ret; > + > + if (hdr->flags & VFIO_USER_NO_REPLY) { > + error_printf("vfio_user_send_wait on async message\n"); Shouldn't there be a vfio_user_set_error() call also ? > + return; > + } > + > + /* > + * We may block later, so use a per-proxy lock and drop > + * BQL while we sleep unless 'nobql' says not to. > + */ > + qemu_mutex_lock(&proxy->lock); > + if (!nobql) { > + iolock = qemu_mutex_iothread_locked(); > + if (iolock) { > + qemu_mutex_unlock_iothread(); > + } > + } > + > + msg = vfio_user_getmsg(proxy, hdr, fds); > + msg->id = hdr->id; > + msg->rsize = rsize ? 
rsize : hdr->size; > + msg->type = VFIO_MSG_WAIT; > + > + ret = vfio_user_send_queued(proxy, msg); > + > + if (ret == 0) { > + while (!msg->complete) { > + if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { > + VFIOUserMsgQ *list; > + > + list = msg->pending ? &proxy->pending : &proxy->outgoing; > + QTAILQ_REMOVE(list, msg, next); > + vfio_user_set_error(hdr, ETIMEDOUT); > + break; > + } > + } > + } > + vfio_user_recycle(proxy, msg); > + > + /* lock order is BQL->proxy - don't hold proxy when getting BQL */ > + qemu_mutex_unlock(&proxy->lock); > + if (iolock) { > + qemu_mutex_lock_iothread(); > + } > +} > + > static QLIST_HEAD(, VFIOProxy) vfio_user_sockets = > QLIST_HEAD_INITIALIZER(vfio_user_sockets); > > @@ -470,6 +689,15 @@ VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) > proxy = g_malloc0(sizeof(VFIOProxy)); > proxy->sockname = g_strdup_printf("unix:%s", sockname); > proxy->ioc = ioc; > + > + /* init defaults */ > + proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; > + proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; > + proxy->max_dma = VFIO_USER_DEF_MAP_MAX; > + proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; > + proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; > + proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; > + > proxy->flags = VFIO_PROXY_CLIENT; > proxy->state = VFIO_PROXY_CONNECTED; > > @@ -567,3 +795,283 @@ void vfio_user_disconnect(VFIOProxy *proxy) > g_free(proxy->sockname); > g_free(proxy); > } > + > +static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, > + uint32_t size, uint32_t flags) > +{ > + static uint16_t next_id; > + > + hdr->id = qatomic_fetch_inc(&next_id); > + hdr->command = cmd; > + hdr->size = size; > + hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; > + hdr->error_reply = 0; > +} > + > +struct cap_entry { > + const char *name; > + int (*check)(VFIOProxy *proxy, QObject *qobj, Error **errp); > +}; > + > +static int caps_parse(VFIOProxy *proxy, QDict *qdict, struct cap_entry caps[], > + Error 
**errp) > +{ > + QObject *qobj; > + struct cap_entry *p; > + > + for (p = caps; p->name != NULL; p++) { > + qobj = qdict_get(qdict, p->name); > + if (qobj != NULL) { > + if (p->check(proxy, qobj, errp)) { > + return -1; > + } > + qdict_del(qdict, p->name); > + } > + } > + > + /* warning, for now */ > + if (qdict_size(qdict) != 0) { > + error_printf("spurious capabilities\n"); May be use warn_report() instead > + } > + return 0; > +} > + > +static int check_migr_pgsize(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t pgsize; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); > + return -1; > + } > + > + /* must be larger than default */ > + if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { > + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); > + return -1; > + } > + > + proxy->migr_pgsize = pgsize; > + return 0; > +} > + > +static int check_bitmap(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t bitmap_size; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); > + return -1; > + } > + > + /* can only lower it */ > + if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { > + error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); > + return -1; > + } > + > + proxy->max_bitmap = bitmap_size; > + return 0; > +} > + > +static struct cap_entry caps_migr[] = { > + { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, > + { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, > + { NULL } > +}; > + > +static int check_max_fds(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t max_send_fds; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || > + max_send_fds > VFIO_USER_MAX_MAX_FDS) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); > + return -1; > + } > + 
proxy->max_send_fds = max_send_fds; > + return 0; > +} > + > +static int check_max_xfer(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t max_xfer_size; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || > + max_xfer_size > VFIO_USER_MAX_MAX_XFER) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); > + return -1; > + } > + proxy->max_xfer_size = max_xfer_size; > + return 0; > +} > + > +static int check_pgsizes(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t pgsizes; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES); > + return -1; > + } > + > + /* must be larger than default */ > + if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { > + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); > + return -1; > + } > + > + proxy->dma_pgsizes = pgsizes; > + return 0; > +} > + > +static int check_max_dma(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QNum *qn = qobject_to(QNum, qobj); > + uint64_t max_dma; > + > + if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); > + return -1; > + } > + > + /* can only lower it */ > + if (max_dma > VFIO_USER_DEF_MAP_MAX) { > + error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); > + return -1; > + } > + > + proxy->max_dma = max_dma; > + return 0; > +} > + > +static int check_migr(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QDict *qdict = qobject_to(QDict, qobj); > + > + if (qdict == NULL) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); > + return -1; > + } > + return caps_parse(proxy, qdict, caps_migr, errp); > +} > + > +static struct cap_entry caps_cap[] = { > + { VFIO_USER_CAP_MAX_FDS, check_max_fds }, > + { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, > + { VFIO_USER_CAP_PGSIZES, check_pgsizes }, > + { VFIO_USER_CAP_MAP_MAX, 
check_max_dma }, > + { VFIO_USER_CAP_MIGR, check_migr }, > + { NULL } > +}; > + > +static int check_cap(VFIOProxy *proxy, QObject *qobj, Error **errp) > +{ > + QDict *qdict = qobject_to(QDict, qobj); > + > + if (qdict == NULL) { > + error_setg(errp, "malformed %s", VFIO_USER_CAP); > + return -1; > + } > + return caps_parse(proxy, qdict, caps_cap, errp); > +} > + > +static struct cap_entry ver_0_0[] = { > + { VFIO_USER_CAP, check_cap }, > + { NULL } > +}; > + > +static int caps_check(VFIOProxy *proxy, int minor, const char *caps, what is the minor parameter for ? Thanks, C. > + Error **errp) > +{ > + QObject *qobj; > + QDict *qdict; > + int ret; > + > + qobj = qobject_from_json(caps, NULL); > + if (qobj == NULL) { > + error_setg(errp, "malformed capabilities %s", caps); > + return -1; > + } > + qdict = qobject_to(QDict, qobj); > + if (qdict == NULL) { > + error_setg(errp, "capabilities %s not an object", caps); > + qobject_unref(qobj); > + return -1; > + } > + ret = caps_parse(proxy, qdict, ver_0_0, errp); > + > + qobject_unref(qobj); > + return ret; > +} > + > +static GString *caps_json(void) > +{ > + QDict *dict = qdict_new(); > + QDict *capdict = qdict_new(); > + QDict *migdict = qdict_new(); > + GString *str; > + > + qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); > + qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); > + qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); > + > + qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); > + qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); > + qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); > + qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); > + > + qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); > + > + str = qobject_to_json(QOBJECT(dict)); > + qobject_unref(dict); > + return str; > +} > + > +int vfio_user_validate_version(VFIOProxy *proxy, Error **errp) > +{ > + g_autofree 
VFIOUserVersion *msgp; > + GString *caps; > + char *reply; > + int size, caplen; > + > + caps = caps_json(); > + caplen = caps->len + 1; > + size = sizeof(*msgp) + caplen; > + msgp = g_malloc0(size); > + > + vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); > + msgp->major = VFIO_USER_MAJOR_VER; > + msgp->minor = VFIO_USER_MINOR_VER; > + memcpy(&msgp->capabilities, caps->str, caplen); > + g_string_free(caps, true); > + > + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); > + if (msgp->hdr.flags & VFIO_USER_ERROR) { > + error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); > + return -1; > + } > + > + if (msgp->major != VFIO_USER_MAJOR_VER || > + msgp->minor > VFIO_USER_MINOR_VER) { > + error_setg(errp, "incompatible server version"); > + return -1; > + } > + > + reply = msgp->capabilities; > + if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { > + error_setg(errp, "corrupt version reply"); > + return -1; > + } > + > + if (caps_check(proxy, msgp->minor, reply, errp) != 0) { > + return -1; > + } > + > + return 0; > +} > diff --git a/hw/vfio/user.h b/hw/vfio/user.h > index 68a1080..8ce3cd9 100644 > --- a/hw/vfio/user.h > +++ b/hw/vfio/user.h > @@ -35,6 +35,7 @@ typedef struct VFIOUserMsg { > uint32_t id; > QemuCond cv; > bool complete; > + bool pending; > enum msg_type type; > } VFIOUserMsg; > > @@ -54,6 +55,12 @@ typedef struct VFIOProxy { > struct QIOChannel *ioc; > void (*request)(void *opaque, VFIOUserMsg *msg); > void *req_arg; > + uint64_t max_xfer_size; > + uint64_t max_send_fds; > + uint64_t max_dma; > + uint64_t dma_pgsizes; > + uint64_t max_bitmap; > + uint64_t migr_pgsize; > int flags; > QemuCond close_cv; > AioContext *ctx; > @@ -76,11 +83,13 @@ typedef struct VFIOProxy { > > /* VFIOProxy flags */ > #define VFIO_PROXY_CLIENT 0x1 > +#define VFIO_PROXY_FORCE_QUEUED 0x4 > > VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); > void vfio_user_disconnect(VFIOProxy *proxy); > void 
vfio_user_set_handler(VFIODevice *vbasedev, > void (*handler)(void *opaque, VFIOUserMsg *msg), > void *reqarg); > +int vfio_user_validate_version(VFIOProxy *proxy, Error **errp); > > #endif /* VFIO_USER_H */
> On Dec 13, 2022, at 5:48 AM, Cédric Le Goater <clg@redhat.com> wrote: > > On 11/9/22 00:13, John Johnson wrote: >> >> + >> +static struct cap_entry ver_0_0[] = { >> + { VFIO_USER_CAP, check_cap }, >> + { NULL } >> +}; >> + >> +static int caps_check(VFIOProxy *proxy, int minor, const char *caps, > > what is the minor parameter for ? > So we can make forward compatible changes to the protocol. JJ
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index f086235..b2534b3 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3489,11 +3489,25 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) vbasedev->proxy = proxy; vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev); + if (udev->send_queued) { + proxy->flags |= VFIO_PROXY_FORCE_QUEUED; + } + + vfio_user_validate_version(proxy, &err); + if (err != NULL) { + error_propagate(errp, err); + goto error; + } + vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name); vbasedev->ops = &vfio_user_pci_ops; vbasedev->type = VFIO_DEVICE_TYPE_PCI; vbasedev->dev = DEVICE(vdev); + return; + +error: + error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); } static void vfio_user_instance_finalize(Object *obj) @@ -3510,6 +3524,7 @@ static void vfio_user_instance_finalize(Object *obj) static Property vfio_user_pci_dev_properties[] = { DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), + DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 27db931..c47d2f8 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -195,6 +195,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI) struct VFIOUserPCIDevice { VFIOPCIDevice device; char *sock_name; + bool send_queued; /* all sends are queued */ }; /* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */ diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index d23877c..5de5b20 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -51,4 +51,66 @@ enum vfio_user_command { #define VFIO_USER_NO_REPLY 0x10 #define VFIO_USER_ERROR 0x20 + +/* + * VFIO_USER_VERSION + */ +typedef struct { + VFIOUserHdr hdr; + uint16_t major; + uint16_t minor; + char capabilities[]; +} VFIOUserVersion; + +#define VFIO_USER_MAJOR_VER 0 +#define VFIO_USER_MINOR_VER 0 + +#define VFIO_USER_CAP "capabilities" + +/* 
"capabilities" members */ +#define VFIO_USER_CAP_MAX_FDS "max_msg_fds" +#define VFIO_USER_CAP_MAX_XFER "max_data_xfer_size" +#define VFIO_USER_CAP_PGSIZES "pgsizes" +#define VFIO_USER_CAP_MAP_MAX "max_dma_maps" +#define VFIO_USER_CAP_MIGR "migration" + +/* "migration" members */ +#define VFIO_USER_CAP_PGSIZE "pgsize" +#define VFIO_USER_CAP_MAX_BITMAP "max_bitmap_size" + +/* + * Max FDs mainly comes into play when a device supports multiple interrupts + * where each one uses an eventfd to inject it into the guest. + * It is clamped by the number of FDs the qio channel supports in a + * single message. + */ +#define VFIO_USER_DEF_MAX_FDS 8 +#define VFIO_USER_MAX_MAX_FDS 16 + +/* + * Max transfer limits the amount of data in region and DMA messages. + * Region R/W will be very small (limited by how much a single instruction + * can process) so just use a reasonable limit here. + */ +#define VFIO_USER_DEF_MAX_XFER (1024 * 1024) +#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024) + +/* + * Default pagesizes supported is 4k. + */ +#define VFIO_USER_DEF_PGSIZE 4096 + +/* + * Default max number of DMA mappings is stolen from the + * linux kernel "dma_entry_limit" + */ +#define VFIO_USER_DEF_MAP_MAX 65535 + +/* + * Default max bitmap size is also taken from the linux kernel, + * where usage of signed ints limits the VA range to 2^31 bytes. 
+ * Dividing that by the number of bits per byte yields 256MB + */ +#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024) + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index ffd69b9..31bcc93 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -23,11 +23,19 @@ #include "io/channel-socket.h" #include "io/channel-util.h" #include "sysemu/iothread.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qjson.h" +#include "qapi/qmp/qnull.h" +#include "qapi/qmp/qstring.h" +#include "qapi/qmp/qnum.h" +#include "qapi/qmp/qbool.h" #include "user.h" +static int wait_time = 5000; /* wait up to 5 sec for busy servers */ static IOThread *vfio_user_iothread; static void vfio_user_shutdown(VFIOProxy *proxy); +static int vfio_user_send_qio(VFIOProxy *proxy, VFIOUserMsg *msg); static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds); static VFIOUserFDs *vfio_user_getfds(int numfds); @@ -35,9 +43,16 @@ static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg); static void vfio_user_recv(void *opaque); static int vfio_user_recv_one(VFIOProxy *proxy); +static void vfio_user_send(void *opaque); +static int vfio_user_send_one(VFIOProxy *proxy); static void vfio_user_cb(void *opaque); static void vfio_user_request(void *opaque); +static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg); +static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, bool nobql); +static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, + uint32_t size, uint32_t flags); static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) { @@ -55,6 +70,33 @@ static void vfio_user_shutdown(VFIOProxy *proxy) qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL, NULL, NULL); } +static int vfio_user_send_qio(VFIOProxy *proxy, VFIOUserMsg *msg) +{ + VFIOUserFDs *fds = msg->fds; + struct iovec iov = { + .iov_base = msg->hdr, + .iov_len = msg->hdr->size, + }; + size_t numfds = 
0; + int ret, *fdp = NULL; + Error *local_err = NULL; + + if (fds != NULL && fds->send_fds != 0) { + numfds = fds->send_fds; + fdp = fds->fds; + } + + ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, + &local_err); + + if (ret == -1) { + vfio_user_set_error(msg->hdr, EIO); + vfio_user_shutdown(proxy); + error_report_err(local_err); + } + return ret; +} + static VFIOUserMsg *vfio_user_getmsg(VFIOProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds) { @@ -95,6 +137,7 @@ static void vfio_user_recycle(VFIOProxy *proxy, VFIOUserMsg *msg) msg->hdr = NULL; msg->fds = NULL; msg->complete = false; + msg->pending = false; QTAILQ_INSERT_HEAD(&proxy->free, msg, next); } @@ -383,6 +426,54 @@ err: return -1; } +/* + * Send messages from outgoing queue when the socket buffer has space. + * If we deplete 'outgoing', remove ourselves from the poll list. + */ +static void vfio_user_send(void *opaque) +{ + VFIOProxy *proxy = opaque; + + QEMU_LOCK_GUARD(&proxy->lock); + + if (proxy->state == VFIO_PROXY_CONNECTED) { + while (!QTAILQ_EMPTY(&proxy->outgoing)) { + if (vfio_user_send_one(proxy) < 0) { + return; + } + } + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, + vfio_user_recv, NULL, proxy); + } +} + +/* + * Send a single message. + * + * Sent async messages are freed, others are moved to pending queue. + */ +static int vfio_user_send_one(VFIOProxy *proxy) +{ + VFIOUserMsg *msg; + int ret; + + msg = QTAILQ_FIRST(&proxy->outgoing); + ret = vfio_user_send_qio(proxy, msg); + if (ret < 0) { + return ret; + } + + QTAILQ_REMOVE(&proxy->outgoing, msg, next); + if (msg->type == VFIO_MSG_ASYNC) { + vfio_user_recycle(proxy, msg); + } else { + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); + msg->pending = true; + } + + return 0; +} + static void vfio_user_cb(void *opaque) { VFIOProxy *proxy = opaque; @@ -443,6 +534,134 @@ static void vfio_user_request(void *opaque) } } +/* + * Messages are queued onto the proxy's outgoing list. 
+ * + * It handles 3 types of messages: + * + * async messages - replies and posted writes + * + * There will be no reply from the server, so message + * buffers are freed after they're sent. + * + * nowait messages - map/unmap during address space transactions + * + * These are also sent async, but a reply is expected so that + * vfio_wait_reqs() can wait for the youngest nowait request. + * They transition from the outgoing list to the pending list + * when sent, and are freed when the reply is received. + * + * wait messages - all other requests + * + * The reply to these messages is waited for by their caller. + * They also transition from outgoing to pending when sent, but + * the message buffer is returned to the caller with the reply + * contents. The caller is responsible for freeing these messages. + * + * As an optimization, if the outgoing list and the socket send + * buffer are empty, the message is sent inline instead of being + * added to the outgoing list. The rest of the transitions are + * unchanged. 
+ * + * returns 0 if the message was sent or queued + * returns -1 on send error + */ +static int vfio_user_send_queued(VFIOProxy *proxy, VFIOUserMsg *msg) +{ + int ret; + + /* + * Unsent outgoing msgs - add to tail + */ + if (!QTAILQ_EMPTY(&proxy->outgoing)) { + QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); + return 0; + } + + /* + * Try inline - if blocked, queue it and kick send poller + */ + if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) { + ret = QIO_CHANNEL_ERR_BLOCK; + } else { + ret = vfio_user_send_qio(proxy, msg); + } + if (ret == QIO_CHANNEL_ERR_BLOCK) { + QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); + qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, + vfio_user_recv, vfio_user_send, + proxy); + return 0; + } + if (ret == -1) { + return ret; + } + + /* + * Sent - free async, add others to pending + */ + if (msg->type == VFIO_MSG_ASYNC) { + vfio_user_recycle(proxy, msg); + } else { + QTAILQ_INSERT_TAIL(&proxy->pending, msg, next); + msg->pending = true; + } + + return 0; +} + +static void vfio_user_send_wait(VFIOProxy *proxy, VFIOUserHdr *hdr, + VFIOUserFDs *fds, int rsize, bool nobql) +{ + VFIOUserMsg *msg; + bool iolock = false; + int ret; + + if (hdr->flags & VFIO_USER_NO_REPLY) { + error_printf("vfio_user_send_wait on async message\n"); + return; + } + + /* + * We may block later, so use a per-proxy lock and drop + * BQL while we sleep unless 'nobql' says not to. + */ + qemu_mutex_lock(&proxy->lock); + if (!nobql) { + iolock = qemu_mutex_iothread_locked(); + if (iolock) { + qemu_mutex_unlock_iothread(); + } + } + + msg = vfio_user_getmsg(proxy, hdr, fds); + msg->id = hdr->id; + msg->rsize = rsize ? rsize : hdr->size; + msg->type = VFIO_MSG_WAIT; + + ret = vfio_user_send_queued(proxy, msg); + + if (ret == 0) { + while (!msg->complete) { + if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) { + VFIOUserMsgQ *list; + + list = msg->pending ? 
&proxy->pending : &proxy->outgoing; + QTAILQ_REMOVE(list, msg, next); + vfio_user_set_error(hdr, ETIMEDOUT); + break; + } + } + } + vfio_user_recycle(proxy, msg); + + /* lock order is BQL->proxy - don't hold proxy when getting BQL */ + qemu_mutex_unlock(&proxy->lock); + if (iolock) { + qemu_mutex_lock_iothread(); + } +} + static QLIST_HEAD(, VFIOProxy) vfio_user_sockets = QLIST_HEAD_INITIALIZER(vfio_user_sockets); @@ -470,6 +689,15 @@ VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp) proxy = g_malloc0(sizeof(VFIOProxy)); proxy->sockname = g_strdup_printf("unix:%s", sockname); proxy->ioc = ioc; + + /* init defaults */ + proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER; + proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS; + proxy->max_dma = VFIO_USER_DEF_MAP_MAX; + proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE; + proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP; + proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE; + proxy->flags = VFIO_PROXY_CLIENT; proxy->state = VFIO_PROXY_CONNECTED; @@ -567,3 +795,283 @@ void vfio_user_disconnect(VFIOProxy *proxy) g_free(proxy->sockname); g_free(proxy); } + +static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd, + uint32_t size, uint32_t flags) +{ + static uint16_t next_id; + + hdr->id = qatomic_fetch_inc(&next_id); + hdr->command = cmd; + hdr->size = size; + hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST; + hdr->error_reply = 0; +} + +struct cap_entry { + const char *name; + int (*check)(VFIOProxy *proxy, QObject *qobj, Error **errp); +}; + +static int caps_parse(VFIOProxy *proxy, QDict *qdict, struct cap_entry caps[], + Error **errp) +{ + QObject *qobj; + struct cap_entry *p; + + for (p = caps; p->name != NULL; p++) { + qobj = qdict_get(qdict, p->name); + if (qobj != NULL) { + if (p->check(proxy, qobj, errp)) { + return -1; + } + qdict_del(qdict, p->name); + } + } + + /* warning, for now */ + if (qdict_size(qdict) != 0) { + error_printf("spurious capabilities\n"); + } + return 0; +} + +static int 
check_migr_pgsize(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t pgsize; + + if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE); + return -1; + } + + /* must be larger than default */ + if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) { + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize); + return -1; + } + + proxy->migr_pgsize = pgsize; + return 0; +} + +static int check_bitmap(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t bitmap_size; + + if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP); + return -1; + } + + /* can only lower it */ + if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) { + error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP); + return -1; + } + + proxy->max_bitmap = bitmap_size; + return 0; +} + +static struct cap_entry caps_migr[] = { + { VFIO_USER_CAP_PGSIZE, check_migr_pgsize }, + { VFIO_USER_CAP_MAX_BITMAP, check_bitmap }, + { NULL } +}; + +static int check_max_fds(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_send_fds; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) || + max_send_fds > VFIO_USER_MAX_MAX_FDS) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); + return -1; + } + proxy->max_send_fds = max_send_fds; + return 0; +} + +static int check_max_xfer(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_xfer_size; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) || + max_xfer_size > VFIO_USER_MAX_MAX_XFER) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER); + return -1; + } + proxy->max_xfer_size = max_xfer_size; + return 0; +} + +static int check_pgsizes(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t pgsizes; + + if 
(qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES); + return -1; + } + + /* must be larger than default */ + if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) { + error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes); + return -1; + } + + proxy->dma_pgsizes = pgsizes; + return 0; +} + +static int check_max_dma(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QNum *qn = qobject_to(QNum, qobj); + uint64_t max_dma; + + if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX); + return -1; + } + + /* can only lower it */ + if (max_dma > VFIO_USER_DEF_MAP_MAX) { + error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX); + return -1; + } + + proxy->max_dma = max_dma; + return 0; +} + +static int check_migr(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QDict *qdict = qobject_to(QDict, qobj); + + if (qdict == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS); + return -1; + } + return caps_parse(proxy, qdict, caps_migr, errp); +} + +static struct cap_entry caps_cap[] = { + { VFIO_USER_CAP_MAX_FDS, check_max_fds }, + { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, + { VFIO_USER_CAP_PGSIZES, check_pgsizes }, + { VFIO_USER_CAP_MAP_MAX, check_max_dma }, + { VFIO_USER_CAP_MIGR, check_migr }, + { NULL } +}; + +static int check_cap(VFIOProxy *proxy, QObject *qobj, Error **errp) +{ + QDict *qdict = qobject_to(QDict, qobj); + + if (qdict == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP); + return -1; + } + return caps_parse(proxy, qdict, caps_cap, errp); +} + +static struct cap_entry ver_0_0[] = { + { VFIO_USER_CAP, check_cap }, + { NULL } +}; + +static int caps_check(VFIOProxy *proxy, int minor, const char *caps, + Error **errp) +{ + QObject *qobj; + QDict *qdict; + int ret; + + qobj = qobject_from_json(caps, NULL); + if (qobj == NULL) { + error_setg(errp, "malformed capabilities %s", caps); + return -1; + } + qdict = qobject_to(QDict, qobj); 
+ if (qdict == NULL) { + error_setg(errp, "capabilities %s not an object", caps); + qobject_unref(qobj); + return -1; + } + ret = caps_parse(proxy, qdict, ver_0_0, errp); + + qobject_unref(qobj); + return ret; +} + +static GString *caps_json(void) +{ + QDict *dict = qdict_new(); + QDict *capdict = qdict_new(); + QDict *migdict = qdict_new(); + GString *str; + + qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE); + qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP); + qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict)); + + qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS); + qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); + qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); + qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); + + qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); + + str = qobject_to_json(QOBJECT(dict)); + qobject_unref(dict); + return str; +} + +int vfio_user_validate_version(VFIOProxy *proxy, Error **errp) +{ + g_autofree VFIOUserVersion *msgp; + GString *caps; + char *reply; + int size, caplen; + + caps = caps_json(); + caplen = caps->len + 1; + size = sizeof(*msgp) + caplen; + msgp = g_malloc0(size); + + vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0); + msgp->major = VFIO_USER_MAJOR_VER; + msgp->minor = VFIO_USER_MINOR_VER; + memcpy(&msgp->capabilities, caps->str, caplen); + g_string_free(caps, true); + + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false); + if (msgp->hdr.flags & VFIO_USER_ERROR) { + error_setg_errno(errp, msgp->hdr.error_reply, "version reply"); + return -1; + } + + if (msgp->major != VFIO_USER_MAJOR_VER || + msgp->minor > VFIO_USER_MINOR_VER) { + error_setg(errp, "incompatible server version"); + return -1; + } + + reply = msgp->capabilities; + if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') { + error_setg(errp, "corrupt version reply"); + return -1; + } + + if 
(caps_check(proxy, msgp->minor, reply, errp) != 0) { + return -1; + } + + return 0; +} diff --git a/hw/vfio/user.h b/hw/vfio/user.h index 68a1080..8ce3cd9 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -35,6 +35,7 @@ typedef struct VFIOUserMsg { uint32_t id; QemuCond cv; bool complete; + bool pending; enum msg_type type; } VFIOUserMsg; @@ -54,6 +55,12 @@ typedef struct VFIOProxy { struct QIOChannel *ioc; void (*request)(void *opaque, VFIOUserMsg *msg); void *req_arg; + uint64_t max_xfer_size; + uint64_t max_send_fds; + uint64_t max_dma; + uint64_t dma_pgsizes; + uint64_t max_bitmap; + uint64_t migr_pgsize; int flags; QemuCond close_cv; AioContext *ctx; @@ -76,11 +83,13 @@ typedef struct VFIOProxy { /* VFIOProxy flags */ #define VFIO_PROXY_CLIENT 0x1 +#define VFIO_PROXY_FORCE_QUEUED 0x4 VFIOProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp); void vfio_user_disconnect(VFIOProxy *proxy); void vfio_user_set_handler(VFIODevice *vbasedev, void (*handler)(void *opaque, VFIOUserMsg *msg), void *reqarg); +int vfio_user_validate_version(VFIOProxy *proxy, Error **errp); #endif /* VFIO_USER_H */