| Message ID | 3c043becf2b6e820f5392e0cadb465d5d9b9e6f8.1630084211.git.jag.raman@oracle.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | vfio-user server in QEMU |
On Fri, Aug 27, 2021 at 01:53:24PM -0400, Jagannathan Raman wrote:
> Setup a handler to run vfio-user context. The context is driven by
> messages to the file descriptor associated with it - get the fd for
> the context and hook up the handler with it
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  hw/remote/vfio-user-obj.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 70 insertions(+), 1 deletion(-)
>
> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index 5ae0991..0726eb9 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -35,6 +35,7 @@
>  #include "trace.h"
>  #include "sysemu/runstate.h"
>  #include "qemu/notify.h"
> +#include "qemu/thread.h"
>  #include "qapi/error.h"
>  #include "sysemu/sysemu.h"
>  #include "libvfio-user.h"
> @@ -65,6 +66,8 @@ struct VfuObject {
>      vfu_ctx_t *vfu_ctx;
>
>      PCIDevice *pci_dev;
> +
> +    int vfu_poll_fd;
>  };
>
>  static void vfu_object_set_socket(Object *obj, const char *str, Error **errp)
> @@ -89,13 +92,67 @@ static void vfu_object_set_devid(Object *obj, const char *str, Error **errp)
>      trace_vfu_prop("devid", str);
>  }
>
> +static void vfu_object_ctx_run(void *opaque)
> +{
> +    VfuObject *o = opaque;
> +    int ret = -1;
> +
> +    while (ret != 0) {
> +        ret = vfu_run_ctx(o->vfu_ctx);
> +        if (ret < 0) {
> +            if (errno == EINTR) {
> +                continue;
> +            } else if (errno == ENOTCONN) {
> +                qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
> +                o->vfu_poll_fd = -1;
> +                object_unparent(OBJECT(o));
> +                break;
> +            } else {
> +                error_setg(&error_abort, "vfu: Failed to run device %s - %s",
> +                           o->devid, strerror(errno));
> +                break;
> +            }
> +        }
> +    }
> +}
> +
> +static void *vfu_object_attach_ctx(void *opaque)
> +{
> +    VfuObject *o = opaque;
> +    int ret;
> +
> +retry_attach:
> +    ret = vfu_attach_ctx(o->vfu_ctx);
> +    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {

Does this loop consume 100% CPU since this is non-blocking?

Is it possible to register the fd with a QEMU AioContext instead of
spawning a separate thread?

libvfio-user has non-blocking listen_fd but conn_fd is always blocking.
This means ATTACH_NB is not useful because vfu_attach_ctx() is actually
blocking. I think this means vfu_run_ctx() is also blocking in some
places and QEMU's event loop might hang :(.

Can you make libvfio-user non-blocking in order to solve these issues?

> +        goto retry_attach;
> +    } else if (ret < 0) {
> +        error_setg(&error_abort,
> +                   "vfu: Failed to attach device %s to context - %s",
> +                   o->devid, strerror(errno));
> +        return NULL;
> +    }
> +
> +    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
> +    if (o->vfu_poll_fd < 0) {
> +        error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->devid);
> +        return NULL;
> +    }
> +
> +    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run,
> +                        NULL, o);
> +
> +    return NULL;
> +}
> +
>  static void vfu_object_machine_done(Notifier *notifier, void *data)
>  {
>      VfuObject *o = container_of(notifier, VfuObject, machine_done);
>      DeviceState *dev = NULL;
> +    QemuThread thread;
>      int ret;
>
> -    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket, 0,
> +    o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket,
> +                                LIBVFIO_USER_FLAG_ATTACH_NB,
>                                  o, VFU_DEV_TYPE_PCI);
>      if (o->vfu_ctx == NULL) {
>          error_setg(&error_abort, "vfu: Failed to create context - %s",
> @@ -124,6 +181,16 @@ static void vfu_object_machine_done(Notifier *notifier, void *data)
>                     o->devid, strerror(errno));
>          return;
>      }
> +
> +    ret = vfu_realize_ctx(o->vfu_ctx);
> +    if (ret < 0) {
> +        error_setg(&error_abort, "vfu: Failed to realize device %s- %s",
> +                   o->devid, strerror(errno));
> +        return;
> +    }
> +
> +    qemu_thread_create(&thread, o->socket, vfu_object_attach_ctx, o,
> +                       QEMU_THREAD_DETACHED);

Is this thread leaked when the object is destroyed?

>  }
>
>  static void vfu_object_init(Object *obj)
> @@ -147,6 +214,8 @@ static void vfu_object_init(Object *obj)
>
>      o->machine_done.notify = vfu_object_machine_done;
>      qemu_add_machine_init_done_notifier(&o->machine_done);
> +
> +    o->vfu_poll_fd = -1;
>  }
>
>  static void vfu_object_finalize(Object *obj)
> --
> 1.8.3.1
>
On Wed, Sep 08, 2021 at 01:58:46PM +0100, Stefan Hajnoczi wrote:

> > +static void *vfu_object_attach_ctx(void *opaque)
> > +{
> > +    VfuObject *o = opaque;
> > +    int ret;
> > +
> > +retry_attach:
> > +    ret = vfu_attach_ctx(o->vfu_ctx);
> > +    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
>
> Does this loop consume 100% CPU since this is non-blocking?

Looks like it. Instead, after vfu_create_ctx(), there should be a
vfu_get_poll_fd() to get the listen socket, then a
qemu_set_fd_handler(vfu_object_attach_ctx) to handle the attach when the
listen socket is ready, modulo the below part.

> libvfio-user has non-blocking listen_fd but conn_fd is always blocking.

It is, but in vfu_run_ctx(), we poll on it:

```
790     if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) {
791         sock_flags = MSG_DONTWAIT | MSG_WAITALL;
792     }
793     return get_msg(hdr, sizeof(*hdr), fds, nr_fds, ts->conn_fd, sock_flags);
```

> This means ATTACH_NB is not useful because vfu_attach_ctx() is actually
> blocking.

You're correct that vfu_attach_ctx() is in fact partially blocking: after
accepting the connection, we call negotiate(), which can indeed block
waiting if the client hasn't sent anything.

> I think this means vfu_run_ctx() is also blocking in some places

Correct. There's a presumption that if a message is ready, we can read it
all without blocking, and equally that we can write to the socket without
blocking.

The library docs are not at all clear on this point.

> and QEMU's event loop might hang :(
>
> Can you make libvfio-user non-blocking in order to solve these issues?

I presume you're concerned about the security aspect: a malicious client
could withhold a write, and hence hang the device server.

The problem is that the libvfio-user API is synchronous: there's no way to
return half-way through a vfu_attach_ctx() (or a vfu_run_ctx() after we
read the header) and then resume.

We'd have to have a whole separate API to do that, so a separate thread
seems a better approach?

regards
john
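A minimal sketch of the event-loop-driven attach John describes above (an editor's illustration, not part of the posted patch): vfu_object_attach_ready() is a hypothetical name, and the sketch assumes, per John's comment, that vfu_get_poll_fd() returns the listen socket before a client has attached and the connection socket afterwards. vfu_object_ctx_run() is the handler from the patch.

```c
/*
 * Hypothetical handler (not in the patch): called by QEMU's event loop
 * when the listen socket becomes readable, so there is no busy-wait.
 */
static void vfu_object_attach_ready(void *opaque)
{
    VfuObject *o = opaque;

    if (vfu_attach_ctx(o->vfu_ctx) < 0) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            return; /* spurious wakeup: stay registered and wait again */
        }
        error_setg(&error_abort, "vfu: Failed to attach device %s - %s",
                   o->devid, strerror(errno));
        return;
    }

    /* Attached: swap the listen fd handler for the connection fd handler. */
    qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL);
    o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx);
    qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o);
}
```

vfu_object_machine_done() would then register this handler on the listen fd instead of creating a thread. Note that, per the discussion above, negotiate() inside vfu_attach_ctx() can still block this handler.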
On Wed, Sep 08, 2021 at 01:37:53PM +0000, John Levon wrote:
> On Wed, Sep 08, 2021 at 01:58:46PM +0100, Stefan Hajnoczi wrote:
>
> > > +static void *vfu_object_attach_ctx(void *opaque)
> > > +{
> > > +    VfuObject *o = opaque;
> > > +    int ret;
> > > +
> > > +retry_attach:
> > > +    ret = vfu_attach_ctx(o->vfu_ctx);
> > > +    if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
> >
> > Does this loop consume 100% CPU since this is non-blocking?
>
> Looks like it. Instead, after vfu_create_ctx(), there should be a
> vfu_get_poll_fd() to get the listen socket, then a
> qemu_set_fd_handler(vfu_object_attach_ctx) to handle the attach when the
> listen socket is ready, modulo the below part.
>
> > libvfio-user has non-blocking listen_fd but conn_fd is always blocking.
>
> It is, but in vfu_run_ctx(), we poll on it:
>
> ```
> 790     if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) {
> 791         sock_flags = MSG_DONTWAIT | MSG_WAITALL;
> 792     }
> 793     return get_msg(hdr, sizeof(*hdr), fds, nr_fds, ts->conn_fd, sock_flags);
> ```

This is only used for the request header. Other I/O is blocking.

> > This means ATTACH_NB is not useful because vfu_attach_ctx() is actually
> > blocking.
>
> You're correct that vfu_attach_ctx() is in fact partially blocking: after
> accepting the connection, we call negotiate(), which can indeed block
> waiting if the client hasn't sent anything.
>
> > I think this means vfu_run_ctx() is also blocking in some places
>
> Correct. There's a presumption that if a message is ready, we can read it
> all without blocking, and equally that we can write to the socket without
> blocking.
>
> The library docs are not at all clear on this point.
>
> > and QEMU's event loop might hang :(
> >
> > Can you make libvfio-user non-blocking in order to solve these issues?
>
> I presume you're concerned about the security aspect: a malicious client
> could withhold a write, and hence hang the device server.
>
> The problem is that the libvfio-user API is synchronous: there's no way to
> return half-way through a vfu_attach_ctx() (or a vfu_run_ctx() after we
> read the header) and then resume.
>
> We'd have to have a whole separate API to do that, so a separate thread
> seems a better approach?

Whether to support non-blocking properly in libvfio-user is a decision for
you. If libvfio-user doesn't support non-blocking, then QEMU should run a
dedicated thread instead of the partially non-blocking approach in this
patch.

A non-blocking approach is nice when there are many devices hosted in a
single process or a lot of async replies (which require extra thread
synchronization with the blocking approach).

Stefan
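A sketch of the dedicated-thread shape Stefan suggests (editor's illustration, not part of the posted patch): vfu_object_io_thread() is a hypothetical name, and the sketch assumes the context is created without LIBVFIO_USER_FLAG_ATTACH_NB, so both the attach and the run loop may block safely here.

```c
/*
 * Hypothetical I/O thread (not in the patch): owns the context for its
 * whole lifetime, so no blocking call ever runs in QEMU's event loop.
 */
static void *vfu_object_io_thread(void *opaque)
{
    VfuObject *o = opaque;
    int ret;

    /* Blocks until a client connects and negotiation completes. */
    if (vfu_attach_ctx(o->vfu_ctx) < 0) {
        error_setg(&error_abort, "vfu: Failed to attach device %s - %s",
                   o->devid, strerror(errno));
        return NULL;
    }

    for (;;) {
        ret = vfu_run_ctx(o->vfu_ctx); /* blocks waiting for requests */
        if (ret < 0 && errno != EINTR) {
            break; /* ENOTCONN on client disconnect, or a real error */
        }
    }

    return NULL;
}
```

Creating this thread joinable (QEMU_THREAD_JOINABLE) and joining it in vfu_object_finalize() would also answer the earlier question about the detached thread's lifetime, at the cost of having to unblock the thread (e.g. by closing the socket) before the join.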
On Wed, Sep 08, 2021 at 04:02:22PM +0100, Stefan Hajnoczi wrote:
> > We'd have to have a whole separate API to do that, so a separate thread
> > seems a better approach?
>
> Whether to support non-blocking properly in libvfio-user is a decision
> for you. If libvfio-user doesn't support non-blocking, then QEMU should
> run a dedicated thread instead of the partially non-blocking approach in
> this patch.

Right, sure. At this point we don't have any plans to implement a separate
async API due to the amount of work involved.

> A non-blocking approach is nice when there are many devices hosted in a
> single process or a lot of async replies (which require extra thread
> synchronization with the blocking approach).

I suppose this would be more of a problem with devices where the I/O path
has to be handled via the socket.

regards
john
On Wed, Sep 08, 2021 at 03:21:19PM +0000, John Levon wrote:
> On Wed, Sep 08, 2021 at 04:02:22PM +0100, Stefan Hajnoczi wrote:
>
> > > We'd have to have a whole separate API to do that, so a separate
> > > thread seems a better approach?
> >
> > Whether to support non-blocking properly in libvfio-user is a decision
> > for you. If libvfio-user doesn't support non-blocking, then QEMU
> > should run a dedicated thread instead of the partially non-blocking
> > approach in this patch.
>
> Right, sure. At this point we don't have any plans to implement a
> separate async API due to the amount of work involved.
>
> > A non-blocking approach is nice when there are many devices hosted in
> > a single process or a lot of async replies (which require extra thread
> > synchronization with the blocking approach).
>
> I suppose this would be more of a problem with devices where the I/O
> path has to be handled via the socket.

Yes, exactly. I think it shouldn't be a problem when shared memory is used
and the irqfd (eventfd) mechanism is used for IRQs. It becomes slow when
there's no shared memory or if raising IRQs requires protocol messages.

Stefan
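To make the irqfd point concrete, a minimal editor's sketch (raise_irq() is a hypothetical helper; the eventfd would arrive from the client, e.g. via the protocol's interrupt setup message): once the server holds an eventfd for an interrupt, raising it is a single 8-byte write(2), so the blocking vfio-user socket never sits on the interrupt path.

```c
#include <stdint.h>
#include <unistd.h>

/*
 * Hypothetical helper: signal an interrupt through an eventfd received
 * from the client, instead of sending a vfio-user protocol message.
 */
static int raise_irq(int irqfd)
{
    uint64_t val = 1;

    /*
     * eventfd(2) semantics: adding to the 8-byte counter wakes whoever
     * polls the other end (KVM's irqfd or the client's event loop).
     */
    if (write(irqfd, &val, sizeof(val)) != sizeof(val)) {
        return -1;
    }
    return 0;
}
```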