Message ID | f7fdee9b5cde0f8ee8e99702f113ab22dc4167ea.1639549843.git.jag.raman@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user server in QEMU | expand |
On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: > @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) > vfu_object_init_ctx(o, errp); > } > > +static void vfu_object_ctx_run(void *opaque) > +{ > + VfuObject *o = opaque; > + int ret = -1; > + > + while (ret != 0) { > + ret = vfu_run_ctx(o->vfu_ctx); > + if (ret < 0) { > + if (errno == EINTR) { > + continue; > + } else if (errno == ENOTCONN) { > + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > + o->vfu_poll_fd = -1; > + object_unparent(OBJECT(o)); > + break; If nothing else logs a message then I think that should be done here so users know why their vfio-user server object disappeared. > + } else { > + error_setg(&error_abort, "vfu: Failed to run device %s - %s", > + o->device, strerror(errno)); error_abort is equivalent to assuming !o->daemon. In the case where the user doesn't want to automatically shut down the process we need to log a message without aborting. > + break; Indentation is off. > + } > + } > + } > +} > + > +static void vfu_object_attach_ctx(void *opaque) > +{ > + VfuObject *o = opaque; > + GPollFD pfds[1]; > + int ret; > + > + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > + > + pfds[0].fd = o->vfu_poll_fd; > + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > + > +retry_attach: > + ret = vfu_attach_ctx(o->vfu_ctx); > + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { > + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); > + goto retry_attach; This can block the thread indefinitely. Other events like monitor commands are not handled in this loop. Please make this asynchronous (set an fd handler and return from this function so we can try again later). The vfu_attach_ctx() implementation synchronously negotiates the vfio-user connection :(. That's a shame because even if accept(2) is handled asynchronously, the negotiation can still block. It would be cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to avoid blocking. Is that possible? If libvfio-user can't make vfu_attach_ctx() fully async then it may be possible to spawn a thread just for the blocking vfu_attach_ctx() call and report the result back to the event loop (e.g. using EventNotifier). That adds a bunch of code to work around a blocking API though, so I guess we can leave the blocking part if necessary. At the very minimum, please make EAGAIN/EWOULDBLOCK async as mentioned above and add a comment explaining the situation with the partially-async vfu_attach_ctx() API so it's clear that this can block (that way it's clear that you're aware of the issue and this isn't by accident). > + } else if (ret < 0) { > + error_setg(&error_abort, > + "vfu: Failed to attach device %s to context - %s", > + o->device, strerror(errno)); error_abort assumes !o->daemon. Please handle the o->daemon == true case by logging an error without aborting. > + return; > + } > + > + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); > + if (o->vfu_poll_fd < 0) { > + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device); Same here. > @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) > TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); > return; > } > + > + o->vfu_poll_fd = -1; > } This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL) when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler callback registered.
> On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) >> vfu_object_init_ctx(o, errp); >> } >> >> +static void vfu_object_ctx_run(void *opaque) >> +{ >> + VfuObject *o = opaque; >> + int ret = -1; >> + >> + while (ret != 0) { >> + ret = vfu_run_ctx(o->vfu_ctx); >> + if (ret < 0) { >> + if (errno == EINTR) { >> + continue; >> + } else if (errno == ENOTCONN) { >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); >> + o->vfu_poll_fd = -1; >> + object_unparent(OBJECT(o)); >> + break; > > If nothing else logs a message then I think that should be done here so > users know why their vfio-user server object disappeared. Sure will do. Do you prefer a trace, or a message to the console? Trace makes sense to me. Presently, the client could unplug the vfio-user device which would trigger the deletion of this object. This process could happen quietly. > >> + } else { >> + error_setg(&error_abort, "vfu: Failed to run device %s - %s", >> + o->device, strerror(errno)); > > error_abort is equivalent to assuming !o->daemon. In the case where the > user doesn't want to automatically shut down the process we need to log > a message without aborting. OK, makes sense. > >> + break; > > Indentation is off. > >> + } >> + } >> + } >> +} >> + >> +static void vfu_object_attach_ctx(void *opaque) >> +{ >> + VfuObject *o = opaque; >> + GPollFD pfds[1]; >> + int ret; >> + >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); >> + >> + pfds[0].fd = o->vfu_poll_fd; >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; >> + >> +retry_attach: >> + ret = vfu_attach_ctx(o->vfu_ctx); >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); >> + goto retry_attach; > > This can block the thread indefinitely. Other events like monitor > commands are not handled in this loop. Please make this asynchronous > (set an fd handler and return from this function so we can try again > later). > > The vfu_attach_ctx() implementation synchronously negotiates the > vfio-user connection :(. That's a shame because even if accept(2) is > handled asynchronously, the negotiation can still block. It would be > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to > avoid blocking. Is that possible? Thanos / John, Any thoughts on this? > > If libvfio-user can't make vfu_attach_ctx() fully async then it may be > possible to spawn a thread just for the blocking vfu_attach_ctx() call > and report the result back to the event loop (e.g. using EventNotifier). > That adds a bunch of code to work around a blocking API though, so I > guess we can leave the blocking part if necessary. > > At the very minimum, please make EAGAIN/EWOULDBLOCK async as mentioned > above and add a comment explaining the situation with the > partially-async vfu_attach_ctx() API so it's clear that this can block > (that way it's clear that you're aware of the issue and this isn't by > accident). Sure, we could make the attach async at QEMU depending on how the library prefers to do this. > >> + } else if (ret < 0) { >> + error_setg(&error_abort, >> + "vfu: Failed to attach device %s to context - %s", >> + o->device, strerror(errno)); > > error_abort assumes !o->daemon. Please handle the o->daemon == true > case by logging an error without aborting. > >> + return; >> + } >> + >> + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); >> + if (o->vfu_poll_fd < 0) { >> + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device); > > Same here. > >> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) >> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); >> return; >> } >> + >> + o->vfu_poll_fd = -1; >> } > > This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL) > when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler > callback registered. This is during the init phase, and the FD handlers are not set. Do you mean to add this at finalize? I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should trigger a ENOTCONN, which would do it anyway. Thank you! -- Jag
On Fri, Dec 17, 2021 at 05:59:48PM +0000, Jag Raman wrote: > > > > On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: > >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) > >> vfu_object_init_ctx(o, errp); > >> } > >> > >> +static void vfu_object_ctx_run(void *opaque) > >> +{ > >> + VfuObject *o = opaque; > >> + int ret = -1; > >> + > >> + while (ret != 0) { > >> + ret = vfu_run_ctx(o->vfu_ctx); > >> + if (ret < 0) { > >> + if (errno == EINTR) { > >> + continue; > >> + } else if (errno == ENOTCONN) { > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > >> + o->vfu_poll_fd = -1; > >> + object_unparent(OBJECT(o)); > >> + break; > > > > If nothing else logs a message then I think that should be done here so > > users know why their vfio-user server object disappeared. > > Sure will do. > > Do you prefer a trace, or a message to the console? Trace makes sense to me. > Presently, the client could unplug the vfio-user device which would trigger the > deletion of this object. This process could happen quietly. If there is no way to differentiate graceful disconnect from unexpected disconnect then logging might be too noisy. Regarding the automatic deletion of the object, that might not be desirable for two reasons: 1. It prevents reconnection or another client connecting. 2. Management tools are in the dark about it. For #2 there are monitor events that QEMU emits to notify management tools about state changes like disconnections. It's worth thinking about current and future use cases before baking in a policy like automatically deleting VfuObject on disconnect because it's inflexible and would require a QEMU update in the future to support a different policy. One approach is to emit a disconnect event but leave the VfuObject in a disconnected state. The management tool can then restart or clean up, depending on its policy. I'm not sure what's best because it depends on the use cases, but maybe you and others have ideas here. > >> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) > >> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); > >> return; > >> } > >> + > >> + o->vfu_poll_fd = -1; > >> } > > > > This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL) > > when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler > > callback registered. > > This is during the init phase, and the FD handlers are not set. Do you mean > to add this at finalize? > > I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should > trigger a ENOTCONN, which would do it anyway. I'm not sure my comment makes sense since this is the init function, not finalize. However, it's not clear to me that the o->vfu_poll_fd fd handler is unregistered from the event loop when VfuObject is finalized (e.g. by the object-del monitor command). You say vfu_destroy_ctx() triggers ENOTCONN, but the VfuObject is freed after finalize returns so when is the fd handler deregistered? Stefan
> On Dec 20, 2021, at 3:29 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Fri, Dec 17, 2021 at 05:59:48PM +0000, Jag Raman wrote: >> >> >>> On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: >>> >>> On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: >>>> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) >>>> vfu_object_init_ctx(o, errp); >>>> } >>>> >>>> +static void vfu_object_ctx_run(void *opaque) >>>> +{ >>>> + VfuObject *o = opaque; >>>> + int ret = -1; >>>> + >>>> + while (ret != 0) { >>>> + ret = vfu_run_ctx(o->vfu_ctx); >>>> + if (ret < 0) { >>>> + if (errno == EINTR) { >>>> + continue; >>>> + } else if (errno == ENOTCONN) { >>>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); >>>> + o->vfu_poll_fd = -1; >>>> + object_unparent(OBJECT(o)); >>>> + break; >>> >>> If nothing else logs a message then I think that should be done here so >>> users know why their vfio-user server object disappeared. >> >> Sure will do. >> >> Do you prefer a trace, or a message to the console? Trace makes sense to me. >> Presently, the client could unplug the vfio-user device which would trigger the >> deletion of this object. This process could happen quietly. > > If there is no way to differentiate graceful disconnect from unexpected > disconnect then logging might be too noisy. I think that’s what happens in the regular VFIO case also. vfio_put_base_device() closes the FD used for ioctls. > > Regarding the automatic deletion of the object, that might not be > desirable for two reasons: > 1. It prevents reconnection or another client connecting. > 2. Management tools are in the dark about it. > > For #2 there are monitor events that QEMU emits to notify management > tools about state changes like disconnections. This is very interesting. I suppose you’re referring to something like ‘BLOCK_JOB_COMPLETED’ event. It’d be good to inform the management tools about disconnection. Not used this before, will check it out to gather ideas on how to use it. > > It's worth thinking about current and future use cases before baking in > a policy like automatically deleting VfuObject on disconnect because > it's inflexible and would require a QEMU update in the future to support > a different policy. > > One approach is to emit a disconnect event but leave the VfuObject in a > disconnected state. The management tool can then restart or clean up, > depending on its policy. > > I'm not sure what's best because it depends on the use cases, but maybe > you and others have ideas here. > >>>> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) >>>> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); >>>> return; >>>> } >>>> + >>>> + o->vfu_poll_fd = -1; >>>> } >>> >>> This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL) >>> when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler >>> callback registered. >> >> This is during the init phase, and the FD handlers are not set. Do you mean >> to add this at finalize? >> >> I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should >> trigger a ENOTCONN, which would do it anyway. > > I'm not sure my comment makes sense since this is the init function, not > finalize. > > However, it's not clear to me that the o->vfu_poll_fd fd handler is > unregistered from the event loop when VfuObject is finalized (e.g. by > the object-del monitor command). You say vfu_destroy_ctx() triggers > ENOTCONN, but the VfuObject is freed after finalize returns so when is > the fd handler deregistered? That is correct - will remove the FD handler in finalize also. Thank you! -- Jag > > Stefan
> -----Original Message----- > From: Jag Raman <jag.raman@oracle.com> > Sent: 17 December 2021 18:00 > To: Stefan Hajnoczi <stefanha@redhat.com>; John Levon > <john.levon@nutanix.com>; Thanos Makatos <thanos.makatos@nutanix.com> > Cc: qemu-devel <qemu-devel@nongnu.org>; Alex Williamson > <alex.williamson@redhat.com>; Marc-André Lureau > <marcandre.lureau@gmail.com>; Philippe Mathieu-Daudé > <philmd@redhat.com>; pbonzini@redhat.com; alex.bennee@linaro.org; > thuth@redhat.com; crosa@redhat.com; wainersm@redhat.com; > bleal@redhat.com; Elena Ufimtseva <elena.ufimtseva@oracle.com>; John > Levon <john.levon@nutanix.com>; John Johnson > <john.g.johnson@oracle.com>; Thanos Makatos > <thanos.makatos@nutanix.com>; Swapnil Ingle <swapnil.ingle@nutanix.com> > Subject: Re: [PATCH v4 07/14] vfio-user: run vfio-user context > > > > > On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: > >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, > const char *str, Error **errp) > >> vfu_object_init_ctx(o, errp); > >> } > >> > >> +static void vfu_object_ctx_run(void *opaque) > >> +{ > >> + VfuObject *o = opaque; > >> + int ret = -1; > >> + > >> + while (ret != 0) { > >> + ret = vfu_run_ctx(o->vfu_ctx); > >> + if (ret < 0) { > >> + if (errno == EINTR) { > >> + continue; > >> + } else if (errno == ENOTCONN) { > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > >> + o->vfu_poll_fd = -1; > >> + object_unparent(OBJECT(o)); > >> + break; > > > > If nothing else logs a message then I think that should be done here so > > users know why their vfio-user server object disappeared. > > Sure will do. > > Do you prefer a trace, or a message to the console? Trace makes sense to me. > Presently, the client could unplug the vfio-user device which would trigger the > deletion of this object. This process could happen quietly. > > > > >> + } else { > >> + error_setg(&error_abort, "vfu: Failed to run device %s - %s", > >> + o->device, strerror(errno)); > > > > error_abort is equivalent to assuming !o->daemon. In the case where the > > user doesn't want to automatically shut down the process we need to log > > a message without aborting. > > OK, makes sense. > > > > >> + break; > > > > Indentation is off. > > > >> + } > >> + } > >> + } > >> +} > >> + > >> +static void vfu_object_attach_ctx(void *opaque) > >> +{ > >> + VfuObject *o = opaque; > >> + GPollFD pfds[1]; > >> + int ret; > >> + > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > >> + > >> + pfds[0].fd = o->vfu_poll_fd; > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > >> + > >> +retry_attach: > >> + ret = vfu_attach_ctx(o->vfu_ctx); > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); > >> + goto retry_attach; > > > > This can block the thread indefinitely. Other events like monitor > > commands are not handled in this loop. Please make this asynchronous > > (set an fd handler and return from this function so we can try again > > later). > > > > The vfu_attach_ctx() implementation synchronously negotiates the > > vfio-user connection :(. That's a shame because even if accept(2) is > > handled asynchronously, the negotiation can still block. It would be > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to > > avoid blocking. Is that possible? > > Thanos / John, > > Any thoughts on this? I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite. > > > > > If libvfio-user can't make vfu_attach_ctx() fully async then it may be > > possible to spawn a thread just for the blocking vfu_attach_ctx() call > > and report the result back to the event loop (e.g. using EventNotifier). > > That adds a bunch of code to work around a blocking API though, so I > > guess we can leave the blocking part if necessary. > > > > At the very minimum, please make EAGAIN/EWOULDBLOCK async as > mentioned > > above and add a comment explaining the situation with the > > partially-async vfu_attach_ctx() API so it's clear that this can block > > (that way it's clear that you're aware of the issue and this isn't by > > accident). > > Sure, we could make the attach async at QEMU depending on how the > library prefers to do this. > > > > >> + } else if (ret < 0) { > >> + error_setg(&error_abort, > >> + "vfu: Failed to attach device %s to context - %s", > >> + o->device, strerror(errno)); > > > > error_abort assumes !o->daemon. Please handle the o->daemon == true > > case by logging an error without aborting. > > > >> + return; > >> + } > >> + > >> + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); > >> + if (o->vfu_poll_fd < 0) { > >> + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device); > > > > Same here. > > > >> @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) > >> TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); > >> return; > >> } > >> + > >> + o->vfu_poll_fd = -1; > >> } > > > > This must call qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL) > > when o->vfu_poll_fd != -1 to avoid leaving a dangling fd handler > > callback registered. > > This is during the init phase, and the FD handlers are not set. Do you mean > to add this at finalize? > > I agree it’s good to explicitly add this at finalize. But vfu_destroy_ctx() should > trigger a ENOTCONN, which would do it anyway. > > Thank you! > -- > Jag
On Wed, Jan 05, 2022 at 10:38:10AM +0000, Thanos Makatos wrote: > > > > -----Original Message----- > > From: Jag Raman <jag.raman@oracle.com> > > Sent: 17 December 2021 18:00 > > To: Stefan Hajnoczi <stefanha@redhat.com>; John Levon > > <john.levon@nutanix.com>; Thanos Makatos <thanos.makatos@nutanix.com> > > Cc: qemu-devel <qemu-devel@nongnu.org>; Alex Williamson > > <alex.williamson@redhat.com>; Marc-André Lureau > > <marcandre.lureau@gmail.com>; Philippe Mathieu-Daudé > > <philmd@redhat.com>; pbonzini@redhat.com; alex.bennee@linaro.org; > > thuth@redhat.com; crosa@redhat.com; wainersm@redhat.com; > > bleal@redhat.com; Elena Ufimtseva <elena.ufimtseva@oracle.com>; John > > Levon <john.levon@nutanix.com>; John Johnson > > <john.g.johnson@oracle.com>; Thanos Makatos > > <thanos.makatos@nutanix.com>; Swapnil Ingle <swapnil.ingle@nutanix.com> > > Subject: Re: [PATCH v4 07/14] vfio-user: run vfio-user context > > > > > > > > > On Dec 16, 2021, at 6:17 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > > > On Wed, Dec 15, 2021 at 10:35:31AM -0500, Jagannathan Raman wrote: > > >> @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, > > const char *str, Error **errp) > > >> vfu_object_init_ctx(o, errp); > > >> } > > >> > > >> +static void vfu_object_ctx_run(void *opaque) > > >> +{ > > >> + VfuObject *o = opaque; > > >> + int ret = -1; > > >> + > > >> + while (ret != 0) { > > >> + ret = vfu_run_ctx(o->vfu_ctx); > > >> + if (ret < 0) { > > >> + if (errno == EINTR) { > > >> + continue; > > >> + } else if (errno == ENOTCONN) { > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > > >> + o->vfu_poll_fd = -1; > > >> + object_unparent(OBJECT(o)); > > >> + break; > > > > > > If nothing else logs a message then I think that should be done here so > > > users know why their vfio-user server object disappeared. > > > > Sure will do. > > > > Do you prefer a trace, or a message to the console? Trace makes sense to me. > > Presently, the client could unplug the vfio-user device which would trigger the > > deletion of this object. This process could happen quietly. > > > > > > > >> + } else { > > >> + error_setg(&error_abort, "vfu: Failed to run device %s - %s", > > >> + o->device, strerror(errno)); > > > > > > error_abort is equivalent to assuming !o->daemon. In the case where the > > > user doesn't want to automatically shut down the process we need to log > > > a message without aborting. > > > > OK, makes sense. > > > > > > > >> + break; > > > > > > Indentation is off. > > > > > >> + } > > >> + } > > >> + } > > >> +} > > >> + > > >> +static void vfu_object_attach_ctx(void *opaque) > > >> +{ > > >> + VfuObject *o = opaque; > > >> + GPollFD pfds[1]; > > >> + int ret; > > >> + > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > > >> + > > >> + pfds[0].fd = o->vfu_poll_fd; > > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > > >> + > > >> +retry_attach: > > >> + ret = vfu_attach_ctx(o->vfu_ctx); > > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { > > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); > > >> + goto retry_attach; > > > > > > This can block the thread indefinitely. Other events like monitor > > > commands are not handled in this loop. Please make this asynchronous > > > (set an fd handler and return from this function so we can try again > > > later). > > > > > > The vfu_attach_ctx() implementation synchronously negotiates the > > > vfio-user connection :(. That's a shame because even if accept(2) is > > > handled asynchronously, the negotiation can still block. It would be > > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to > > > avoid blocking. Is that possible? > > > > Thanos / John, > > > > Any thoughts on this? > > I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite. I see at least two reasons for a fully async API: 1. The program wants to handle other events (e.g. a management REST API) from the same event loop thread that invokes libvfio-user. If libvfio-user blocks then the other events cannot be handled within a reasonable time frame. The workaround for this is to use multi-threading and ignore the event-driven architecture implied by vfu_get_poll_fd(). 2. The program handles multiple clients that do not trust each other. This could be a software-defined network switch or storage appliance. A malicious client can cause a denial-of-service by making a libvfio-user call block. Again, the program needs separate threads instead of an event loop to work around this. The downside to a sync approach is that programs that already have an event loop require extra code to set up dedicated threads for libvfio-user. That's a library integration/usability issue. In some cases it's okay to block: when the program doesn't need to handle other events. If most users of libvfio-user are expected to fall into this category then there's no need to change the API. Either way, the doc comments in libvfio-user.h aren't very clear. Someone integrating this library may think vfu_get_poll_fd() allows for fully async operation. Stefan
On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote: > > > >> +static void vfu_object_attach_ctx(void *opaque) > > > >> +{ > > > >> + VfuObject *o = opaque; > > > >> + GPollFD pfds[1]; > > > >> + int ret; > > > >> + > > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > > > >> + > > > >> + pfds[0].fd = o->vfu_poll_fd; > > > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > > > >> + > > > >> +retry_attach: > > > >> + ret = vfu_attach_ctx(o->vfu_ctx); > > > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { > > > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); > > > >> + goto retry_attach; > > > > > > > > This can block the thread indefinitely. Other events like monitor > > > > commands are not handled in this loop. Please make this asynchronous > > > > (set an fd handler and return from this function so we can try again > > > > later). > > > > > > > > The vfu_attach_ctx() implementation synchronously negotiates the > > > > vfio-user connection :(. That's a shame because even if accept(2) is > > > > handled asynchronously, the negotiation can still block. It would be > > > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to > > > > avoid blocking. Is that possible? > > > > > > Thanos / John, > > > > > > Any thoughts on this? > > > > I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite. > > I see at least two reasons for a fully async API: > > 1. The program wants to handle other events (e.g. a management REST API) > from the same event loop thread that invokes libvfio-user. If > libvfio-user blocks then the other events cannot be handled within a > reasonable time frame. > > The workaround for this is to use multi-threading and ignore the > event-driven architecture implied by vfu_get_poll_fd(). > > 2. The program handles multiple clients that do not trust each other. > This could be a software-defined network switch or storage appliance. > A malicious client can cause a denial-of-service by making a > libvfio-user call block. > > Again, the program needs separate threads instead of an event loop to > work around this. Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't yet found resources to fix this: in practice, we don't really hit problems from the parts that are still synchronous. Of course, that's not a good argument when it comes to your second issue, and indeed the library is not currently suitable for multiple untrusted clients. For Jag but: patches are welcome. But it's not just negotiate(): all sorts of things are potentially synchronous - for example, it's expected that the region read/write callbacks are done synchronously. Any other client reply, or server->client message, is also synchronous. It's partly why we haven't yet stabilised the API. regards john
On Mon, Jan 10, 2022 at 05:56:25PM +0000, John Levon wrote: > On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote: > > > > > >> +static void vfu_object_attach_ctx(void *opaque) > > > > >> +{ > > > > >> + VfuObject *o = opaque; > > > > >> + GPollFD pfds[1]; > > > > >> + int ret; > > > > >> + > > > > >> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); > > > > >> + > > > > >> + pfds[0].fd = o->vfu_poll_fd; > > > > >> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; > > > > >> + > > > > >> +retry_attach: > > > > >> + ret = vfu_attach_ctx(o->vfu_ctx); > > > > >> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { > > > > >> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); > > > > >> + goto retry_attach; > > > > > > > > > > This can block the thread indefinitely. Other events like monitor > > > > > commands are not handled in this loop. Please make this asynchronous > > > > > (set an fd handler and return from this function so we can try again > > > > > later). > > > > > > > > > > The vfu_attach_ctx() implementation synchronously negotiates the > > > > > vfio-user connection :(. That's a shame because even if accept(2) is > > > > > handled asynchronously, the negotiation can still block. It would be > > > > > cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to > > > > > avoid blocking. Is that possible? > > > > > > > > Thanos / John, > > > > > > > > Any thoughts on this? > > > > > > I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite. > > > > I see at least two reasons for a fully async API: > > > > 1. The program wants to handle other events (e.g. a management REST API) > > from the same event loop thread that invokes libvfio-user. If > > libvfio-user blocks then the other events cannot be handled within a > > reasonable time frame. > > > > The workaround for this is to use multi-threading and ignore the > > event-driven architecture implied by vfu_get_poll_fd(). > > > > 2. The program handles multiple clients that do not trust each other. > > This could be a software-defined network switch or storage appliance. > > A malicious client can cause a denial-of-service by making a > > libvfio-user call block. > > > > Again, the program needs separate threads instead of an event loop to > > work around this. > > Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't > yet found resources to fix this: in practice, we don't really hit problems from > the parts that are still synchronous. Of course, that's not a good argument when > it comes to your second issue, and indeed the library is not currently suitable > for multiple untrusted clients. > > For Jag but: patches are welcome. But it's not just negotiate(): all sorts of > things are potentially synchronous - for example, it's expected that the region > read/write callbacks are done synchronously. Any other client reply, or > server->client message, is also synchronous. > > It's partly why we haven't yet stabilised the API. From the QEMU side it's okay to have blocking code in this experimental feature if users are aware of the limitation (e.g. the monitor is unresponsive if vfio-user blocks) and multiple untrusted clients are not supported. The QEMU code should also make its limitations clear so readers are aware of them and know the author chose this approach intentionally rather than due to an oversight. Jag: Please mention this in the documentation and add a comment to vfu_object_attach_ctx() explaining that this code currently blocks. I think in the long run it will be necessary to address it, e.g. in the multi-client case. That can either be done with threads or by making libvfio-user fully async. Stefan
> On Jan 11, 2022, at 4:36 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Mon, Jan 10, 2022 at 05:56:25PM +0000, John Levon wrote: >> On Thu, Jan 06, 2022 at 01:35:32PM +0000, Stefan Hajnoczi wrote: >> >>>>>>> +static void vfu_object_attach_ctx(void *opaque) >>>>>>> +{ >>>>>>> + VfuObject *o = opaque; >>>>>>> + GPollFD pfds[1]; >>>>>>> + int ret; >>>>>>> + >>>>>>> + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); >>>>>>> + >>>>>>> + pfds[0].fd = o->vfu_poll_fd; >>>>>>> + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; >>>>>>> + >>>>>>> +retry_attach: >>>>>>> + ret = vfu_attach_ctx(o->vfu_ctx); >>>>>>> + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { >>>>>>> + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); >>>>>>> + goto retry_attach; >>>>>> >>>>>> This can block the thread indefinitely. Other events like monitor >>>>>> commands are not handled in this loop. Please make this asynchronous >>>>>> (set an fd handler and return from this function so we can try again >>>>>> later). >>>>>> >>>>>> The vfu_attach_ctx() implementation synchronously negotiates the >>>>>> vfio-user connection :(. That's a shame because even if accept(2) is >>>>>> handled asynchronously, the negotiation can still block. It would be >>>>>> cleanest to have a fully async libvfio-user's vfu_attach_ctx() API to >>>>>> avoid blocking. Is that possible? >>>>> >>>>> Thanos / John, >>>>> >>>>> Any thoughts on this? >>>> >>>> I'm discussing this with John and FYI there are other places where libvfio-user can block, e.g. sending a response or receiving a command. Is it just the negotiation you want it to be asynchronous or _all_ libvfio-user operations? Making libvfio-user fully asynchronous might require a substantial API rewrite. >>> >>> I see at least two reasons for a fully async API: >>> >>> 1. The program wants to handle other events (e.g. a management REST API) >>> from the same event loop thread that invokes libvfio-user. If >>> libvfio-user blocks then the other events cannot be handled within a >>> reasonable time frame. >>> >>> The workaround for this is to use multi-threading and ignore the >>> event-driven architecture implied by vfu_get_poll_fd(). >>> >>> 2. The program handles multiple clients that do not trust each other. >>> This could be a software-defined network switch or storage appliance. >>> A malicious client can cause a denial-of-service by making a >>> libvfio-user call block. >>> >>> Again, the program needs separate threads instead of an event loop to >>> work around this. >> >> Hi Stefan, we're certainly aware of the rationale. Ultimately we just haven't >> yet found resources to fix this: in practice, we don't really hit problems from >> the parts that are still synchronous. Of course, that's not a good argument when >> it comes to your second issue, and indeed the library is not currently suitable >> for multiple untrusted clients. >> >> For Jag but: patches are welcome. But it's not just negotiate(): all sorts of >> things are potentially synchronous - for example, it's expected that the region >> read/write callbacks are done synchronously. Any other client reply, or >> server->client message, is also synchronous. >> >> It's partly why we haven't yet stabilised the API. > > From the QEMU side it's okay to have blocking code in this experimental > feature if users are aware of the limitation (e.g. the monitor is > unresponsive if vfio-user blocks) and multiple untrusted clients are not > supported. The QEMU code should also make its limitations clear so > readers are aware of them and know the author chose this approach > intentionally rather than due to an oversight. > > Jag: Please mention this in the documentation and add a comment to > vfu_object_attach_ctx() explaining that this code currently blocks. Sure, will do. Thank you! -- Jag > > I think in the long run it will be necessary to address it, e.g. in the > multi-client case. That can either be done with threads or by making > libvfio-user fully async. > > Stefan
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index bcbea59bf1..a01a0ad185 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -42,10 +42,12 @@ #include "qapi/error.h" #include "qapi/qapi-visit-sockets.h" #include "qemu/notify.h" +#include "qemu/thread.h" #include "sysemu/sysemu.h" #include "libvfio-user.h" #include "hw/qdev-core.h" #include "hw/pci/pci.h" +#include "qemu/timer.h" #define TYPE_VFU_OBJECT "x-vfio-user-server" OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) @@ -73,6 +75,8 @@ struct VfuObject { vfu_ctx_t *vfu_ctx; PCIDevice *pci_dev; + + int vfu_poll_fd; }; static void vfu_object_init_ctx(VfuObject *o, Error **errp); @@ -114,6 +118,62 @@ static void vfu_object_set_device(Object *obj, const char *str, Error **errp) vfu_object_init_ctx(o, errp); } +static void vfu_object_ctx_run(void *opaque) +{ + VfuObject *o = opaque; + int ret = -1; + + while (ret != 0) { + ret = vfu_run_ctx(o->vfu_ctx); + if (ret < 0) { + if (errno == EINTR) { + continue; + } else if (errno == ENOTCONN) { + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); + o->vfu_poll_fd = -1; + object_unparent(OBJECT(o)); + break; + } else { + error_setg(&error_abort, "vfu: Failed to run device %s - %s", + o->device, strerror(errno)); + break; + } + } + } +} + +static void vfu_object_attach_ctx(void *opaque) +{ + VfuObject *o = opaque; + GPollFD pfds[1]; + int ret; + + qemu_set_fd_handler(o->vfu_poll_fd, NULL, NULL, NULL); + + pfds[0].fd = o->vfu_poll_fd; + pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR; + +retry_attach: + ret = vfu_attach_ctx(o->vfu_ctx); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) { + qemu_poll_ns(pfds, 1, 500 * (int64_t)SCALE_MS); + goto retry_attach; + } else if (ret < 0) { + error_setg(&error_abort, + "vfu: Failed to attach device %s to context - %s", + o->device, strerror(errno)); + return; + } + + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); + if (o->vfu_poll_fd < 0) { + error_setg(&error_abort, "vfu: Failed to get poll fd %s", o->device); + return; + } + + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_ctx_run, NULL, o); +} + /* * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' * properties. It also depends on devices instantiated in QEMU. These @@ -151,7 +211,8 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) return; } - o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, 0, + o->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, o->socket->u.q_unix.path, + LIBVFIO_USER_FLAG_ATTACH_NB, o, VFU_DEV_TYPE_PCI); if (o->vfu_ctx == NULL) { error_setg(errp, "vfu: Failed to create context - %s", strerror(errno)); @@ -183,6 +244,21 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) goto fail; } + ret = vfu_realize_ctx(o->vfu_ctx); + if (ret < 0) { + error_setg(errp, "vfu: Failed to realize device %s- %s", + o->device, strerror(errno)); + goto fail; + } + + o->vfu_poll_fd = vfu_get_poll_fd(o->vfu_ctx); + if (o->vfu_poll_fd < 0) { + error_setg(errp, "vfu: Failed to get poll fd %s", o->device); + goto fail; + } + + qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o); + return; fail: @@ -208,6 +284,8 @@ static void vfu_object_init(Object *obj) TYPE_VFU_OBJECT, TYPE_REMOTE_MACHINE); return; } + + o->vfu_poll_fd = -1; } static void vfu_object_finalize(Object *obj)