diff mbox series

[RESEND,v6,12/36] multi-process: add functions to synchronize proxy and remote endpoints

Message ID 7542e59e646421515051902fcd05fbb69fa4d866.1587614626.git.elena.ufimtseva@oracle.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,v6,01/36] memory: alloc RAM from file at offset | expand

Commit Message

Elena Ufimtseva April 23, 2020, 4:13 a.m. UTC
From: Jagannathan Raman <jag.raman@oracle.com>

In some cases, for example MMIO read, QEMU has to wait for the remote to
complete a command before proceeding. An eventfd based mechanism is
added to synchronize QEMU & remote process.

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 include/io/mpqemu-link.h |  7 +++++
 io/mpqemu-link.c         | 61 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

Comments

Stefan Hajnoczi May 12, 2020, 10:21 a.m. UTC | #1
On Wed, Apr 22, 2020 at 09:13:47PM -0700, elena.ufimtseva@oracle.com wrote:
> From: Jagannathan Raman <jag.raman@oracle.com>
> 
> In some cases, for example MMIO read, QEMU has to wait for the remote to
> complete a command before proceeding. An eventfd based mechanism is
> added to synchronize QEMU & remote process.

Why are temporary eventfds used instead of sending a reply message from
the remote device program back to QEMU?

> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> ---
>  include/io/mpqemu-link.h |  7 +++++
>  io/mpqemu-link.c         | 61 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 68 insertions(+)
> 
> diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
> index af401e640c..ef95599bca 100644
> --- a/include/io/mpqemu-link.h
> +++ b/include/io/mpqemu-link.h
> @@ -124,4 +124,11 @@ void mpqemu_link_set_callback(MPQemuLinkState *s,
>  void mpqemu_start_coms(MPQemuLinkState *s, MPQemuChannel* chan);
>  bool mpqemu_msg_valid(MPQemuMsg *msg);
>  
> +#define GET_REMOTE_WAIT eventfd(0, EFD_CLOEXEC)
> +#define PUT_REMOTE_WAIT(wait) close(wait)

Hiding this in macros makes the code harder to understand.

Why is an eventfd necessary instead of a reply message? It's simpler and
probably faster to use a reply message instead of creating and passing
temporary eventfds.

> +#define PROXY_LINK_WAIT_DONE 1
> +
> +uint64_t wait_for_remote(int efd);
> +void notify_proxy(int fd, uint64_t val);
> +
>  #endif
> diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
> index 48f53a8928..cc0a7aecd4 100644
> --- a/io/mpqemu-link.c
> +++ b/io/mpqemu-link.c
> @@ -10,6 +10,7 @@
>  
>  #include "qemu/osdep.h"
>  #include "qemu-common.h"
> +#include <poll.h>
>  
>  #include "qemu/module.h"
>  #include "io/mpqemu-link.h"
> @@ -204,6 +205,66 @@ int mpqemu_msg_recv(MPQemuMsg *msg, MPQemuChannel *chan)
>      return rc;
>  }
>  
> +/*
> + * wait_for_remote() Synchronizes QEMU and the remote process. The maximum
> + *                   wait time is 1s, after which the wait times out.
> + *                   The function alse returns a 64 bit return value after
> + *                   the wait. The function uses eventfd() to do the wait
> + *                   and pass the return values. eventfd() can't return a
> + *                   value of '0'. Therefore, all return values are offset
> + *                   by '1' at the sending end, and corrected at the
> + *                   receiving end.
> + */
> +
> +uint64_t wait_for_remote(int efd)
> +{
> +    struct pollfd pfd = { .fd = efd, .events = POLLIN };
> +    uint64_t val;
> +    int ret;
> +
> +    ret = poll(&pfd, 1, 1000);

This 1 second blocking operation is not allowed in an event loop since
it will stall any other event loop activity. If locks are held then
other threads may also be stalled.

It's likely that this will need to change as part of the QEMU event loop
integration. Caller code can be kept mostly unchanged if you use
coroutines.

> +
> +    switch (ret) {
> +    case 0:
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: Timed out\n");
> +        /* TODO: Kick-off error recovery */
> +        return UINT64_MAX;
> +    case -1:
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Poll error wait_for_remote: %s\n",
> +                      strerror(errno));
> +        return UINT64_MAX;
> +    default:
> +        if (read(efd, &val, sizeof(val)) == -1) {
> +            qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: %s\n",
> +                          strerror(errno));
> +            return UINT64_MAX;
> +        }
> +    }
> +
> +    /*
> +     * The remote process could write a non-zero value
> +     * to the eventfd to wake QEMU up. However, the drawback of using eventfd
> +     * for this purpose is that a return value of zero wouldn't wake QEMU up.
> +     * Therefore, we offset the return value by one at the remote process and
> +     * correct it in the QEMU end.
> +     */
> +    val = (val == UINT64_MAX) ? val : (val - 1);
> +
> +    return val;
> +}
> +
> +void notify_proxy(int efd, uint64_t val)
> +{
> +    val = (val == UINT64_MAX) ? val : (val + 1);
> +    ssize_t len = -1;
> +
> +    len = write(efd, &val, sizeof(val));
> +    if (len == -1 || len != sizeof(val)) {
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Error notify_proxy: %s\n",
> +                      strerror(errno));
> +    }
> +}
> +
>  static gboolean mpqemu_link_handler_prepare(GSource *gsrc, gint *timeout)
>  {
>      g_assert(timeout);
> -- 
> 2.25.GIT
>
Jag Raman May 12, 2020, 12:28 p.m. UTC | #2
> On May 12, 2020, at 6:21 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> 
> On Wed, Apr 22, 2020 at 09:13:47PM -0700, elena.ufimtseva@oracle.com wrote:
>> From: Jagannathan Raman <jag.raman@oracle.com>
>> 
>> In some cases, for example MMIO read, QEMU has to wait for the remote to
>> complete a command before proceeding. An eventfd based mechanism is
>> added to synchronize QEMU & remote process.
> 
> Why are temporary eventfds used instead of sending a reply message from
> the remote device program back to QEMU?

Originally, we were envisioning a scenario where the remote process would
interrupt QEMU with a message. We used separate eventfds to distinguish
the two.

> 
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> ---
>> include/io/mpqemu-link.h |  7 +++++
>> io/mpqemu-link.c         | 61 ++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 68 insertions(+)
>> 
>> diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
>> index af401e640c..ef95599bca 100644
>> --- a/include/io/mpqemu-link.h
>> +++ b/include/io/mpqemu-link.h
>> @@ -124,4 +124,11 @@ void mpqemu_link_set_callback(MPQemuLinkState *s,
>> void mpqemu_start_coms(MPQemuLinkState *s, MPQemuChannel* chan);
>> bool mpqemu_msg_valid(MPQemuMsg *msg);
>> 
>> +#define GET_REMOTE_WAIT eventfd(0, EFD_CLOEXEC)
>> +#define PUT_REMOTE_WAIT(wait) close(wait)
> 
> Hiding this in macros makes the code harder to understand.
> 
> Why is an eventfd necessary instead of a reply message? It's simpler and
> probably faster to use a reply message instead of creating and passing
> temporary eventfds.

OK, got it.

> 
>> +#define PROXY_LINK_WAIT_DONE 1
>> +
>> +uint64_t wait_for_remote(int efd);
>> +void notify_proxy(int fd, uint64_t val);
>> +
>> #endif
>> diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
>> index 48f53a8928..cc0a7aecd4 100644
>> --- a/io/mpqemu-link.c
>> +++ b/io/mpqemu-link.c
>> @@ -10,6 +10,7 @@
>> 
>> #include "qemu/osdep.h"
>> #include "qemu-common.h"
>> +#include <poll.h>
>> 
>> #include "qemu/module.h"
>> #include "io/mpqemu-link.h"
>> @@ -204,6 +205,66 @@ int mpqemu_msg_recv(MPQemuMsg *msg, MPQemuChannel *chan)
>>     return rc;
>> }
>> 
>> +/*
>> + * wait_for_remote() Synchronizes QEMU and the remote process. The maximum
>> + *                   wait time is 1s, after which the wait times out.
>> + *                   The function alse returns a 64 bit return value after
>> + *                   the wait. The function uses eventfd() to do the wait
>> + *                   and pass the return values. eventfd() can't return a
>> + *                   value of '0'. Therefore, all return values are offset
>> + *                   by '1' at the sending end, and corrected at the
>> + *                   receiving end.
>> + */
>> +
>> +uint64_t wait_for_remote(int efd)
>> +{
>> +    struct pollfd pfd = { .fd = efd, .events = POLLIN };
>> +    uint64_t val;
>> +    int ret;
>> +
>> +    ret = poll(&pfd, 1, 1000);
> 
> This 1 second blocking operation is not allowed in an event loop since
> it will stall any other event loop activity. If locks are held then
> other threads may also be stalled.
> 
> It's likely that this will need to change as part of the QEMU event loop
> integration. Caller code can be kept mostly unchanged if you use
> coroutines.

In case the remote process has hung or terminated, the 1 second timeout
ensures that the IO operation does not block for too long.

--
Jag

> 
>> +
>> +    switch (ret) {
>> +    case 0:
>> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: Timed out\n");
>> +        /* TODO: Kick-off error recovery */
>> +        return UINT64_MAX;
>> +    case -1:
>> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Poll error wait_for_remote: %s\n",
>> +                      strerror(errno));
>> +        return UINT64_MAX;
>> +    default:
>> +        if (read(efd, &val, sizeof(val)) == -1) {
>> +            qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: %s\n",
>> +                          strerror(errno));
>> +            return UINT64_MAX;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * The remote process could write a non-zero value
>> +     * to the eventfd to wake QEMU up. However, the drawback of using eventfd
>> +     * for this purpose is that a return value of zero wouldn't wake QEMU up.
>> +     * Therefore, we offset the return value by one at the remote process and
>> +     * correct it in the QEMU end.
>> +     */
>> +    val = (val == UINT64_MAX) ? val : (val - 1);
>> +
>> +    return val;
>> +}
>> +
>> +void notify_proxy(int efd, uint64_t val)
>> +{
>> +    val = (val == UINT64_MAX) ? val : (val + 1);
>> +    ssize_t len = -1;
>> +
>> +    len = write(efd, &val, sizeof(val));
>> +    if (len == -1 || len != sizeof(val)) {
>> +        qemu_log_mask(LOG_REMOTE_DEBUG, "Error notify_proxy: %s\n",
>> +                      strerror(errno));
>> +    }
>> +}
>> +
>> static gboolean mpqemu_link_handler_prepare(GSource *gsrc, gint *timeout)
>> {
>>     g_assert(timeout);
>> -- 
>> 2.25.GIT
>>
Stefan Hajnoczi May 13, 2020, 8:43 a.m. UTC | #3
On Tue, May 12, 2020 at 08:28:39AM -0400, Jag Raman wrote:
> > On May 12, 2020, at 6:21 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> > On Wed, Apr 22, 2020 at 09:13:47PM -0700, elena.ufimtseva@oracle.com wrote:
> >> +uint64_t wait_for_remote(int efd)
> >> +{
> >> +    struct pollfd pfd = { .fd = efd, .events = POLLIN };
> >> +    uint64_t val;
> >> +    int ret;
> >> +
> >> +    ret = poll(&pfd, 1, 1000);
> > 
> > This 1 second blocking operation is not allowed in an event loop since
> > it will stall any other event loop activity. If locks are held then
> > other threads may also be stalled.
> > 
> > It's likely that this will need to change as part of the QEMU event loop
> > integration. Caller code can be kept mostly unchanged if you use
> > coroutines.
> 
> In case the remote process has hung or terminated, the 1 second timeout
> ensures that the IO operation does not block for too long.

Timeouts are fine. They just need to be integrated into the event loop
instead of blocking it. That way other processing can continue while
waiting for the remote device process to respond or time out.

Stefan
diff mbox series

Patch

diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index af401e640c..ef95599bca 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -124,4 +124,11 @@  void mpqemu_link_set_callback(MPQemuLinkState *s,
 void mpqemu_start_coms(MPQemuLinkState *s, MPQemuChannel* chan);
 bool mpqemu_msg_valid(MPQemuMsg *msg);
 
+#define GET_REMOTE_WAIT eventfd(0, EFD_CLOEXEC)
+#define PUT_REMOTE_WAIT(wait) close(wait)
+#define PROXY_LINK_WAIT_DONE 1
+
+uint64_t wait_for_remote(int efd);
+void notify_proxy(int fd, uint64_t val);
+
 #endif
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
index 48f53a8928..cc0a7aecd4 100644
--- a/io/mpqemu-link.c
+++ b/io/mpqemu-link.c
@@ -10,6 +10,7 @@ 
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include <poll.h>
 
 #include "qemu/module.h"
 #include "io/mpqemu-link.h"
@@ -204,6 +205,66 @@  int mpqemu_msg_recv(MPQemuMsg *msg, MPQemuChannel *chan)
     return rc;
 }
 
+/*
+ * wait_for_remote() Synchronizes QEMU and the remote process. The maximum
+ *                   wait time is 1s, after which the wait times out.
+ *                   The function alse returns a 64 bit return value after
+ *                   the wait. The function uses eventfd() to do the wait
+ *                   and pass the return values. eventfd() can't return a
+ *                   value of '0'. Therefore, all return values are offset
+ *                   by '1' at the sending end, and corrected at the
+ *                   receiving end.
+ */
+
+uint64_t wait_for_remote(int efd)
+{
+    struct pollfd pfd = { .fd = efd, .events = POLLIN };
+    uint64_t val;
+    int ret;
+
+    ret = poll(&pfd, 1, 1000);
+
+    switch (ret) {
+    case 0:
+        qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: Timed out\n");
+        /* TODO: Kick-off error recovery */
+        return UINT64_MAX;
+    case -1:
+        qemu_log_mask(LOG_REMOTE_DEBUG, "Poll error wait_for_remote: %s\n",
+                      strerror(errno));
+        return UINT64_MAX;
+    default:
+        if (read(efd, &val, sizeof(val)) == -1) {
+            qemu_log_mask(LOG_REMOTE_DEBUG, "Error wait_for_remote: %s\n",
+                          strerror(errno));
+            return UINT64_MAX;
+        }
+    }
+
+    /*
+     * The remote process could write a non-zero value
+     * to the eventfd to wake QEMU up. However, the drawback of using eventfd
+     * for this purpose is that a return value of zero wouldn't wake QEMU up.
+     * Therefore, we offset the return value by one at the remote process and
+     * correct it in the QEMU end.
+     */
+    val = (val == UINT64_MAX) ? val : (val - 1);
+
+    return val;
+}
+
+void notify_proxy(int efd, uint64_t val)
+{
+    val = (val == UINT64_MAX) ? val : (val + 1);
+    ssize_t len = -1;
+
+    len = write(efd, &val, sizeof(val));
+    if (len == -1 || len != sizeof(val)) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "Error notify_proxy: %s\n",
+                      strerror(errno));
+    }
+}
+
 static gboolean mpqemu_link_handler_prepare(GSource *gsrc, gint *timeout)
 {
     g_assert(timeout);