diff mbox series

[RFC,v3,07/45] multi-process: define proxy-link object

Message ID 51220007b0f8a34cc72ff2847f5deb1f85c9c0e4.1567534653.git.jag.raman@oracle.com (mailing list archive)
State New, archived
Headers show
Series Initial support of multi-process qemu | expand

Commit Message

Jag Raman Sept. 3, 2019, 8:37 p.m. UTC
Defines proxy-link object which forms the communication link between
QEMU & emulation program.
Adds functions to configure members of proxy-link object instance.
Adds functions to send and receive messages over the communication
channel.
Adds GMainLoop to handle events received on the communication channel.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 v1 -> v2:
   - Use default context for main loop instead of a new context

 v2 -> v3:
   - Enabled multi-channel support in the communication link

 include/glib-compat.h   |   4 +
 include/io/proxy-link.h | 147 ++++++++++++++++++++++++
 io/Makefile.objs        |   2 +
 io/proxy-link.c         | 292 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 445 insertions(+)
 create mode 100644 include/io/proxy-link.h
 create mode 100644 io/proxy-link.c

Comments

Daniel P. Berrangé Sept. 4, 2019, 8:22 a.m. UTC | #1
On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
> Defines proxy-link object which forms the communication link between
> QEMU & emulation program.
> Adds functions to configure members of proxy-link object instance.
> Adds functions to send and receive messages over the communication
> channel.
> Adds GMainLoop to handle events received on the communication channel.
> 
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> ---
>  v1 -> v2:
>    - Use default context for main loop instead of a new context
> 
>  v2 -> v3:
>    - Enabled multi-channel support in the communication link
> 
>  include/glib-compat.h   |   4 +
>  include/io/proxy-link.h | 147 ++++++++++++++++++++++++
>  io/Makefile.objs        |   2 +
>  io/proxy-link.c         | 292 ++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 445 insertions(+)
>  create mode 100644 include/io/proxy-link.h
>  create mode 100644 io/proxy-link.c
> 
> diff --git a/include/glib-compat.h b/include/glib-compat.h
> index 1291628..6189b9a 100644
> --- a/include/glib-compat.h
> +++ b/include/glib-compat.h
> @@ -19,12 +19,16 @@
>  /* Ask for warnings for anything that was marked deprecated in
>   * the defined version, or before. It is a candidate for rewrite.
>   */
> +#ifndef GLIB_VERSION_MIN_REQUIRED
>  #define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_40
> +#endif
>  
>  /* Ask for warnings if code tries to use function that did not
>   * exist in the defined version. These risk breaking builds
>   */
> +#ifndef GLIB_VERSION_MAX_ALLOWED
>  #define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_40
> +#endif

This does not look good. This header can only be included
once thanks to the #ifndef QEMU_GLIB_COMPAT_H. So the fact
that you need these conditionals is a hint of a bug elsewhere
in the code related to glib usage.


> diff --git a/include/io/proxy-link.h b/include/io/proxy-link.h
> new file mode 100644
> index 0000000..ee78cdd
> --- /dev/null
> +++ b/include/io/proxy-link.h
> @@ -0,0 +1,147 @@
> +/*
> + * Communication channel between QEMU and remote device process
> + *
> + * Copyright 2019, Oracle and/or its affiliates.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#ifndef PROXY_LINK_H
> +#define PROXY_LINK_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <glib.h>

I'm guessing this is the cause - nothing should be including this
directly - it is pulled  in for you via qemu/osdep.h

> +#include <pthread.h>
> +
> +#include "qemu/osdep.h"
> +#include "qom/object.h"
> +#include "qemu/thread.h"

Regards,
Daniel
Eric Blake Sept. 5, 2019, 2:37 p.m. UTC | #2
On 9/4/19 3:22 AM, Daniel P. Berrangé wrote:
> On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
>> Defines proxy-link object which forms the communication link between
>> QEMU & emulation program.
>> Adds functions to configure members of proxy-link object instance.
>> Adds functions to send and receive messages over the communication
>> channel.
>> Adds GMainLoop to handle events received on the communication channel.

>> +
>> +#ifndef PROXY_LINK_H
>> +#define PROXY_LINK_H
>> +
>> +#include <stddef.h>
>> +#include <stdint.h>
>> +#include <glib.h>
> 
> I'm guessing this is the cause - nothing should be including this
> directly - it is pulled  in for you via qemu/osdep.h
> 
>> +#include <pthread.h>
>> +
>> +#include "qemu/osdep.h"

For that matter, "qemu/osdep.h" should ALWAYS be listed first, before
any system headers, and inclusion of <stddef.h> and <stdint.h> is also
redundant, just as the <glib.h>.
Jag Raman Sept. 5, 2019, 3:20 p.m. UTC | #3
On 9/5/2019 10:37 AM, Eric Blake wrote:
> On 9/4/19 3:22 AM, Daniel P. Berrangé wrote:
>> On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
>>> Defines proxy-link object which forms the communication link between
>>> QEMU & emulation program.
>>> Adds functions to configure members of proxy-link object instance.
>>> Adds functions to send and receive messages over the communication
>>> channel.
>>> Adds GMainLoop to handle events received on the communication channel.
> 
>>> +
>>> +#ifndef PROXY_LINK_H
>>> +#define PROXY_LINK_H
>>> +
>>> +#include <stddef.h>
>>> +#include <stdint.h>
>>> +#include <glib.h>
>>
>> I'm guessing this is the cause - nothing should be including this
>> directly - it is pulled  in for you via qemu/osdep.h
>>
>>> +#include <pthread.h>
>>> +
>>> +#include "qemu/osdep.h"
> 
> For that matter, "qemu/osdep.h" should ALWAYS be listed first, before
> any system headers, and inclusion of <stddef.h> and <stdint.h> is also
> redundant, just as the <glib.h>.

Removing <glib.h> resolved the build issue. We'll remove <glib.h> in all
files in the next rev.

We soon realized the "qemu/osdep.h" should be the first include in all
the files. We'll ensure that this is the case for all files in the next
revision.

Thanks!
--
Jag

> 
>
Stefan Hajnoczi Sept. 12, 2019, 3:34 p.m. UTC | #4
On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
> diff --git a/include/io/proxy-link.h b/include/io/proxy-link.h
> new file mode 100644
> index 0000000..ee78cdd
> --- /dev/null
> +++ b/include/io/proxy-link.h
> @@ -0,0 +1,147 @@

Regarding naming: so far I've seen "remote", "mpqemu", and "proxy".
These concepts aren't well-defined and I'm not sure if they refer to the
same thing or are different.  ProxyLinkState should be named
MPQemuLinkState?

It's also unclear to me how tightly coupled this "proxy link" is to PCI
(since it has PCI Configuration Space commands and how it would be
cleanly extended to support other busses).

> +/*
> + * Communication channel between QEMU and remote device process
> + *
> + * Copyright 2019, Oracle and/or its affiliates.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#ifndef PROXY_LINK_H
> +#define PROXY_LINK_H
> +
> +#include <stddef.h>
> +#include <stdint.h>
> +#include <glib.h>
> +#include <pthread.h>
> +
> +#include "qemu/osdep.h"
> +#include "qom/object.h"
> +#include "qemu/thread.h"
> +
> +typedef struct ProxyLinkState ProxyLinkState;
> +
> +#define TYPE_PROXY_LINK "proxy-link"
> +#define PROXY_LINK(obj) \
> +    OBJECT_CHECK(ProxyLinkState, (obj), TYPE_PROXY_LINK)
> +
> +#define REMOTE_MAX_FDS 8
> +
> +#define PROC_HDR_SIZE offsetof(ProcMsg, data1.u64)
> +
> +/*
> + * proc_cmd_t enum type to specify the command to be executed on the remote
> + * device
> + *
> + * Following commands are supported:
> + * CONF_READ        PCI config. space read
> + * CONF_WRITE       PCI config. space write
> + *
> + */
> +typedef enum {
> +    INIT = 0,
> +    CONF_READ,
> +    CONF_WRITE,
> +    MAX,
> +} proc_cmd_t;
> +
> +/*
> + * ProcMsg Format of the message sent to the remote device from QEMU
> + *
> + * cmd         The remote command
> + * bytestream  Indicates if the data to be shared is structured (data1)
> + *             or unstructured (data2)
> + * size        Size of the data to be shared
> + * data1       Structured data
> + * fds         File descriptors to be shared with remote device
> + * data2       Unstructured data
> + *
> + */
> +typedef struct {
> +    proc_cmd_t cmd;
> +    int bytestream;

Please use bool.

> +    size_t size;
> +
> +    union {
> +        uint64_t u64;
> +    } data1;
> +
> +    int fds[REMOTE_MAX_FDS];
> +    int num_fds;
> +
> +    uint8_t *data2;
> +} ProcMsg;
> +
> +struct conf_data_msg {
> +    uint32_t addr;
> +    uint32_t val;
> +    int l;

Please pick a descriptive name for this field.

> +};
> +
> +/*
> + * ProcChannel defines the channel that make up the communication link
> + * between QEMU and remote process
> + *
> + * gsrc       GSource object to be used by loop
> + * gpfd       GPollFD object containing the socket & events to monitor
> + * sock       Socket to send/receive communication, same as the one in gpfd
> + * lock       Mutex to synchronize access to the channel
> + */

Please see the doc comment style in include/qom/object.h for examples of
how to format doc comments.

> +
> +typedef struct ProcChannel {
> +    GSource gsrc;
> +    GPollFD gpfd;
> +    int sock;
> +    QemuMutex lock;
> +} ProcChannel;
> +
> +typedef void (*proxy_link_callback)(GIOCondition cond, ProcChannel *chan);
> +
> +/*
> + * ProxyLinkState Instance info. of the communication
> + * link between QEMU and remote process. The Link could
> + * be made up of multiple channels.
> + *
> + * ctx        GMainContext to be used for communication
> + * loop       Main loop that would be used to poll for incoming data
> + * com        Communication channel to transport control messages
> + *
> + */
> +struct ProxyLinkState {
> +    Object obj;
> +
> +    GMainContext *ctx;
> +    GMainLoop *loop;
> +
> +    ProcChannel *com;
> +
> +    proxy_link_callback callback;
> +};
> +
> +ProxyLinkState *proxy_link_create(void);
> +void proxy_link_finalize(ProxyLinkState *s);
> +
> +void proxy_proc_send(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan);
> +int proxy_proc_recv(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan);
> +
> +void proxy_link_init_channel(ProxyLinkState *s, ProcChannel **chan, int fd);
> +void proxy_link_destroy_channel(ProcChannel *chan);
> +void proxy_link_set_callback(ProxyLinkState *s, proxy_link_callback callback);
> +void start_handler(ProxyLinkState *s);
> +
> +#endif
> diff --git a/io/Makefile.objs b/io/Makefile.objs
> index 9a20fce..ff88b46 100644
> --- a/io/Makefile.objs
> +++ b/io/Makefile.objs
> @@ -10,3 +10,5 @@ io-obj-y += channel-util.o
>  io-obj-y += dns-resolver.o
>  io-obj-y += net-listener.o
>  io-obj-y += task.o
> +
> +io-obj-$(CONFIG_MPQEMU) += proxy-link.o
> diff --git a/io/proxy-link.c b/io/proxy-link.c
> new file mode 100644
> index 0000000..5eb9718
> --- /dev/null
> +++ b/io/proxy-link.c
> @@ -0,0 +1,292 @@
> +/*
> + * Communication channel between QEMU and remote device process
> + *
> + * Copyright 2019, Oracle and/or its affiliates.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include <assert.h>
> +#include <errno.h>
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <unistd.h>
> +
> +#include "qemu/module.h"
> +#include "io/proxy-link.h"
> +#include "qemu/log.h"
> +
> +GSourceFuncs gsrc_funcs;
> +
> +static void proxy_link_inst_init(Object *obj)
> +{
> +    ProxyLinkState *s = PROXY_LINK(obj);
> +
> +    s->ctx = g_main_context_default();
> +    s->loop = g_main_loop_new(s->ctx, FALSE);
> +}
> +
> +static const TypeInfo proxy_link_info = {
> +    .name = TYPE_PROXY_LINK,
> +    .parent = TYPE_OBJECT,
> +    .instance_size = sizeof(ProxyLinkState),
> +    .instance_init = proxy_link_inst_init,
> +};
> +
> +static void proxy_link_register_types(void)
> +{
> +    type_register_static(&proxy_link_info);
> +}
> +
> +type_init(proxy_link_register_types)
> +
> +ProxyLinkState *proxy_link_create(void)
> +{
> +    return PROXY_LINK(object_new(TYPE_PROXY_LINK));
> +}
> +
> +void proxy_link_finalize(ProxyLinkState *s)
> +{
> +    g_main_loop_unref(s->loop);
> +    g_main_context_unref(s->ctx);
> +    g_main_loop_quit(s->loop);
> +
> +    proxy_link_destroy_channel(s->com);
> +
> +    object_unref(OBJECT(s));
> +}

It's strange that the destruction of ProxyLinkState does not use QEMU's
object system.  How about implementing .instance_finalize(), dropping
proxy_link_finalize(), and letting the caller unref ProxyLinkState like
a regular Object?

> +
> +void proxy_proc_send(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan)
> +{
> +    int rc;
> +    uint8_t *data;
> +    union {
> +        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
> +        struct cmsghdr align;
> +    } u;
> +    struct msghdr hdr;
> +    struct cmsghdr *chdr;
> +    int sock = chan->sock;
> +    QemuMutex *lock = &chan->lock;
> +
> +    struct iovec iov = {
> +        .iov_base = (char *) msg,
> +        .iov_len = PROC_HDR_SIZE,
> +    };
> +
> +    memset(&hdr, 0, sizeof(hdr));
> +    memset(&u, 0, sizeof(u));
> +
> +    hdr.msg_iov = &iov;
> +    hdr.msg_iovlen = 1;
> +
> +    if (msg->num_fds > REMOTE_MAX_FDS) {
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
> +        return;
> +    }
> +
> +    if (msg->num_fds > 0) {
> +        size_t fdsize = msg->num_fds * sizeof(int);
> +
> +        hdr.msg_control = &u;
> +        hdr.msg_controllen = sizeof(u);
> +
> +        chdr = CMSG_FIRSTHDR(&hdr);
> +        chdr->cmsg_len = CMSG_LEN(fdsize);
> +        chdr->cmsg_level = SOL_SOCKET;
> +        chdr->cmsg_type = SCM_RIGHTS;
> +        memcpy(CMSG_DATA(chdr), msg->fds, fdsize);
> +        hdr.msg_controllen = CMSG_SPACE(fdsize);
> +    }
> +
> +    qemu_mutex_lock(lock);
> +
> +    do {
> +        rc = sendmsg(sock, &hdr, 0);
> +    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +    if (rc < 0) {
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - sendmsg rc is %d, errno is %d,"
> +                      " sock %d\n", __func__, rc, errno, sock);
> +        qemu_mutex_unlock(lock);
> +        return;
> +    }
> +
> +    if (msg->bytestream) {
> +        data = msg->data2;
> +    } else {
> +        data = (uint8_t *)msg + PROC_HDR_SIZE;
> +    }
> +
> +    do {
> +        rc = write(sock, data, msg->size);
> +    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +    qemu_mutex_unlock(lock);

Can this message be transmitted in a single sendmsg() by including 2
iovecs instead of just 1?  Then no locking is needed and only one
syscall is required.

> +}
> +
> +
> +int proxy_proc_recv(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan)
> +{
> +    int rc;
> +    uint8_t *data;
> +    union {
> +        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
> +        struct cmsghdr align;
> +    } u;
> +    struct msghdr hdr;
> +    struct cmsghdr *chdr;
> +    size_t fdsize;
> +    int sock = chan->sock;
> +    QemuMutex *lock = &chan->lock;
> +
> +    struct iovec iov = {
> +        .iov_base = (char *) msg,
> +        .iov_len = PROC_HDR_SIZE,
> +    };
> +
> +    memset(&hdr, 0, sizeof(hdr));
> +    memset(&u, 0, sizeof(u));
> +
> +    hdr.msg_iov = &iov;
> +    hdr.msg_iovlen = 1;
> +    hdr.msg_control = &u;
> +    hdr.msg_controllen = sizeof(u);
> +
> +    qemu_mutex_lock(lock);

Sockets are full-duplex (sending and receiving are independent).  Is it
necessary to use the same lock for both send and receive?

> +
> +    do {
> +        rc = recvmsg(sock, &hdr, 0);
> +    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +
> +    if (rc < 0) {
> +        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
> +                      " sock %d\n", __func__, rc, errno, sock);
> +        qemu_mutex_unlock(lock);
> +        return rc;
> +    }
> +
> +    msg->num_fds = 0;
> +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
> +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
> +        if ((chdr->cmsg_level == SOL_SOCKET) &&
> +            (chdr->cmsg_type == SCM_RIGHTS)) {
> +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
> +            msg->num_fds = fdsize / sizeof(int);
> +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);

Please validate num_fds before memcpy to prevent the buffer overflow.

> +            break;
> +        }
> +    }
> +
> +    if (msg->size && msg->bytestream) {
> +        msg->data2 = calloc(1, msg->size);
> +        data = msg->data2;
> +    } else {
> +        data = (uint8_t *)&msg->data1;
> +    }
> +
> +    if (msg->size) {
> +        do {
> +            rc = read(sock, data, msg->size);
> +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> +    }

Please validate size to prevent the buffer overflow.
Stefan Hajnoczi Oct. 9, 2019, 1:37 p.m. UTC | #5
On Thu, Sep 12, 2019 at 05:34:35PM +0200, Stefan Hajnoczi wrote:
> On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
> > +    msg->num_fds = 0;
> > +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
> > +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
> > +        if ((chdr->cmsg_level == SOL_SOCKET) &&
> > +            (chdr->cmsg_type == SCM_RIGHTS)) {
> > +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
> > +            msg->num_fds = fdsize / sizeof(int);
> > +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
> 
> Please validate num_fds before memcpy to prevent the buffer overflow.
> 
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (msg->size && msg->bytestream) {
> > +        msg->data2 = calloc(1, msg->size);
> > +        data = msg->data2;
> > +    } else {
> > +        data = (uint8_t *)&msg->data1;
> > +    }
> > +
> > +    if (msg->size) {
> > +        do {
> > +            rc = read(sock, data, msg->size);
> > +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> > +    }
> 
> Please validate size to prevent the buffer overflow.

I didn't see a reply so I want to highlight that the effort to introduce
isolation between devices is pointless if the communications link is not
coded securely.

Multi-process QEMU adds no security if one process can corrupt the
memory of another process by sending invalid inputs.  Please audit the
code.

Stefan
Elena Ufimtseva Oct. 9, 2019, 5:58 p.m. UTC | #6
On Wed, Oct 09, 2019 at 02:37:24PM +0100, Stefan Hajnoczi wrote:
> On Thu, Sep 12, 2019 at 05:34:35PM +0200, Stefan Hajnoczi wrote:
> > On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
> > > +    msg->num_fds = 0;
> > > +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
> > > +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
> > > +        if ((chdr->cmsg_level == SOL_SOCKET) &&
> > > +            (chdr->cmsg_type == SCM_RIGHTS)) {
> > > +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
> > > +            msg->num_fds = fdsize / sizeof(int);
> > > +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
> > 
> > Please validate num_fds before memcpy to prevent the buffer overflow.
> > 
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    if (msg->size && msg->bytestream) {
> > > +        msg->data2 = calloc(1, msg->size);
> > > +        data = msg->data2;
> > > +    } else {
> > > +        data = (uint8_t *)&msg->data1;
> > > +    }
> > > +
> > > +    if (msg->size) {
> > > +        do {
> > > +            rc = read(sock, data, msg->size);
> > > +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
> > > +    }
> > 
> > Please validate size to prevent the buffer overflow.
> 
> I didn't see a reply so I want to highlight that the effort to introduce
> isolation between devices is pointless if the communications link is not
> coded securely.
> 
> Multi-process QEMU adds no security if one process can corrupt the
> memory of another process by sending invalid inputs.  Please audit the
> code.
>

Hi Stefan

Sorry for not replyingi earlier. We have reviewed all the comments we received
on the this patch series and working on the code improvements which are
mostly done.
We recognize the importance of the secure code and making efforts to
eliminate as much as possible these mentioned unverified inputs along with other changes.

The changes will be in the next version of the patchset we are actively
working on.

The other your suggestion about reducing the number of syscalls by
stuffing all of the parts of the message in the io_vec and using one
sendmsg/recvmsg cannot be done at this point with the way we have
organized the messages structure. But we are looking into the
adoption of shared ring buffer for communication channel instead of the
current mechanism to reduce the number of syscalls.
Though this change will not be a part of the next patchset as we are
tinkering with live migration.
But all other recommendations and comments will be taken into account.

Regards,
Elena & Jag & JJ.


> Stefan
Jag Raman Oct. 10, 2019, 8:21 p.m. UTC | #7
On 10/9/2019 1:58 PM, Elena Ufimtseva wrote:
> On Wed, Oct 09, 2019 at 02:37:24PM +0100, Stefan Hajnoczi wrote:
>> On Thu, Sep 12, 2019 at 05:34:35PM +0200, Stefan Hajnoczi wrote:
>>> On Tue, Sep 03, 2019 at 04:37:33PM -0400, Jagannathan Raman wrote:
>>>> +    msg->num_fds = 0;
>>>> +    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
>>>> +         chdr = CMSG_NXTHDR(&hdr, chdr)) {
>>>> +        if ((chdr->cmsg_level == SOL_SOCKET) &&
>>>> +            (chdr->cmsg_type == SCM_RIGHTS)) {
>>>> +            fdsize = chdr->cmsg_len - CMSG_LEN(0);
>>>> +            msg->num_fds = fdsize / sizeof(int);
>>>> +            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
>>>
>>> Please validate num_fds before memcpy to prevent the buffer overflow.
>>>
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    if (msg->size && msg->bytestream) {
>>>> +        msg->data2 = calloc(1, msg->size);
>>>> +        data = msg->data2;
>>>> +    } else {
>>>> +        data = (uint8_t *)&msg->data1;
>>>> +    }
>>>> +
>>>> +    if (msg->size) {
>>>> +        do {
>>>> +            rc = read(sock, data, msg->size);
>>>> +        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
>>>> +    }
>>>
>>> Please validate size to prevent the buffer overflow.
>>
>> I didn't see a reply so I want to highlight that the effort to introduce
>> isolation between devices is pointless if the communications link is not
>> coded securely.
>>
>> Multi-process QEMU adds no security if one process can corrupt the
>> memory of another process by sending invalid inputs.  Please audit the
>> code.
>>
> 
> Hi Stefan
> 
> Sorry for not replyingi earlier. We have reviewed all the comments we received
> on the this patch series and working on the code improvements which are
> mostly done.
> We recognize the importance of the secure code and making efforts to
> eliminate as much as possible these mentioned unverified inputs along with other changes.
> 
> The changes will be in the next version of the patchset we are actively
> working on.
> 
> The other your suggestion about reducing the number of syscalls by
> stuffing all of the parts of the message in the io_vec and using one
> sendmsg/recvmsg cannot be done at this point with the way we have
> organized the messages structure. But we are looking into the
> adoption of shared ring buffer for communication channel instead of the
> current mechanism to reduce the number of syscalls.
> Though this change will not be a part of the next patchset as we are
> tinkering with live migration.
> But all other recommendations and comments will be taken into account.

Thanks Elena!

Hi Stefan,

I'd like to confirm that we are targeting Live Migration for next
version and we are also discussing another use case for this project.

The other use case we are discussing is testing emulated devices (as 
separate processes) with libfuzzer. We could test corner cases and the 
handling of error conditions on emulated devices using this approach.

Thanks!
--
Jag

> 
> Regards,
> Elena & Jag & JJ.
> 
> 
>> Stefan
> 
> 
>
diff mbox series

Patch

diff --git a/include/glib-compat.h b/include/glib-compat.h
index 1291628..6189b9a 100644
--- a/include/glib-compat.h
+++ b/include/glib-compat.h
@@ -19,12 +19,16 @@ 
 /* Ask for warnings for anything that was marked deprecated in
  * the defined version, or before. It is a candidate for rewrite.
  */
+#ifndef GLIB_VERSION_MIN_REQUIRED
 #define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_40
+#endif
 
 /* Ask for warnings if code tries to use function that did not
  * exist in the defined version. These risk breaking builds
  */
+#ifndef GLIB_VERSION_MAX_ALLOWED
 #define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_40
+#endif
 
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
diff --git a/include/io/proxy-link.h b/include/io/proxy-link.h
new file mode 100644
index 0000000..ee78cdd
--- /dev/null
+++ b/include/io/proxy-link.h
@@ -0,0 +1,147 @@ 
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright 2019, Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PROXY_LINK_H
+#define PROXY_LINK_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <glib.h>
+#include <pthread.h>
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "qemu/thread.h"
+
+typedef struct ProxyLinkState ProxyLinkState;
+
+#define TYPE_PROXY_LINK "proxy-link"
+#define PROXY_LINK(obj) \
+    OBJECT_CHECK(ProxyLinkState, (obj), TYPE_PROXY_LINK)
+
+#define REMOTE_MAX_FDS 8
+
+#define PROC_HDR_SIZE offsetof(ProcMsg, data1.u64)
+
+/*
+ * proc_cmd_t enum type to specify the command to be executed on the remote
+ * device
+ *
+ * Following commands are supported:
+ * CONF_READ        PCI config. space read
+ * CONF_WRITE       PCI config. space write
+ *
+ */
+typedef enum {
+    INIT = 0,
+    CONF_READ,
+    CONF_WRITE,
+    MAX,
+} proc_cmd_t;
+
+/*
+ * ProcMsg Format of the message sent to the remote device from QEMU
+ *
+ * cmd         The remote command
+ * bytestream  Indicates if the data to be shared is structured (data1)
+ *             or unstructured (data2)
+ * size        Size of the data to be shared
+ * data1       Structured data
+ * fds         File descriptors to be shared with remote device
+ * data2       Unstructured data
+ *
+ */
+typedef struct {
+    proc_cmd_t cmd;
+    int bytestream;
+    size_t size;
+
+    union {
+        uint64_t u64;
+    } data1;
+
+    int fds[REMOTE_MAX_FDS];
+    int num_fds;
+
+    uint8_t *data2;
+} ProcMsg;
+
+struct conf_data_msg {
+    uint32_t addr;
+    uint32_t val;
+    int l;
+};
+
+/*
+ * ProcChannel defines the channel that make up the communication link
+ * between QEMU and remote process
+ *
+ * gsrc       GSource object to be used by loop
+ * gpfd       GPollFD object containing the socket & events to monitor
+ * sock       Socket to send/receive communication, same as the one in gpfd
+ * lock       Mutex to synchronize access to the channel
+ */
+
+typedef struct ProcChannel {
+    GSource gsrc;
+    GPollFD gpfd;
+    int sock;
+    QemuMutex lock;
+} ProcChannel;
+
+typedef void (*proxy_link_callback)(GIOCondition cond, ProcChannel *chan);
+
+/*
+ * ProxyLinkState Instance info. of the communication
+ * link between QEMU and remote process. The Link could
+ * be made up of multiple channels.
+ *
+ * ctx        GMainContext to be used for communication
+ * loop       Main loop that would be used to poll for incoming data
+ * com        Communication channel to transport control messages
+ *
+ */
+struct ProxyLinkState {
+    Object obj;
+
+    GMainContext *ctx;
+    GMainLoop *loop;
+
+    ProcChannel *com;
+
+    proxy_link_callback callback;
+};
+
+ProxyLinkState *proxy_link_create(void);
+void proxy_link_finalize(ProxyLinkState *s);
+
+void proxy_proc_send(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan);
+int proxy_proc_recv(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan);
+
+void proxy_link_init_channel(ProxyLinkState *s, ProcChannel **chan, int fd);
+void proxy_link_destroy_channel(ProcChannel *chan);
+void proxy_link_set_callback(ProxyLinkState *s, proxy_link_callback callback);
+void start_handler(ProxyLinkState *s);
+
+#endif
diff --git a/io/Makefile.objs b/io/Makefile.objs
index 9a20fce..ff88b46 100644
--- a/io/Makefile.objs
+++ b/io/Makefile.objs
@@ -10,3 +10,5 @@  io-obj-y += channel-util.o
 io-obj-y += dns-resolver.o
 io-obj-y += net-listener.o
 io-obj-y += task.o
+
+io-obj-$(CONFIG_MPQEMU) += proxy-link.o
diff --git a/io/proxy-link.c b/io/proxy-link.c
new file mode 100644
index 0000000..5eb9718
--- /dev/null
+++ b/io/proxy-link.c
@@ -0,0 +1,292 @@ 
+/*
+ * Communication channel between QEMU and remote device process
+ *
+ * Copyright 2019, Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include "qemu/module.h"
+#include "io/proxy-link.h"
+#include "qemu/log.h"
+
+GSourceFuncs gsrc_funcs;
+
+static void proxy_link_inst_init(Object *obj)
+{
+    ProxyLinkState *s = PROXY_LINK(obj);
+
+    s->ctx = g_main_context_default();
+    s->loop = g_main_loop_new(s->ctx, FALSE);
+}
+
+static const TypeInfo proxy_link_info = {
+    .name = TYPE_PROXY_LINK,
+    .parent = TYPE_OBJECT,
+    .instance_size = sizeof(ProxyLinkState),
+    .instance_init = proxy_link_inst_init,
+};
+
+static void proxy_link_register_types(void)
+{
+    type_register_static(&proxy_link_info);
+}
+
+type_init(proxy_link_register_types)
+
+ProxyLinkState *proxy_link_create(void)
+{
+    return PROXY_LINK(object_new(TYPE_PROXY_LINK));
+}
+
+void proxy_link_finalize(ProxyLinkState *s)
+{
+    g_main_loop_unref(s->loop);
+    g_main_context_unref(s->ctx);
+    g_main_loop_quit(s->loop);
+
+    proxy_link_destroy_channel(s->com);
+
+    object_unref(OBJECT(s));
+}
+
+void proxy_proc_send(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan)
+{
+    int rc;
+    uint8_t *data;
+    union {
+        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
+        struct cmsghdr align;
+    } u;
+    struct msghdr hdr;
+    struct cmsghdr *chdr;
+    int sock = chan->sock;
+    QemuMutex *lock = &chan->lock;
+
+    struct iovec iov = {
+        .iov_base = (char *) msg,
+        .iov_len = PROC_HDR_SIZE,
+    };
+
+    memset(&hdr, 0, sizeof(hdr));
+    memset(&u, 0, sizeof(u));
+
+    hdr.msg_iov = &iov;
+    hdr.msg_iovlen = 1;
+
+    if (msg->num_fds > REMOTE_MAX_FDS) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s: Max FDs exceeded\n", __func__);
+        return;
+    }
+
+    if (msg->num_fds > 0) {
+        size_t fdsize = msg->num_fds * sizeof(int);
+
+        hdr.msg_control = &u;
+        hdr.msg_controllen = sizeof(u);
+
+        chdr = CMSG_FIRSTHDR(&hdr);
+        chdr->cmsg_len = CMSG_LEN(fdsize);
+        chdr->cmsg_level = SOL_SOCKET;
+        chdr->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(chdr), msg->fds, fdsize);
+        hdr.msg_controllen = CMSG_SPACE(fdsize);
+    }
+
+    qemu_mutex_lock(lock);
+
+    do {
+        rc = sendmsg(sock, &hdr, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc < 0) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - sendmsg rc is %d, errno is %d,"
+                      " sock %d\n", __func__, rc, errno, sock);
+        qemu_mutex_unlock(lock);
+        return;
+    }
+
+    if (msg->bytestream) {
+        data = msg->data2;
+    } else {
+        data = (uint8_t *)msg + PROC_HDR_SIZE;
+    }
+
+    do {
+        rc = write(sock, data, msg->size);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    qemu_mutex_unlock(lock);
+}
+
+
+int proxy_proc_recv(ProxyLinkState *s, ProcMsg *msg, ProcChannel *chan)
+{
+    int rc;
+    uint8_t *data;
+    union {
+        char control[CMSG_SPACE(REMOTE_MAX_FDS * sizeof(int))];
+        struct cmsghdr align;
+    } u;
+    struct msghdr hdr;
+    struct cmsghdr *chdr;
+    size_t fdsize;
+    int sock = chan->sock;
+    QemuMutex *lock = &chan->lock;
+
+    struct iovec iov = {
+        .iov_base = (char *) msg,
+        .iov_len = PROC_HDR_SIZE,
+    };
+
+    memset(&hdr, 0, sizeof(hdr));
+    memset(&u, 0, sizeof(u));
+
+    hdr.msg_iov = &iov;
+    hdr.msg_iovlen = 1;
+    hdr.msg_control = &u;
+    hdr.msg_controllen = sizeof(u);
+
+    qemu_mutex_lock(lock);
+
+    do {
+        rc = recvmsg(sock, &hdr, 0);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+
+    if (rc < 0) {
+        qemu_log_mask(LOG_REMOTE_DEBUG, "%s - recvmsg rc is %d, errno is %d,"
+                      " sock %d\n", __func__, rc, errno, sock);
+        qemu_mutex_unlock(lock);
+        return rc;
+    }
+
+    msg->num_fds = 0;
+    for (chdr = CMSG_FIRSTHDR(&hdr); chdr != NULL;
+         chdr = CMSG_NXTHDR(&hdr, chdr)) {
+        if ((chdr->cmsg_level == SOL_SOCKET) &&
+            (chdr->cmsg_type == SCM_RIGHTS)) {
+            fdsize = chdr->cmsg_len - CMSG_LEN(0);
+            msg->num_fds = fdsize / sizeof(int);
+            memcpy(msg->fds, CMSG_DATA(chdr), fdsize);
+            break;
+        }
+    }
+
+    if (msg->size && msg->bytestream) {
+        msg->data2 = calloc(1, msg->size);
+        data = msg->data2;
+    } else {
+        data = (uint8_t *)&msg->data1;
+    }
+
+    if (msg->size) {
+        do {
+            rc = read(sock, data, msg->size);
+        } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    }
+
+    qemu_mutex_unlock(lock);
+
+    return rc;
+}
+
+static gboolean proxy_link_handler_prepare(GSource *gsrc, gint *timeout)
+{
+    g_assert(timeout);
+
+    *timeout = -1;
+
+    return FALSE;
+}
+
+static gboolean proxy_link_handler_check(GSource *gsrc)
+{
+    ProcChannel *chan = (ProcChannel *)gsrc;
+
+    return chan->gpfd.events & chan->gpfd.revents;
+}
+
+static gboolean proxy_link_handler_dispatch(GSource *gsrc, GSourceFunc func,
+                                            gpointer data)
+{
+    ProxyLinkState *s = (ProxyLinkState *)data;
+    ProcChannel *chan = (ProcChannel *)gsrc;
+
+    s->callback(chan->gpfd.revents, chan);
+
+    if ((chan->gpfd.revents & G_IO_HUP) || (chan->gpfd.revents & G_IO_ERR)) {
+        return G_SOURCE_REMOVE;
+    }
+
+    return G_SOURCE_CONTINUE;
+}
+
+void proxy_link_set_callback(ProxyLinkState *s, proxy_link_callback callback)
+{
+    s->callback = callback;
+}
+
+void proxy_link_init_channel(ProxyLinkState *s, ProcChannel **chan, int fd)
+{
+    ProcChannel *src;
+
+    gsrc_funcs = (GSourceFuncs){
+        .prepare = proxy_link_handler_prepare,
+        .check = proxy_link_handler_check,
+        .dispatch = proxy_link_handler_dispatch,
+        .finalize = NULL,
+    };
+
+    src = (ProcChannel *)g_source_new(&gsrc_funcs, sizeof(ProcChannel));
+
+    src->sock = fd;
+    qemu_mutex_init(&src->lock);
+
+    g_source_set_callback(&src->gsrc, NULL, (gpointer)s, NULL);
+    src->gpfd.fd = fd;
+    src->gpfd.events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+    g_source_add_poll(&src->gsrc, &src->gpfd);
+
+    *chan = src;
+}
+
+void proxy_link_destroy_channel(ProcChannel *chan)
+{
+    g_source_unref(&chan->gsrc);
+    close(chan->sock);
+    qemu_mutex_destroy(&chan->lock);
+}
+
+void start_handler(ProxyLinkState *s)
+{
+
+    g_assert(g_source_attach(&s->com->gsrc, s->ctx));
+
+    g_main_loop_run(s->loop);
+}