diff mbox series

[V2,07/13] migration: SCM_RIGHTS for QEMUFile

Message ID 1727725244-105198-8-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series Live update: cpr-transfer | expand

Commit Message

Steven Sistare Sept. 30, 2024, 7:40 p.m. UTC
Define functions to put/get file descriptors to/from a QEMUFile, for qio
channels that support SCM_RIGHTS.  Maintain ordering such that
  put(A), put(fd), put(B)
followed by
  get(A), get(fd), get(B)
always succeeds.  Other get orderings may succeed but are not guaranteed.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 migration/qemu-file.c  | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
 migration/qemu-file.h  |  2 ++
 migration/trace-events |  2 ++
 3 files changed, 83 insertions(+), 4 deletions(-)

Comments

Peter Xu Oct. 7, 2024, 4:06 p.m. UTC | #1
On Mon, Sep 30, 2024 at 12:40:38PM -0700, Steve Sistare wrote:
> Define functions to put/get file descriptors to/from a QEMUFile, for qio
> channels that support SCM_RIGHTS.  Maintain ordering such that
>   put(A), put(fd), put(B)
> followed by
>   get(A), get(fd), get(B)
> always succeeds.  Other get orderings may succeed but are not guaranteed.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  migration/qemu-file.c  | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
>  migration/qemu-file.h  |  2 ++
>  migration/trace-events |  2 ++
>  3 files changed, 83 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index b6d2f58..7f951ab 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -37,6 +37,11 @@
>  #define IO_BUF_SIZE 32768
>  #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
>  
> +typedef struct FdEntry {
> +    QTAILQ_ENTRY(FdEntry) entry;
> +    int fd;
> +} FdEntry;
> +
>  struct QEMUFile {
>      QIOChannel *ioc;
>      bool is_writable;
> @@ -51,6 +56,9 @@ struct QEMUFile {
>  
>      int last_error;
>      Error *last_error_obj;
> +
> +    bool fd_pass;
> +    QTAILQ_HEAD(, FdEntry) fds;
>  };
>  
>  /*
> @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
>      object_ref(ioc);
>      f->ioc = ioc;
>      f->is_writable = is_writable;
> +    f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
> +    QTAILQ_INIT(&f->fds);
>  
>      return f;
>  }
> @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>      int len;
>      int pending;
>      Error *local_error = NULL;
> +    g_autofree int *fds = NULL;
> +    size_t nfd = 0;
> +    int **pfds = f->fd_pass ? &fds : NULL;
> +    size_t *pnfd = f->fd_pass ? &nfd : NULL;
>  
>      assert(!qemu_file_is_writable(f));
>  
> @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>      }
>  
>      do {
> -        len = qio_channel_read(f->ioc,
> -                               (char *)f->buf + pending,
> -                               IO_BUF_SIZE - pending,
> -                               &local_error);
> +        struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
> +        len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
> +                                     &local_error);
>          if (len == QIO_CHANNEL_ERR_BLOCK) {
>              if (qemu_in_coroutine()) {
>                  qio_channel_yield(f->ioc, G_IO_IN);
> @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>          qemu_file_set_error_obj(f, len, local_error);
>      }
>  
> +    for (int i = 0; i < nfd; i++) {
> +        FdEntry *fde = g_new0(FdEntry, 1);
> +        fde->fd = fds[i];
> +        QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
> +    }
> +
>      return len;
>  }
>  
> +int qemu_file_put_fd(QEMUFile *f, int fd)
> +{
> +    int ret = 0;
> +    QIOChannel *ioc = qemu_file_get_ioc(f);
> +    Error *err = NULL;
> +    struct iovec iov = { (void *)" ", 1 };
> +
> +    /*
> +     * Send a dummy byte so qemu_fill_buffer on the receiving side does not
> +     * fail with a len=0 error.  Flush first to maintain ordering wrt other
> +     * data.
> +     */
> +
> +    qemu_fflush(f);
> +    if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
> +        error_report_err(error_copy(err));
> +        qemu_file_set_error_obj(f, -EIO, err);
> +        ret = -1;
> +    }
> +    trace_qemu_file_put_fd(f->ioc->name, fd, ret);
> +    return ret;
> +}
> +
> +int qemu_file_get_fd(QEMUFile *f)
> +{
> +    int fd = -1;
> +    FdEntry *fde;
> +
> +    if (!f->fd_pass) {
> +        Error *err = NULL;
> +        error_setg(&err, "%s does not support fd passing", f->ioc->name);
> +        error_report_err(error_copy(err));
> +        qemu_file_set_error_obj(f, -EIO, err);
> +        goto out;
> +    }
> +
> +    /* Force the dummy byte and its fd passenger to appear. */
> +    qemu_peek_byte(f, 0);
> +
> +    fde = QTAILQ_FIRST(&f->fds);
> +    if (fde) {
> +        qemu_get_byte(f);       /* Drop the dummy byte */

Can we still try to get rid of this magical byte?

Ideally this function should check for no byte but f->fds bening non-empty,
if it is it could invoke qemu_fill_buffer(). OTOH, qemu_fill_buffer() needs
to take len==0&&nfds!=0 as legal.  Would that work?

> +        fd = fde->fd;
> +        QTAILQ_REMOVE(&f->fds, fde, entry);
> +    }
> +out:
> +    trace_qemu_file_get_fd(f->ioc->name, fd);
> +    return fd;
> +}
> +
>  /** Closes the file
>   *
>   * Returns negative error value if any error happened on previous operations or
> @@ -361,11 +430,17 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
>   */
>  int qemu_fclose(QEMUFile *f)
>  {
> +    FdEntry *fde, *next;
>      int ret = qemu_fflush(f);
>      int ret2 = qio_channel_close(f->ioc, NULL);
>      if (ret >= 0) {
>          ret = ret2;
>      }
> +    QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
> +        warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
> +        close(fde->fd);
> +        g_free(fde);
> +    }
>      g_clear_pointer(&f->ioc, object_unref);
>      error_free(f->last_error_obj);
>      g_free(f);
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 11c2120..3e47a20 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -79,5 +79,7 @@ size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
>                            off_t pos);
>  
>  QIOChannel *qemu_file_get_ioc(QEMUFile *file);
> +int qemu_file_put_fd(QEMUFile *f, int fd);
> +int qemu_file_get_fd(QEMUFile *f);
>  
>  #endif
> diff --git a/migration/trace-events b/migration/trace-events
> index 5356fb5..345506b 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -88,6 +88,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
>  
>  # qemu-file.c
>  qemu_file_fclose(void) ""
> +qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
> +qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
>  
>  # ram.c
>  get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"
> -- 
> 1.8.3.1
>
Daniel P. Berrangé Oct. 7, 2024, 4:35 p.m. UTC | #2
On Mon, Oct 07, 2024 at 12:06:21PM -0400, Peter Xu wrote:
> On Mon, Sep 30, 2024 at 12:40:38PM -0700, Steve Sistare wrote:
> > Define functions to put/get file descriptors to/from a QEMUFile, for qio
> > channels that support SCM_RIGHTS.  Maintain ordering such that
> >   put(A), put(fd), put(B)
> > followed by
> >   get(A), get(fd), get(B)
> > always succeeds.  Other get orderings may succeed but are not guaranteed.
> > 
> > Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> > ---
> >  migration/qemu-file.c  | 83 +++++++++++++++++++++++++++++++++++++++++++++++---
> >  migration/qemu-file.h  |  2 ++
> >  migration/trace-events |  2 ++
> >  3 files changed, 83 insertions(+), 4 deletions(-)
> > 
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index b6d2f58..7f951ab 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -37,6 +37,11 @@
> >  #define IO_BUF_SIZE 32768
> >  #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
> >  
> > +typedef struct FdEntry {
> > +    QTAILQ_ENTRY(FdEntry) entry;
> > +    int fd;
> > +} FdEntry;
> > +
> >  struct QEMUFile {
> >      QIOChannel *ioc;
> >      bool is_writable;
> > @@ -51,6 +56,9 @@ struct QEMUFile {
> >  
> >      int last_error;
> >      Error *last_error_obj;
> > +
> > +    bool fd_pass;
> > +    QTAILQ_HEAD(, FdEntry) fds;
> >  };
> >  
> >  /*
> > @@ -109,6 +117,8 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
> >      object_ref(ioc);
> >      f->ioc = ioc;
> >      f->is_writable = is_writable;
> > +    f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
> > +    QTAILQ_INIT(&f->fds);
> >  
> >      return f;
> >  }
> > @@ -310,6 +320,10 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> >      int len;
> >      int pending;
> >      Error *local_error = NULL;
> > +    g_autofree int *fds = NULL;
> > +    size_t nfd = 0;
> > +    int **pfds = f->fd_pass ? &fds : NULL;
> > +    size_t *pnfd = f->fd_pass ? &nfd : NULL;
> >  
> >      assert(!qemu_file_is_writable(f));
> >  
> > @@ -325,10 +339,9 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> >      }
> >  
> >      do {
> > -        len = qio_channel_read(f->ioc,
> > -                               (char *)f->buf + pending,
> > -                               IO_BUF_SIZE - pending,
> > -                               &local_error);
> > +        struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
> > +        len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
> > +                                     &local_error);
> >          if (len == QIO_CHANNEL_ERR_BLOCK) {
> >              if (qemu_in_coroutine()) {
> >                  qio_channel_yield(f->ioc, G_IO_IN);
> > @@ -348,9 +361,65 @@ static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
> >          qemu_file_set_error_obj(f, len, local_error);
> >      }
> >  
> > +    for (int i = 0; i < nfd; i++) {
> > +        FdEntry *fde = g_new0(FdEntry, 1);
> > +        fde->fd = fds[i];
> > +        QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
> > +    }
> > +
> >      return len;
> >  }
> >  
> > +int qemu_file_put_fd(QEMUFile *f, int fd)
> > +{
> > +    int ret = 0;
> > +    QIOChannel *ioc = qemu_file_get_ioc(f);
> > +    Error *err = NULL;
> > +    struct iovec iov = { (void *)" ", 1 };
> > +
> > +    /*
> > +     * Send a dummy byte so qemu_fill_buffer on the receiving side does not
> > +     * fail with a len=0 error.  Flush first to maintain ordering wrt other
> > +     * data.
> > +     */
> > +
> > +    qemu_fflush(f);
> > +    if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
> > +        error_report_err(error_copy(err));
> > +        qemu_file_set_error_obj(f, -EIO, err);
> > +        ret = -1;
> > +    }
> > +    trace_qemu_file_put_fd(f->ioc->name, fd, ret);
> > +    return ret;
> > +}
> > +
> > +int qemu_file_get_fd(QEMUFile *f)
> > +{
> > +    int fd = -1;
> > +    FdEntry *fde;
> > +
> > +    if (!f->fd_pass) {
> > +        Error *err = NULL;
> > +        error_setg(&err, "%s does not support fd passing", f->ioc->name);
> > +        error_report_err(error_copy(err));
> > +        qemu_file_set_error_obj(f, -EIO, err);
> > +        goto out;
> > +    }
> > +
> > +    /* Force the dummy byte and its fd passenger to appear. */
> > +    qemu_peek_byte(f, 0);
> > +
> > +    fde = QTAILQ_FIRST(&f->fds);
> > +    if (fde) {
> > +        qemu_get_byte(f);       /* Drop the dummy byte */
> 
> Can we still try to get rid of this magical byte?
> 
> Ideally this function should check for no byte but f->fds bening non-empty,
> if it is it could invoke qemu_fill_buffer(). OTOH, qemu_fill_buffer() needs
> to take len==0&&nfds!=0 as legal.  Would that work?

When passing ancilliary data with sendmsg/recvmsg, on Linux at least,
it is required that there is at least 1 byte of data present.

See 'man 7 unix':

[quote]
   At  least  one  byte  of real data should be sent when
   sending ancillary data.  On Linux, this is required to
   successfully send ancillary data over  a  UNIX  domain
   stream  socket.   When  sending  ancillary data over a
   UNIX domain datagram socket, it is  not  necessary  on
   Linux  to  send  any accompanying real data.  However,
   portable applications should also include at least one
   byte of real data when sending ancillary data  over  a
   datagram socket.
[/quote]

So if your protocol doesn't already have a convenient bit of real data to
attach the SCM_RIGHTS data to, it is common practice to send a single dummy
data byte that is discarded.

With regards,
Daniel
Peter Xu Oct. 7, 2024, 6:12 p.m. UTC | #3
On Mon, Oct 07, 2024 at 05:35:37PM +0100, Daniel P. Berrangé wrote:
> See 'man 7 unix':
> 
> [quote]
>    At  least  one  byte  of real data should be sent when
>    sending ancillary data.  On Linux, this is required to
>    successfully send ancillary data over  a  UNIX  domain
>    stream  socket.   When  sending  ancillary data over a
>    UNIX domain datagram socket, it is  not  necessary  on
>    Linux  to  send  any accompanying real data.  However,
>    portable applications should also include at least one
>    byte of real data when sending ancillary data  over  a
>    datagram socket.
> [/quote]
> 
> So if your protocol doesn't already have a convenient bit of real data to
> attach the SCM_RIGHTS data to, it is common practice to send a single dummy
> data byte that is discarded.

Ah OK, thanks.  Then maybe we can still consider dropping the initial four
bytes for fd migrations; I left the other comment in the next patch.
diff mbox series

Patch

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index b6d2f58..7f951ab 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -37,6 +37,11 @@ 
 #define IO_BUF_SIZE 32768
 #define MAX_IOV_SIZE MIN_CONST(IOV_MAX, 64)
 
+typedef struct FdEntry {
+    QTAILQ_ENTRY(FdEntry) entry;
+    int fd;
+} FdEntry;
+
 struct QEMUFile {
     QIOChannel *ioc;
     bool is_writable;
@@ -51,6 +56,9 @@  struct QEMUFile {
 
     int last_error;
     Error *last_error_obj;
+
+    bool fd_pass;
+    QTAILQ_HEAD(, FdEntry) fds;
 };
 
 /*
@@ -109,6 +117,8 @@  static QEMUFile *qemu_file_new_impl(QIOChannel *ioc, bool is_writable)
     object_ref(ioc);
     f->ioc = ioc;
     f->is_writable = is_writable;
+    f->fd_pass = qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
+    QTAILQ_INIT(&f->fds);
 
     return f;
 }
@@ -310,6 +320,10 @@  static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     int len;
     int pending;
     Error *local_error = NULL;
+    g_autofree int *fds = NULL;
+    size_t nfd = 0;
+    int **pfds = f->fd_pass ? &fds : NULL;
+    size_t *pnfd = f->fd_pass ? &nfd : NULL;
 
     assert(!qemu_file_is_writable(f));
 
@@ -325,10 +339,9 @@  static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
     }
 
     do {
-        len = qio_channel_read(f->ioc,
-                               (char *)f->buf + pending,
-                               IO_BUF_SIZE - pending,
-                               &local_error);
+        struct iovec iov = { f->buf + pending, IO_BUF_SIZE - pending };
+        len = qio_channel_readv_full(f->ioc, &iov, 1, pfds, pnfd, 0,
+                                     &local_error);
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(f->ioc, G_IO_IN);
@@ -348,9 +361,65 @@  static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
         qemu_file_set_error_obj(f, len, local_error);
     }
 
+    for (int i = 0; i < nfd; i++) {
+        FdEntry *fde = g_new0(FdEntry, 1);
+        fde->fd = fds[i];
+        QTAILQ_INSERT_TAIL(&f->fds, fde, entry);
+    }
+
     return len;
 }
 
+int qemu_file_put_fd(QEMUFile *f, int fd)
+{
+    int ret = 0;
+    QIOChannel *ioc = qemu_file_get_ioc(f);
+    Error *err = NULL;
+    struct iovec iov = { (void *)" ", 1 };
+
+    /*
+     * Send a dummy byte so qemu_fill_buffer on the receiving side does not
+     * fail with a len=0 error.  Flush first to maintain ordering wrt other
+     * data.
+     */
+
+    qemu_fflush(f);
+    if (qio_channel_writev_full(ioc, &iov, 1, &fd, 1, 0, &err) < 1) {
+        error_report_err(error_copy(err));
+        qemu_file_set_error_obj(f, -EIO, err);
+        ret = -1;
+    }
+    trace_qemu_file_put_fd(f->ioc->name, fd, ret);
+    return ret;
+}
+
+int qemu_file_get_fd(QEMUFile *f)
+{
+    int fd = -1;
+    FdEntry *fde;
+
+    if (!f->fd_pass) {
+        Error *err = NULL;
+        error_setg(&err, "%s does not support fd passing", f->ioc->name);
+        error_report_err(error_copy(err));
+        qemu_file_set_error_obj(f, -EIO, err);
+        goto out;
+    }
+
+    /* Force the dummy byte and its fd passenger to appear. */
+    qemu_peek_byte(f, 0);
+
+    fde = QTAILQ_FIRST(&f->fds);
+    if (fde) {
+        qemu_get_byte(f);       /* Drop the dummy byte */
+        fd = fde->fd;
+        QTAILQ_REMOVE(&f->fds, fde, entry);
+    }
+out:
+    trace_qemu_file_get_fd(f->ioc->name, fd);
+    return fd;
+}
+
 /** Closes the file
  *
  * Returns negative error value if any error happened on previous operations or
@@ -361,11 +430,17 @@  static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f)
  */
 int qemu_fclose(QEMUFile *f)
 {
+    FdEntry *fde, *next;
     int ret = qemu_fflush(f);
     int ret2 = qio_channel_close(f->ioc, NULL);
     if (ret >= 0) {
         ret = ret2;
     }
+    QTAILQ_FOREACH_SAFE(fde, &f->fds, entry, next) {
+        warn_report("qemu_fclose: received fd %d was never claimed", fde->fd);
+        close(fde->fd);
+        g_free(fde);
+    }
     g_clear_pointer(&f->ioc, object_unref);
     error_free(f->last_error_obj);
     g_free(f);
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 11c2120..3e47a20 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -79,5 +79,7 @@  size_t qemu_get_buffer_at(QEMUFile *f, const uint8_t *buf, size_t buflen,
                           off_t pos);
 
 QIOChannel *qemu_file_get_ioc(QEMUFile *file);
+int qemu_file_put_fd(QEMUFile *f, int fd);
+int qemu_file_get_fd(QEMUFile *f);
 
 #endif
diff --git a/migration/trace-events b/migration/trace-events
index 5356fb5..345506b 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -88,6 +88,8 @@  put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)"
 
 # qemu-file.c
 qemu_file_fclose(void) ""
+qemu_file_put_fd(const char *name, int fd, int ret) "ioc %s, fd %d -> status %d"
+qemu_file_get_fd(const char *name, int fd) "ioc %s -> fd %d"
 
 # ram.c
 get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx"