diff mbox series

[RFC,v3,7/8] migration/multifd: Add a compat property for TLS termination

Message ID 20250207195359.17443-8-farosas@suse.de (mailing list archive)
State New
Headers show
Series crypto,io,migration: Add support to gnutls_bye() | expand

Commit Message

Fabiano Rosas Feb. 7, 2025, 7:53 p.m. UTC
We're currently changing the way the source multifd migration handles
the shutdown of the multifd channels when TLS is in use to perform a
clean termination by calling gnutls_bye().

Older src QEMUs will always close the channel without terminating the
TLS session. New dst QEMUs treat an unclean termination as an error.

Add multifd_clean_tls_termination (default true) that can be switched
on the destination whenever a src QEMU <= 9.2 is in use.

(Note that the compat property is only strictly necessary for src
QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs
9.1 and 9.2 can put the destination in a condition where it doesn't
see the unclean termination. Still, make the property more inclusive
to facilitate potential backports.)

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
 hw/core/machine.c     |  1 +
 migration/migration.h | 33 +++++++++++++++++++++++++++++++++
 migration/multifd.c   | 15 +++++++++++++--
 migration/multifd.h   |  2 ++
 migration/options.c   |  2 ++
 5 files changed, 51 insertions(+), 2 deletions(-)

Comments

Peter Xu Feb. 10, 2025, 5:11 p.m. UTC | #1
On Fri, Feb 07, 2025 at 04:53:58PM -0300, Fabiano Rosas wrote:
> We're currently changing the way the source multifd migration handles
> the shutdown of the multifd channels when TLS is in use to perform a
> clean termination by calling gnutls_bye().
> 
> Older src QEMUs will always close the channel without terminating the
> TLS session. New dst QEMUs treat an unclean termination as an error.
> 
> Add multifd_clean_tls_termination (default true) that can be switched
> on the destination whenever a src QEMU <= 9.2 is in use.
> 
> (Note that the compat property is only strictly necessary for src
> QEMUs older than 9.1. Due to synchronization coincidences, src QEMUs
> 9.1 and 9.2 can put the destination in a condition where it doesn't
> see the unclean termination. Still, make the property more inclusive
> to facilitate potential backports.)
> 
> Signed-off-by: Fabiano Rosas <farosas@suse.de>

Reviewed-by: Peter Xu <peterx@redhat.com>

One nitpick..

> ---
>  hw/core/machine.c     |  1 +
>  migration/migration.h | 33 +++++++++++++++++++++++++++++++++
>  migration/multifd.c   | 15 +++++++++++++--
>  migration/multifd.h   |  2 ++
>  migration/options.c   |  2 ++
>  5 files changed, 51 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 254cc20c4c..02cff735b3 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -42,6 +42,7 @@ GlobalProperty hw_compat_9_2[] = {
>      { "virtio-balloon-pci-transitional", "vectors", "0" },
>      { "virtio-balloon-pci-non-transitional", "vectors", "0" },
>      { "virtio-mem-pci", "vectors", "0" },
> +    { "migration", "multifd-clean-tls-termination", "false" },
>  };
>  const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
>  
> diff --git a/migration/migration.h b/migration/migration.h
> index 4c1fafc2b5..77def0b437 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -443,6 +443,39 @@ struct MigrationState {
>       * Default value is false. (since 8.1)
>       */
>      bool multifd_flush_after_each_section;
> +
> +    /*
> +     * This variable only makes sense when set on the machine that is
> +     * the destination of a multifd migration with TLS enabled. It
> +     * affects the behavior of the last send->recv iteration with
> +     * regards to termination of the TLS session.
> +     *
> +     * When set:
> +     *
> +     * - the destination QEMU instance can expect to never get a
> +     *   GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
> +     *   message: "The TLS connection was non-properly terminated".
> +     *
> +     * When clear:
> +     *
> +     * - the destination QEMU instance can expect to see a
> +     *   GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
> +     *   whenever the last recv() call of that channel happens after
> +     *   the source QEMU instance has already issued shutdown() on the
> +     *   channel.
> +     *
> +     *   Commit 637280aeb2 (since 9.1) introduced a side effect that
> +     *   causes the destination instance to not be affected by the
> +     *   premature termination, while commit 1d457daf86 (since 10.0)
> +     *   causes the premature termination condition to be once again
> +     *   reachable.
> +     *
> +     * NOTE: Regardless of the state of this option, a premature
> +     * termination of the TLS connection might happen due to error at
> +     * any moment prior to the last send->recv iteration.
> +     */
> +    bool multifd_clean_tls_termination;
> +
>      /*
>       * This decides the size of guest memory chunk that will be used
>       * to track dirty bitmap clearing.  The size of memory chunk will
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 0296758c08..8045197be8 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -1151,6 +1151,7 @@ void multifd_recv_sync_main(void)
>  
>  static void *multifd_recv_thread(void *opaque)
>  {
> +    MigrationState *s = migrate_get_current();
>      MultiFDRecvParams *p = opaque;
>      Error *local_err = NULL;
>      bool use_packets = multifd_use_packets();
> @@ -1159,18 +1160,28 @@ static void *multifd_recv_thread(void *opaque)
>      trace_multifd_recv_thread_start(p->id);
>      rcu_register_thread();
>  
> +    if (!s->multifd_clean_tls_termination) {
> +        p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
> +    }
> +
>      while (true) {
>          uint32_t flags = 0;
>          bool has_data = false;
>          p->normal_num = 0;
>  
> +

Extra newline (can be fixed when merge)

>          if (use_packets) {
> +            struct iovec iov = {
> +                .iov_base = (void *)p->packet,
> +                .iov_len = p->packet_len
> +            };
> +
>              if (multifd_recv_should_exit()) {
>                  break;
>              }
>  
> -            ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
> -                                           p->packet_len, &local_err);
> +            ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
> +                                                 p->read_flags, &local_err);
>              if (!ret) {
>                  /* EOF */
>                  assert(!local_err);
> diff --git a/migration/multifd.h b/migration/multifd.h
> index bd785b9873..cf408ff721 100644
> --- a/migration/multifd.h
> +++ b/migration/multifd.h
> @@ -244,6 +244,8 @@ typedef struct {
>      uint32_t zero_num;
>      /* used for de-compression methods */
>      void *compress_data;
> +    /* Flags for the QIOChannel */
> +    int read_flags;
>  } MultiFDRecvParams;
>  
>  typedef struct {
> diff --git a/migration/options.c b/migration/options.c
> index 1ad950e397..feda354935 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -99,6 +99,8 @@ const Property migration_properties[] = {
>                        clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
>      DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
>                       preempt_pre_7_2, false),
> +    DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
> +                     multifd_clean_tls_termination, true),
>  
>      /* Migration parameters */
>      DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
> -- 
> 2.35.3
>
diff mbox series

Patch

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 254cc20c4c..02cff735b3 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -42,6 +42,7 @@  GlobalProperty hw_compat_9_2[] = {
     { "virtio-balloon-pci-transitional", "vectors", "0" },
     { "virtio-balloon-pci-non-transitional", "vectors", "0" },
     { "virtio-mem-pci", "vectors", "0" },
+    { "migration", "multifd-clean-tls-termination", "false" },
 };
 const size_t hw_compat_9_2_len = G_N_ELEMENTS(hw_compat_9_2);
 
diff --git a/migration/migration.h b/migration/migration.h
index 4c1fafc2b5..77def0b437 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -443,6 +443,39 @@  struct MigrationState {
      * Default value is false. (since 8.1)
      */
     bool multifd_flush_after_each_section;
+
+    /*
+     * This variable only makes sense when set on the machine that is
+     * the destination of a multifd migration with TLS enabled. It
+     * affects the behavior of the last send->recv iteration with
+     * regards to termination of the TLS session.
+     *
+     * When set:
+     *
+     * - the destination QEMU instance can expect to never get a
+     *   GNUTLS_E_PREMATURE_TERMINATION error. Manifested as the error
+     *   message: "The TLS connection was non-properly terminated".
+     *
+     * When clear:
+     *
+     * - the destination QEMU instance can expect to see a
+     *   GNUTLS_E_PREMATURE_TERMINATION error in any multifd channel
+     *   whenever the last recv() call of that channel happens after
+     *   the source QEMU instance has already issued shutdown() on the
+     *   channel.
+     *
+     *   Commit 637280aeb2 (since 9.1) introduced a side effect that
+     *   causes the destination instance to not be affected by the
+     *   premature termination, while commit 1d457daf86 (since 10.0)
+     *   causes the premature termination condition to be once again
+     *   reachable.
+     *
+     * NOTE: Regardless of the state of this option, a premature
+     * termination of the TLS connection might happen due to error at
+     * any moment prior to the last send->recv iteration.
+     */
+    bool multifd_clean_tls_termination;
+
     /*
      * This decides the size of guest memory chunk that will be used
      * to track dirty bitmap clearing.  The size of memory chunk will
diff --git a/migration/multifd.c b/migration/multifd.c
index 0296758c08..8045197be8 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1151,6 +1151,7 @@  void multifd_recv_sync_main(void)
 
 static void *multifd_recv_thread(void *opaque)
 {
+    MigrationState *s = migrate_get_current();
     MultiFDRecvParams *p = opaque;
     Error *local_err = NULL;
     bool use_packets = multifd_use_packets();
@@ -1159,18 +1160,28 @@  static void *multifd_recv_thread(void *opaque)
     trace_multifd_recv_thread_start(p->id);
     rcu_register_thread();
 
+    if (!s->multifd_clean_tls_termination) {
+        p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
+    }
+
     while (true) {
         uint32_t flags = 0;
         bool has_data = false;
         p->normal_num = 0;
 
+
         if (use_packets) {
+            struct iovec iov = {
+                .iov_base = (void *)p->packet,
+                .iov_len = p->packet_len
+            };
+
             if (multifd_recv_should_exit()) {
                 break;
             }
 
-            ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
-                                           p->packet_len, &local_err);
+            ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
+                                                 p->read_flags, &local_err);
             if (!ret) {
                 /* EOF */
                 assert(!local_err);
diff --git a/migration/multifd.h b/migration/multifd.h
index bd785b9873..cf408ff721 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -244,6 +244,8 @@  typedef struct {
     uint32_t zero_num;
     /* used for de-compression methods */
     void *compress_data;
+    /* Flags for the QIOChannel */
+    int read_flags;
 } MultiFDRecvParams;
 
 typedef struct {
diff --git a/migration/options.c b/migration/options.c
index 1ad950e397..feda354935 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -99,6 +99,8 @@  const Property migration_properties[] = {
                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
     DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState,
                      preempt_pre_7_2, false),
+    DEFINE_PROP_BOOL("multifd-clean-tls-termination", MigrationState,
+                     multifd_clean_tls_termination, true),
 
     /* Migration parameters */
     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,