[RFC,05/11] migration/block: Merge block reactivations for fail/cancel

Message ID 20241204005138.702289-6-peterx@redhat.com (mailing list archive)
State New
Series migration/block: disk activation rewrite

Commit Message

Peter Xu Dec. 4, 2024, 12:51 a.m. UTC
When migration is cancelled or fails during switchover, especially after the
disks have been inactivated, QEMU needs to remember to re-activate the disks
before the VM starts again.

This used to be done separately in two paths: one in qmp_migrate_cancel(),
the other in the failure path of migration_completion().

It has been fixed piecemeal in different commits, all over the place in QEMU.
These are the ones I see, at least:

 - In 2016, commit fe904ea824 ("migration: regain control of images when
   migration fails to complete")

 - In 2017, commit 1d2acc3162 ("migration: re-active images while migration
   been canceled after inactive them")

 - In 2023, commit 6dab4c93ec ("migration: Attempt disk reactivation in
   more failure scenarios")

We could have done better and solved this once and for all.  Now is a good
chance to do so, because we have a better picture of the whole problem.

Put both of the error cases together into migration_iteration_finish(), which
will be invoked in either scenario.

In the meantime, clean up block_inactive in a few ways:

  - Rename it to block_active, which is easier to follow.

  - Maintain the flag for the whole lifecycle of QEMU.  Otherwise it is not
    clear when the flag is valid.

  - Put it together with the operations, rather than updating the flag
    separately.

Now the flag is set to TRUE from the start, reflecting that a freshly started
QEMU owns the block devices (so far the flag is only used by the migration
module).  It is then updated by the migration switchover code whenever
invalidation happens.  With that, the flag always reflects the correct block
ownership.
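To illustrate the intended call pattern (a sketch only, not part of the patch
hunks below; the real change is in migration_iteration_finish()), any path
that may resume the VM after switchover re-activates through the wrapper,
which is a no-op when the disks were never inactivated:

    /* Sketch: failure/cancel path that may resume the VM, BQL held */
    if (!migration_block_activate(s)) {
        /* Do not restart the VM with inactive disks */
        error_report("cannot re-activate block devices");
    } else if (runstate_is_live(s->vm_old_state)) {
        vm_start();
    }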

NOTE: this can still be racy if there is a concurrent operation changing the
block activation status (e.g., qmp_cont() right after a migration failure but
before the blocks are re-activated).  That was not avoidable even before this
patch, so it is not part of what this patch aims to achieve and is set aside
for now.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.h | 20 +++++++++-
 migration/migration.c | 86 +++++++++++++++++++++++++------------------
 migration/savevm.c    | 11 ++----
 3 files changed, 72 insertions(+), 45 deletions(-)

Patch

diff --git a/migration/migration.h b/migration/migration.h
index 3857905c0e..b71c062ad2 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -370,8 +370,20 @@  struct MigrationState {
     /* Flag set once the migration thread is running (and needs joining) */
     bool migration_thread_running;
 
-    /* Flag set once the migration thread called bdrv_inactivate_all */
-    bool block_inactive;
+    /*
+     * Migration-only cache to remember the block layer activation status.
+     * Protected by BQL.
+     *
+     * We need this because..
+     *
+     * - Migration can fail after block devices are invalidated (during
+     *   switchover phase).  When that happens, we need to be able to
+     *   recover the block drive status by re-activating them.
+     *
+     * For a freshly started QEMU, block_active is initialized to TRUE,
+     * reflecting that QEMU owns the block devices from the start.
+     */
+    bool block_active;
 
     /* Migration is waiting for guest to unplug device */
     QemuSemaphore wait_unplug_sem;
@@ -556,4 +568,8 @@  void migration_bitmap_sync_precopy(bool last_stage);
 /* migration/block-dirty-bitmap.c */
 void dirty_bitmap_mig_init(void);
 
+/* Wrapper for block active/inactive operations */
+bool migration_block_activate(MigrationState *s);
+bool migration_block_inactivate(MigrationState *s);
+
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index ba5deec5bc..8f7d09ca84 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -130,6 +130,47 @@  static void migration_downtime_end(MigrationState *s)
     trace_vmstate_downtime_checkpoint("src-downtime-end");
 }
 
+bool migration_block_activate(MigrationState *s)
+{
+    Error *local_err = NULL;
+
+    assert(bql_locked());
+
+    if (s->block_active) {
+        return true;
+    }
+
+    bdrv_activate_all(&local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return false;
+    }
+
+    s->block_active = true;
+    return true;
+}
+
+bool migration_block_inactivate(MigrationState *s)
+{
+    int ret;
+
+    assert(bql_locked());
+
+    if (!s->block_active) {
+        return true;
+    }
+
+    ret = bdrv_inactivate_all();
+    if (ret) {
+        error_report("%s: bdrv_inactivate_all() failed: %d\n",
+                     __func__, ret);
+        return false;
+    }
+
+    s->block_active = false;
+    return true;
+}
+
 static bool migration_needs_multiple_sockets(void)
 {
     return migrate_multifd() || migrate_postcopy_preempt();
@@ -1552,16 +1593,6 @@  static void migrate_fd_cancel(MigrationState *s)
             }
         }
     }
-    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
-        Error *local_err = NULL;
-
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-    }
 }
 
 void migration_add_notifier_mode(NotifierWithReturn *notify,
@@ -2778,31 +2809,6 @@  static void migration_completion_postcopy(MigrationState *s)
     trace_migration_completion_postcopy_end_after_complete();
 }
 
-static void migration_completion_failed(MigrationState *s,
-                                        int current_active_state)
-{
-    if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE ||
-                              s->state == MIGRATION_STATUS_DEVICE)) {
-        /*
-         * If not doing postcopy, vm_start() will be called: let's
-         * regain control on images.
-         */
-        Error *local_err = NULL;
-
-        bql_lock();
-        bdrv_activate_all(&local_err);
-        if (local_err) {
-            error_report_err(local_err);
-        } else {
-            s->block_inactive = false;
-        }
-        bql_unlock();
-    }
-
-    migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_FAILED);
-}
-
 /**
  * migration_completion: Used by migration_thread when there's not much left.
  *   The caller 'breaks' the loop when this returns.
@@ -2856,7 +2862,8 @@  fail:
         error_free(local_err);
     }
 
-    migration_completion_failed(s, current_active_state);
+    migrate_set_state(&s->state, current_active_state,
+                      MIGRATION_STATUS_FAILED);
 }
 
 /**
@@ -3286,6 +3293,11 @@  static void migration_iteration_finish(MigrationState *s)
     case MIGRATION_STATUS_FAILED:
     case MIGRATION_STATUS_CANCELLED:
     case MIGRATION_STATUS_CANCELLING:
+        /*
+         * Re-activate the block drives if they're inactivated.  Note, COLO
+         * shouldn't use block_active at all, so it should be no-op there.
+         */
+        migration_block_activate(s);
         if (runstate_is_live(s->vm_old_state)) {
             if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                 vm_start();
@@ -3858,6 +3870,8 @@  static void migration_instance_init(Object *obj)
     ms->state = MIGRATION_STATUS_NONE;
     ms->mbps = -1;
     ms->pages_per_second = -1;
+    /* Freshly started QEMU owns all the block devices */
+    ms->block_active = true;
     qemu_sem_init(&ms->pause_sem, 0);
     qemu_mutex_init(&ms->error_mutex);
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 3c75257318..a335344c75 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1549,17 +1549,14 @@  int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
     if (inactivate_disks) {
         /* Inactivate before sending QEMU_VM_EOF so that the
          * bdrv_activate_all() on the other end won't fail. */
-        ret = bdrv_inactivate_all();
-        if (ret) {
-            error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
-                       __func__, ret);
+        if (!migration_block_inactivate(ms)) {
+            error_setg(&local_err, "%s: bdrv_inactivate_all() failed",
+                       __func__);
             migrate_set_error(ms, local_err);
             error_report_err(local_err);
-            qemu_file_set_error(f, ret);
+            qemu_file_set_error(f, -EFAULT);
             return ret;
         }
-        /* Remember that we did this */
-        s->block_inactive = true;
     }
     if (!in_postcopy) {
         /* Postcopy stream will still be going */