@@ -2004,11 +2004,14 @@ static void *postcopy_ram_listen_thread(void *opaque)
* want a wrapper for the QEMUFile handle.
*/
f = mis->from_src_file;
+ if (!f) {
+ /* postcopy pause never got recovered */
+ goto out;
+ }
/* And non-blocking again so we don't block in any cleanup */
qemu_file_set_blocking(f, false);
- trace_postcopy_ram_listen_thread_exit();
if (load_res < 0) {
qemu_file_set_error(f, load_res);
dirty_bitmap_mig_cancel_incoming();
@@ -2021,10 +2024,6 @@ static void *postcopy_ram_listen_thread(void *opaque)
"bitmaps are correctly migrated and valid.",
__func__, load_res);
load_res = 0; /* prevent further exit() */
- } else {
- error_report("%s: loadvm failed: %d", __func__, load_res);
- migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
- MIGRATION_STATUS_FAILED);
}
}
if (load_res >= 0) {
@@ -2034,31 +2033,40 @@ static void *postcopy_ram_listen_thread(void *opaque)
* state yet; wait for the end of the main thread.
*/
qemu_event_wait(&mis->main_thread_load_event);
- }
- postcopy_ram_incoming_cleanup(mis);
- if (load_res < 0) {
+ postcopy_ram_incoming_cleanup(mis);
+
+ migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+ MIGRATION_STATUS_COMPLETED);
+
/*
- * If something went wrong then we have a bad state so exit;
- * depending how far we got it might be possible at this point
- * to leave the guest running and fire MCEs for pages that never
- * arrived as a desperate recovery step.
+ * If everything has worked fine, then the main thread has waited
+ * for us to start, and we're the last use of the mis.
*/
- rcu_unregister_thread();
- exit(EXIT_FAILURE);
+ migration_incoming_state_destroy();
}
- migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
- MIGRATION_STATUS_COMPLETED);
- /*
- * If everything has worked fine, then the main thread has waited
- * for us to start, and we're the last use of the mis.
- * (If something broke then qemu will have to exit anyway since it's
- * got a bad migration state).
- */
- migration_incoming_state_destroy();
-
+out:
+ trace_postcopy_ram_listen_thread_exit();
rcu_unregister_thread();
+
+ if (load_res < 0) {
+ postcopy_ram_incoming_cleanup(mis);
+
+ error_report("%s: loadvm failed: %d", __func__, load_res);
+ migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ if (mis->exit_on_error) {
+ /*
+ * If something went wrong then we have a bad state so exit;
+ * depending how far we got it might be possible at this point
+ * to leave the guest running and fire MCEs for pages that never
+ * arrived as a desperate recovery step.
+ */
+ exit(EXIT_FAILURE);
+ }
+ }
+
mis->have_listen_thread = false;
postcopy_state_set(POSTCOPY_INCOMING_END);
@@ -2921,7 +2929,9 @@ out:
migrate_postcopy_ram() && postcopy_pause_incoming(mis)) {
/* Reset f to point to the newly created channel */
f = mis->from_src_file;
- goto retry;
+ if (f) {
+ goto retry;
+ }
}
}
return ret;
There are a couple of problems with exiting the postcopy listen thread. It does not honor the exit-on-error flag and always exits QEMU upon error. It also does not behave well if a qmp_migrate_cancel() is issued while postcopy is paused, it either hangs during retry or crashes during access of a non-recovered QEMUFile (i.e. NULL). Fix it by adding support for exit-on-error and avoiding accessing the NULL file pointer. While here, move the end tracepoint to a later part of the function. Signed-off-by: Fabiano Rosas <farosas@suse.de> --- migration/savevm.c | 60 +++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 25 deletions(-)