@@ -113,6 +113,7 @@ struct MigrationIncomingState {
QemuThread colo_incoming_thread;
/* The coroutine we should enter (back) after failover */
Coroutine *migration_incoming_co;
+ QemuSemaphore colo_incoming_sem;
/* See savevm.c */
LoadStateEntry_Head loadvm_handlers;
@@ -183,6 +184,8 @@ struct MigrationState
QSIMPLEQ_HEAD(src_page_requests, MigrationSrcPageRequest) src_page_requests;
/* The RAMBlock used in the last src_page_request */
RAMBlock *last_req_rb;
+ /* The semaphore is used to notify COLO thread that failover is finished */
+ QemuSemaphore colo_exit_sem;
/* The last error that occurred */
Error *error;
@@ -60,6 +60,18 @@ static void secondary_vm_do_failover(void)
/* recover runstate to normal migration finish state */
autostart = true;
}
+ /*
+ * Make sure COLO incoming thread not block in recv or send,
+ * If mis->from_src_file and mis->to_src_file use the same fd,
+ * The second shutdown() will return -1, we ignore this value,
+ * It is harmless.
+ */
+ if (mis->from_src_file) {
+ qemu_file_shutdown(mis->from_src_file);
+ }
+ if (mis->to_src_file) {
+ qemu_file_shutdown(mis->to_src_file);
+ }
old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
FAILOVER_STATUS_COMPLETED);
@@ -68,6 +80,8 @@ static void secondary_vm_do_failover(void)
"secondary VM", old_state);
return;
}
+ /* Notify COLO incoming thread that failover work is finished */
+ qemu_sem_post(&mis->colo_incoming_sem);
/* For Secondary VM, jump to incoming co */
if (mis->migration_incoming_co) {
qemu_coroutine_enter(mis->migration_incoming_co);
@@ -82,6 +96,18 @@ static void primary_vm_do_failover(void)
migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
MIGRATION_STATUS_COMPLETED);
+ /*
+ * Wake up COLO thread which may blocked in recv() or send(),
+ * The s->rp_state.from_dst_file and s->to_dst_file may use the
+ * same fd, but we still shutdown the fd for twice, it is harmless.
+ */
+ if (s->to_dst_file) {
+ qemu_file_shutdown(s->to_dst_file);
+ }
+ if (s->rp_state.from_dst_file) {
+ qemu_file_shutdown(s->rp_state.from_dst_file);
+ }
+
old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
FAILOVER_STATUS_COMPLETED);
if (old_state != FAILOVER_STATUS_HANDLING) {
@@ -89,6 +115,8 @@ static void primary_vm_do_failover(void)
old_state);
return;
}
+ /* Notify COLO thread that failover work is finished */
+ qemu_sem_post(&s->colo_exit_sem);
}
void colo_do_failover(MigrationState *s)
@@ -374,6 +402,14 @@ out:
COLO_EXIT_REASON_REQUEST, NULL);
}
+ /* Hope this not to be too long to wait here */
+ qemu_sem_wait(&s->colo_exit_sem);
+ qemu_sem_destroy(&s->colo_exit_sem);
+ /*
+ * Must be called after failover BH is completed,
+ * Or the failover BH may shutdown the wrong fd that
+ * re-used by other threads after we release here.
+ */
if (s->rp_state.from_dst_file) {
qemu_fclose(s->rp_state.from_dst_file);
}
@@ -382,6 +418,7 @@ out:
void migrate_start_colo_process(MigrationState *s)
{
qemu_mutex_unlock_iothread();
+ qemu_sem_init(&s->colo_exit_sem, 0);
migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
colo_process_checkpoint(s);
@@ -421,6 +458,8 @@ void *colo_process_incoming_thread(void *opaque)
Error *local_err = NULL;
int ret;
+ qemu_sem_init(&mis->colo_incoming_sem, 0);
+
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@@ -550,6 +589,10 @@ out:
*/
colo_release_ram_cache();
+ /* Hope this not to be too long to loop here */
+ qemu_sem_wait(&mis->colo_incoming_sem);
+ qemu_sem_destroy(&mis->colo_incoming_sem);
+ /* Must be called after failover BH is completed */
if (mis->to_src_file) {
qemu_fclose(mis->to_src_file);
}