diff mbox series

multifd: ensure multifd threads are terminated before cleanup params

Message ID 20220212130735.3236-1-wangxinxin.wang@huawei.com (mailing list archive)
State New, archived
Headers show
Series multifd: ensure multifd threads are terminated before cleanup params | expand

Commit Message

Wang Xin Feb. 12, 2022, 1:07 p.m. UTC
In multifd_save_cleanup(), we terminate all multifd threads and destroy
the 'p->mutex', while the mutex may still be held by multifd send thread,
this causes qemu to crash.

It's because the multifd_send_thread maybe scheduled out after setting
'p->running' to false. To reproduce the problem, we put
'multifd_send_thread' to sleep seconds before unlock 'p->mutex':

function multifd_send_thread()
{
    ...
    qemu_mutex_lock(&p->mutex);
    p->running = false;
    usleep(5000000);
    ^^^^^^^^^^^^^^^^
    qemu_mutex_unlock(&p->mutex);
    ...
}

As the 'p->running' is used to indicate whether the multifd_send/recv thread
is created, it should be set to false after the thread terminate.

Signed-off-by: Wang Xin <wangxinxin.wang@huawei.com>
Signed-off-by: Huangyu Zhai <zhaihuanyu@huawei.com>

Comments

Zhijian Li (Fujitsu)" via Feb. 22, 2022, 2:45 a.m. UTC | #1
Ping.

> 
> In multifd_save_cleanup(), we terminate all multifd threads and destroy
> the 'p->mutex', while the mutex may still be held by multifd send thread,
> this causes qemu to crash.
> 
> It's because the multifd_send_thread maybe scheduled out after setting
> 'p->running' to false. To reproduce the problem, we put
> 'multifd_send_thread' to sleep seconds before unlock 'p->mutex':
> 
> function multifd_send_thread()
> {
>     ...
>     qemu_mutex_lock(&p->mutex);
>     p->running = false;
>     usleep(5000000);
>     ^^^^^^^^^^^^^^^^
>     qemu_mutex_unlock(&p->mutex);
>     ...
> }
> 
> As the 'p->running' is used to indicate whether the multifd_send/recv thread
> is created, it should be set to false after the thread terminate.
> 
> Signed-off-by: Wang Xin <wangxinxin.wang@huawei.com>
> Signed-off-by: Huangyu Zhai <zhaihuanyu@huawei.com>
> 
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 76b57a7177..d8fc7d319e 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -526,6 +526,7 @@ void multifd_save_cleanup(void)
> 
>          if (p->running) {
>              qemu_thread_join(&p->thread);
> +            p->running = false;
>          }
>      }
>      for (i = 0; i < migrate_multifd_channels(); i++) {
> @@ -707,10 +708,6 @@ out:
>          qemu_sem_post(&multifd_send_state->channels_ready);
>      }
> 
> -    qemu_mutex_lock(&p->mutex);
> -    p->running = false;
> -    qemu_mutex_unlock(&p->mutex);
> -
>      rcu_unregister_thread();
>      trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
> 
> @@ -995,6 +992,7 @@ int multifd_load_cleanup(Error **errp)
>               */
>              qemu_sem_post(&p->sem_sync);
>              qemu_thread_join(&p->thread);
> +            p->running = false;
>          }
>      }
>      for (i = 0; i < migrate_multifd_channels(); i++) {
> @@ -1110,9 +1108,6 @@ static void *multifd_recv_thread(void *opaque)
>          multifd_recv_terminate_threads(local_err);
>          error_free(local_err);
>      }
> -    qemu_mutex_lock(&p->mutex);
> -    p->running = false;
> -    qemu_mutex_unlock(&p->mutex);
> 
>      rcu_unregister_thread();
>      trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages);
> --
> 2.26.0.windows.1
diff mbox series

Patch

diff --git a/migration/multifd.c b/migration/multifd.c
index 76b57a7177..d8fc7d319e 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -526,6 +526,7 @@  void multifd_save_cleanup(void)
 
         if (p->running) {
             qemu_thread_join(&p->thread);
+            p->running = false;
         }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -707,10 +708,6 @@  out:
         qemu_sem_post(&multifd_send_state->channels_ready);
     }
 
-    qemu_mutex_lock(&p->mutex);
-    p->running = false;
-    qemu_mutex_unlock(&p->mutex);
-
     rcu_unregister_thread();
     trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
 
@@ -995,6 +992,7 @@  int multifd_load_cleanup(Error **errp)
              */
             qemu_sem_post(&p->sem_sync);
             qemu_thread_join(&p->thread);
+            p->running = false;
         }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
@@ -1110,9 +1108,6 @@  static void *multifd_recv_thread(void *opaque)
         multifd_recv_terminate_threads(local_err);
         error_free(local_err);
     }
-    qemu_mutex_lock(&p->mutex);
-    p->running = false;
-    qemu_mutex_unlock(&p->mutex);
 
     rcu_unregister_thread();
     trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages);