diff mbox series

Fix data race with the state field of ThreadPoolElement

Message ID 20250219161223.3340431-1-mordan@ispras.ru (mailing list archive)
State New
Headers show
Series Fix data race with the state field of ThreadPoolElement | expand

Commit Message

Vitalii Mordan Feb. 19, 2025, 4:12 p.m. UTC
TSAN reports a potential data race on the state field of
ThreadPoolElement. This is fixed by using atomic access to the field.

Fixes: d354c7eccf ("aio: add generic thread-pool facility")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
Signed-off-by: Vitalii Mordan <mordan@ispras.ru>
---
 util/thread-pool.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

Comments

Stefan Hajnoczi Feb. 20, 2025, 12:37 a.m. UTC | #1
On Wed, Feb 19, 2025 at 07:12:23PM +0300, Vitalii Mordan wrote:
> TSAN reports a potential data race on the state field of
> ThreadPoolElement. This is fixed by using atomic access to the field.

The tsan output from the bug report:

WARNING: ThreadSanitizer: data race (pid=787043)
  Write of size 4 at 0x7b1c00000660 by thread T5 (mutexes: write M0):
    #0 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:108:20 (test-thread-pool-smc+0xa65a56)
    #1 qemu_thread_start /home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 (test-thread-pool-smc+0xa49040)

  Previous read of size 4 at 0x7b1c00000660 by main thread:
    #0 thread_pool_completion_bh /home/mordan/qemu/build/../util/thread-pool.c:183:19 (test-thread-pool-smc+0xa6549d)
    #1 aio_bh_call /home/mordan/qemu/build/../util/async.c:171:5 (test-thread-pool-smc+0xa5e03e)
    #2 aio_bh_poll /home/mordan/qemu/build/../util/async.c:218:13 (test-thread-pool-smc+0xa5e03e)
    #3 aio_poll /home/mordan/qemu/build/../util/aio-posix.c:722:17 (test-thread-pool-smc+0xa4343a)
    #4 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:133:9 (test-thread-pool-smc+0x50e638)
    #5 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e638)
    #6 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e638)
    #7 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e638)

  Location is heap block of size 104 at 0x7b1c00000620 allocated by main thread:
    #0 malloc out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:667:5 (test-thread-pool-smc+0x346131)
    #1 g_malloc <null> (libglib-2.0.so.0+0x5e738) (BuildId: e845b8fd2f396872c036976626389ffc4f50c9c5)
    #2 thread_pool_submit_aio /home/mordan/qemu/build/../util/thread-pool.c:251:11 (test-thread-pool-smc+0xa648bd)
    #3 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 (test-thread-pool-smc+0x50e600)
    #4 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e600)
    #5 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e600)
    #6 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e600)

  Mutex M0 (0x7b3c00000100) created at:
    #0 pthread_mutex_init out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1316:3 (test-thread-pool-smc+0x34914f)
    #1 qemu_mutex_init /home/mordan/qemu/build/../util/qemu-thread-posix.c:71:11 (test-thread-pool-smc+0xa47189)
    #2 thread_pool_init_one /home/mordan/qemu/build/../util/thread-pool.c:334:5 (test-thread-pool-smc+0xa64f60)
    #3 thread_pool_new /home/mordan/qemu/build/../util/thread-pool.c:348:5 (test-thread-pool-smc+0xa64f60)
    #4 aio_get_thread_pool /home/mordan/qemu/build/../util/async.c:441:28 (test-thread-pool-smc+0xa5e6d4)
    #5 thread_pool_submit_aio /home/mordan/qemu/build/../util/thread-pool.c:246:24 (test-thread-pool-smc+0xa6488d)
    #6 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 (test-thread-pool-smc+0x50e600)
    #7 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e600)
    #8 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e600)
    #9 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e600)

  Thread T5 'worker' (tid=787049, running) created by thread T4 at:
    #0 pthread_create out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1022:3 (test-thread-pool-smc+0x34791d)
    #1 qemu_thread_create /home/mordan/qemu/build/../util/qemu-thread-posix.c:583:11 (test-thread-pool-smc+0xa48ed0)
    #2 do_spawn_thread /home/mordan/qemu/build/../util/thread-pool.c:146:5 (test-thread-pool-smc+0xa658de)
    #3 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:83:5 (test-thread-pool-smc+0xa658de)
    #4 qemu_thread_start /home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 (test-thread-pool-smc+0xa49040)

SUMMARY: ThreadSanitizer: data race /home/mordan/qemu/build/../util/thread-pool.c:108:20 in worker_thread

My interpretation is that tsan is saying there is a data race between
the load in thread_pool_completion_bh():

  static void thread_pool_completion_bh(void *opaque)
  {
      ThreadPool *pool = opaque;
      ThreadPoolElement *elem, *next;

      defer_call_begin(); /* cb() may use defer_call() to coalesce work */

  restart:
      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
          if (elem->state != THREAD_DONE) {
	      ^^^^^^^^^^^

and the store in worker_thread():

    req = QTAILQ_FIRST(&pool->request_list);
    QTAILQ_REMOVE(&pool->request_list, req, reqs);
    req->state = THREAD_ACTIVE;
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^
    qemu_mutex_unlock(&pool->lock);

It doesn't matter whether thread_pool_completion_bh() sees THREAD_QUEUED
or THREAD_ACTIVE, so this looks like a false positive. There is no
practical effect either way.

THREAD_QUEUED vs THREAD_ACTIVE matters in thread_pool_cancel(), but that
is protected by pool->lock.

Paolo: Any thoughts?

Stefan

> 
> Fixes: d354c7eccf ("aio: add generic thread-pool facility")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
> Signed-off-by: Vitalii Mordan <mordan@ispras.ru>
> ---
>  util/thread-pool.c | 13 ++++++-------
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
>          ret = req->func(req->arg);
>  
>          req->ret = ret;
> -        /* Write ret before state.  */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* Atomically update state after setting ret.  */
> +        qatomic_store_release(&req->state, THREAD_DONE);
>  
>          qemu_bh_schedule(pool->completion_bh);
>          qemu_mutex_lock(&pool->lock);
> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>  
>  restart:
>      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
>              continue;
>          }
>  
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>      trace_thread_pool_cancel(elem, elem->common.opaque);
>  
>      QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
>          QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>          qemu_bh_schedule(pool->completion_bh);
>  
> -        elem->state = THREAD_DONE;
>          elem->ret = -ECANCELED;
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>      }
>  
>  }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
>      req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
>      req->func = func;
>      req->arg = arg;
> -    req->state = THREAD_QUEUED;
>      req->pool = pool;
> +    qatomic_store_release(&req->state, THREAD_QUEUED);
>  
>      QLIST_INSERT_HEAD(&pool->head, req, all);
>  
> -- 
> 2.34.1
>
Paolo Bonzini Feb. 20, 2025, 9:10 a.m. UTC | #2
On 2/19/25 17:12, Vitalii Mordan wrote:
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
>           ret = req->func(req->arg);
>   
>           req->ret = ret;
> -        /* Write ret before state.  */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* Atomically update state after setting ret.  */
> +        qatomic_store_release(&req->state, THREAD_DONE);

This is good.

> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>   
>   restart:
>       QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {

This is good, but it needs a comment and it can replace the smp_rmb() below.

>               continue;
>           }
>   
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>       trace_thread_pool_cancel(elem, elem->common.opaque);
>   
>       QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {

This is not ordering anything so it can be qatomic_read/qatomic_set 
(technically the one below doesn't even need that, but it's fine).

>           QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>           qemu_bh_schedule(pool->completion_bh);
>   
> -        elem->state = THREAD_DONE;
>           elem->ret = -ECANCELED;
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>       }
>   
>   }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
>       req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
>       req->func = func;
>       req->arg = arg;
> -    req->state = THREAD_QUEUED;
>       req->pool = pool;
> +    qatomic_store_release(&req->state, THREAD_QUEUED);

This does not need any atomic access, because there is ordering via 
pool->lock (which protects the request_list).  There's no need to do 
store-release and load-acquire except to order with another store or 
load, and in fact adding unnecessary acquire/release is harder to 
understand.

Paolo

>       QLIST_INSERT_HEAD(&pool->head, req, all);
>
diff mbox series

Patch

diff --git a/util/thread-pool.c b/util/thread-pool.c
index 27eb777e85..6c5f4d085b 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -111,9 +111,8 @@  static void *worker_thread(void *opaque)
         ret = req->func(req->arg);
 
         req->ret = ret;
-        /* Write ret before state.  */
-        smp_wmb();
-        req->state = THREAD_DONE;
+        /* Atomically update state after setting ret.  */
+        qatomic_store_release(&req->state, THREAD_DONE);
 
         qemu_bh_schedule(pool->completion_bh);
         qemu_mutex_lock(&pool->lock);
@@ -180,7 +179,7 @@  static void thread_pool_completion_bh(void *opaque)
 
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
-        if (elem->state != THREAD_DONE) {
+        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
             continue;
         }
 
@@ -223,12 +222,12 @@  static void thread_pool_cancel(BlockAIOCB *acb)
     trace_thread_pool_cancel(elem, elem->common.opaque);
 
     QEMU_LOCK_GUARD(&pool->lock);
-    if (elem->state == THREAD_QUEUED) {
+    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
         QTAILQ_REMOVE(&pool->request_list, elem, reqs);
         qemu_bh_schedule(pool->completion_bh);
 
-        elem->state = THREAD_DONE;
         elem->ret = -ECANCELED;
+        qatomic_store_release(&elem->state, THREAD_DONE);
     }
 
 }
@@ -251,8 +250,8 @@  BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
     req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
     req->func = func;
     req->arg = arg;
-    req->state = THREAD_QUEUED;
     req->pool = pool;
+    qatomic_store_release(&req->state, THREAD_QUEUED);
 
     QLIST_INSERT_HEAD(&pool->head, req, all);