Message ID | 20250219161223.3340431-1-mordan@ispras.ru (mailing list archive)
---|---
State | New
Series | Fix data race with the state field of ThreadPoolElement
On Wed, Feb 19, 2025 at 07:12:23PM +0300, Vitalii Mordan wrote:

> TSAN reports a potential data race on the state field of
> ThreadPoolElement. This is fixed by using atomic access to the field.

The tsan output from the bug report:

```
WARNING: ThreadSanitizer: data race (pid=787043)
  Write of size 4 at 0x7b1c00000660 by thread T5 (mutexes: write M0):
    #0 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:108:20 (test-thread-pool-smc+0xa65a56)
    #1 qemu_thread_start /home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 (test-thread-pool-smc+0xa49040)

  Previous read of size 4 at 0x7b1c00000660 by main thread:
    #0 thread_pool_completion_bh /home/mordan/qemu/build/../util/thread-pool.c:183:19 (test-thread-pool-smc+0xa6549d)
    #1 aio_bh_call /home/mordan/qemu/build/../util/async.c:171:5 (test-thread-pool-smc+0xa5e03e)
    #2 aio_bh_poll /home/mordan/qemu/build/../util/async.c:218:13 (test-thread-pool-smc+0xa5e03e)
    #3 aio_poll /home/mordan/qemu/build/../util/aio-posix.c:722:17 (test-thread-pool-smc+0xa4343a)
    #4 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:133:9 (test-thread-pool-smc+0x50e638)
    #5 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e638)
    #6 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e638)
    #7 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e638)

  Location is heap block of size 104 at 0x7b1c00000620 allocated by main thread:
    #0 malloc out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:667:5 (test-thread-pool-smc+0x346131)
    #1 g_malloc <null> (libglib-2.0.so.0+0x5e738) (BuildId: e845b8fd2f396872c036976626389ffc4f50c9c5)
    #2 thread_pool_submit_aio /home/mordan/qemu/build/../util/thread-pool.c:251:11 (test-thread-pool-smc+0xa648bd)
    #3 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 (test-thread-pool-smc+0x50e600)
    #4 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e600)
    #5 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e600)
    #6 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e600)

  Mutex M0 (0x7b3c00000100) created at:
    #0 pthread_mutex_init out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1316:3 (test-thread-pool-smc+0x34914f)
    #1 qemu_mutex_init /home/mordan/qemu/build/../util/qemu-thread-posix.c:71:11 (test-thread-pool-smc+0xa47189)
    #2 thread_pool_init_one /home/mordan/qemu/build/../util/thread-pool.c:334:5 (test-thread-pool-smc+0xa64f60)
    #3 thread_pool_new /home/mordan/qemu/build/../util/thread-pool.c:348:5 (test-thread-pool-smc+0xa64f60)
    #4 aio_get_thread_pool /home/mordan/qemu/build/../util/async.c:441:28 (test-thread-pool-smc+0xa5e6d4)
    #5 thread_pool_submit_aio /home/mordan/qemu/build/../util/thread-pool.c:246:24 (test-thread-pool-smc+0xa6488d)
    #6 test_submit_many /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:128:9 (test-thread-pool-smc+0x50e600)
    #7 do_test_cancel /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:150:5 (test-thread-pool-smc+0x50e600)
    #8 test_cancel_async /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:234:5 (test-thread-pool-smc+0x50e600)
    #9 main /home/mordan/qemu/build/../tests/unit/test-thread-pool-smc.c:249:3 (test-thread-pool-smc+0x50e600)

  Thread T5 'worker' (tid=787049, running) created by thread T4 at:
    #0 pthread_create out/lib/clangrt-x86_64-unknown-linux-gnu/./out/lib/clangrt-x86_64-unknown-linux-gnu/./toolchain/llvm-project/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp:1022:3 (test-thread-pool-smc+0x34791d)
    #1 qemu_thread_create /home/mordan/qemu/build/../util/qemu-thread-posix.c:583:11 (test-thread-pool-smc+0xa48ed0)
    #2 do_spawn_thread /home/mordan/qemu/build/../util/thread-pool.c:146:5 (test-thread-pool-smc+0xa658de)
    #3 worker_thread /home/mordan/qemu/build/../util/thread-pool.c:83:5 (test-thread-pool-smc+0xa658de)
    #4 qemu_thread_start /home/mordan/qemu/build/../util/qemu-thread-posix.c:543:9 (test-thread-pool-smc+0xa49040)

SUMMARY: ThreadSanitizer: data race /home/mordan/qemu/build/../util/thread-pool.c:108:20 in worker_thread
```

My interpretation is that tsan is saying there is a data race between the
load in thread_pool_completion_bh():

```c
static void thread_pool_completion_bh(void *opaque)
{
    ThreadPool *pool = opaque;
    ThreadPoolElement *elem, *next;

    defer_call_begin(); /* cb() may use defer_call() to coalesce work */

restart:
    QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
        if (elem->state != THREAD_DONE) {
            ^^^^^^^^^^^
```

and the store in worker_thread():

```c
        req = QTAILQ_FIRST(&pool->request_list);
        QTAILQ_REMOVE(&pool->request_list, req, reqs);
        req->state = THREAD_ACTIVE;
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^
        qemu_mutex_unlock(&pool->lock);
```

It doesn't matter whether thread_pool_completion_bh() sees THREAD_QUEUED or
THREAD_ACTIVE, so this looks like a false positive. There is no practical
effect either way. THREAD_QUEUED vs THREAD_ACTIVE matters in
thread_pool_cancel(), but that is protected by pool->lock.

Paolo: Any thoughts?

Stefan

```
> 
> Fixes: d354c7eccf ("aio: add generic thread-pool facility")
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
> Signed-off-by: Vitalii Mordan <mordan@ispras.ru>
> ---
>  util/thread-pool.c | 13 ++++++-------
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
>          ret = req->func(req->arg);
>  
>          req->ret = ret;
> -        /* Write ret before state. */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* Atomically update state after setting ret. */
> +        qatomic_store_release(&req->state, THREAD_DONE);
>  
>          qemu_bh_schedule(pool->completion_bh);
>          qemu_mutex_lock(&pool->lock);
> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>  
>  restart:
>      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
>              continue;
>          }
>  
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>      trace_thread_pool_cancel(elem, elem->common.opaque);
>  
>      QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
>          QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>          qemu_bh_schedule(pool->completion_bh);
>  
> -        elem->state = THREAD_DONE;
>          elem->ret = -ECANCELED;
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>      }
>  
>  }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
>      req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
>      req->func = func;
>      req->arg = arg;
> -    req->state = THREAD_QUEUED;
>      req->pool = pool;
> +    qatomic_store_release(&req->state, THREAD_QUEUED);
>  
>      QLIST_INSERT_HEAD(&pool->head, req, all);
> 
> -- 
> 2.34.1
> 
```
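Stefan's excerpts show the pairing the patch relies on: the worker's store-release publishes `ret` before `state`, and the completion side's load-acquire must observe `state` before it may safely read `ret`. Below is a minimal standalone sketch of that pattern in plain C11 atomics; the struct, the value 42, and the spin loop are illustrative inventions, not QEMU code, but QEMU's qatomic_store_release()/qatomic_load_acquire() correspond to the C11 operations used here.

```c
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

enum { THREAD_QUEUED, THREAD_ACTIVE, THREAD_DONE };

struct elem {
    int ret;            /* plain field; ordered by the atomics below */
    _Atomic int state;
};

static int worker(void *opaque)
{
    struct elem *e = opaque;

    e->ret = 42;                        /* write ret first...            */
    atomic_store_explicit(&e->state,    /* ...then publish it with state */
                          THREAD_DONE, memory_order_release);
    return 0;
}

int main(void)
{
    struct elem e = { .ret = 0, .state = THREAD_QUEUED };
    thrd_t t;

    thrd_create(&t, worker, &e);

    /* The acquire load pairs with the release store above: once it sees
     * THREAD_DONE, the plain read of e.ret below cannot see a stale value. */
    while (atomic_load_explicit(&e.state, memory_order_acquire) != THREAD_DONE) {
        thrd_yield();   /* the real code waits in aio_poll()/a BH instead */
    }
    printf("ret = %d\n", e.ret);

    thrd_join(t, NULL);
    return 0;
}
```

With only relaxed accesses (or the removed smp_wmb() but no read-side barrier), the final read of `ret` could in principle observe the pre-store value, which is exactly what the release/acquire pair rules out.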
On 2/19/25 17:12, Vitalii Mordan wrote:

```
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 27eb777e85..6c5f4d085b 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
>          ret = req->func(req->arg);
>  
>          req->ret = ret;
> -        /* Write ret before state. */
> -        smp_wmb();
> -        req->state = THREAD_DONE;
> +        /* Atomically update state after setting ret. */
> +        qatomic_store_release(&req->state, THREAD_DONE);
```

This is good.

```
> @@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
>  
>  restart:
>      QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
> -        if (elem->state != THREAD_DONE) {
> +        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
```

This is good, but it needs a comment and it can replace the smp_rmb() below.

```
>              continue;
>          }
>  
> @@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
>      trace_thread_pool_cancel(elem, elem->common.opaque);
>  
>      QEMU_LOCK_GUARD(&pool->lock);
> -    if (elem->state == THREAD_QUEUED) {
> +    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
```

This is not ordering anything so it can be qatomic_read/qatomic_set
(technically the one below doesn't even need that, but it's fine).

```
>          QTAILQ_REMOVE(&pool->request_list, elem, reqs);
>          qemu_bh_schedule(pool->completion_bh);
>  
> -        elem->state = THREAD_DONE;
>          elem->ret = -ECANCELED;
> +        qatomic_store_release(&elem->state, THREAD_DONE);
>      }
>  
>  }
> @@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
>      req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
>      req->func = func;
>      req->arg = arg;
> -    req->state = THREAD_QUEUED;
>      req->pool = pool;
> +    qatomic_store_release(&req->state, THREAD_QUEUED);
```

This does not need any atomic access, because there is ordering via
pool->lock (which protects the request_list). There's no need to do
store-release and load-acquire except to order with another store or load,
and in fact adding unnecessary acquire/release is harder to understand.

Paolo

```
>  
>      QLIST_INSERT_HEAD(&pool->head, req, all);
> 
```
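Paolo's distinction, sketched in compilable plain C11 rather than QEMU's qatomic wrappers (the function name, the mutex, and the -1 stand-in for -ECANCELED are illustrative, not QEMU code): an access made while holding the mutex needs at most a relaxed atomic access to satisfy TSAN, because the lock already provides the ordering; release semantics are only needed on the store that publishes `ret` to the lock-free reader in the completion path.

```c
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

enum { THREAD_QUEUED, THREAD_ACTIVE, THREAD_DONE };

struct elem {
    _Atomic int state;
    int ret;
};

static mtx_t lock;  /* stands in for pool->lock */

static void cancel(struct elem *e)
{
    mtx_lock(&lock);
    /* Every writer that matters for this check also runs under the lock,
     * so the load needs no ordering of its own: relaxed (the analogue of
     * QEMU's qatomic_read()) is enough to make the access TSAN-clean. */
    if (atomic_load_explicit(&e->state, memory_order_relaxed) == THREAD_QUEUED) {
        e->ret = -1;    /* stand-in for -ECANCELED */
        /* Release is still useful on the store: it publishes ret to the
         * lock-free load-acquire in the completion path, not to other
         * holders of the lock. */
        atomic_store_explicit(&e->state, THREAD_DONE, memory_order_release);
    }
    mtx_unlock(&lock);
}

int main(void)
{
    struct elem e = { .state = THREAD_QUEUED, .ret = 0 };

    mtx_init(&lock, mtx_plain);
    cancel(&e);
    printf("state=%d ret=%d\n",
           atomic_load_explicit(&e.state, memory_order_relaxed), e.ret);
    mtx_destroy(&lock);
    return 0;
}
```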
```diff
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 27eb777e85..6c5f4d085b 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -111,9 +111,8 @@ static void *worker_thread(void *opaque)
         ret = req->func(req->arg);
 
         req->ret = ret;
-        /* Write ret before state. */
-        smp_wmb();
-        req->state = THREAD_DONE;
+        /* Atomically update state after setting ret. */
+        qatomic_store_release(&req->state, THREAD_DONE);
 
         qemu_bh_schedule(pool->completion_bh);
         qemu_mutex_lock(&pool->lock);
@@ -180,7 +179,7 @@ static void thread_pool_completion_bh(void *opaque)
 
 restart:
     QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
-        if (elem->state != THREAD_DONE) {
+        if (qatomic_load_acquire(&elem->state) != THREAD_DONE) {
             continue;
         }
 
@@ -223,12 +222,12 @@ static void thread_pool_cancel(BlockAIOCB *acb)
     trace_thread_pool_cancel(elem, elem->common.opaque);
 
     QEMU_LOCK_GUARD(&pool->lock);
-    if (elem->state == THREAD_QUEUED) {
+    if (qatomic_load_acquire(&elem->state) == THREAD_QUEUED) {
         QTAILQ_REMOVE(&pool->request_list, elem, reqs);
         qemu_bh_schedule(pool->completion_bh);
 
-        elem->state = THREAD_DONE;
         elem->ret = -ECANCELED;
+        qatomic_store_release(&elem->state, THREAD_DONE);
     }
 
 }
@@ -251,8 +250,8 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
     req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque);
     req->func = func;
     req->arg = arg;
-    req->state = THREAD_QUEUED;
     req->pool = pool;
+    qatomic_store_release(&req->state, THREAD_QUEUED);
 
     QLIST_INSERT_HEAD(&pool->head, req, all);
```
TSAN reports a potential data race on the state field of ThreadPoolElement.
This is fixed by using atomic access to the field.

```
Fixes: d354c7eccf ("aio: add generic thread-pool facility")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2822
Signed-off-by: Vitalii Mordan <mordan@ispras.ru>
---
 util/thread-pool.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)
```