Message ID | 20240626215241.3360246-1-rkir@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] timer: Fix a race condition between timer's callback and destroying code | expand |
On 6/26/24 23:52, Roman Kiryanov wrote: > `timerlist_run_timers` provides no mechanism to > make sure the data pointed by `opaque` is valid > when calling timer's callback: there could be > another thread running which is destroying > timer's opaque data. > > With this change `timer_del` becomes blocking if > timer's callback is running and it will be safe > to destroy timer's data once `timer_del` returns. > > Signed-off-by: Roman Kiryanov <rkir@google.com> Generally QEMU is running event loop code under the "big QEMU lock" (BQL). If both timer_del() and the callback run under the BQL, the race cannot happen. If you're using multiple threads, however, this code is generally very performance sensitive; and adding a mutex and broadcast on every timer that fires is probably too much. A simpler possibility (and the QemuEvent is already there, even) could be to use qemu_event_wait(&timer_list->timers_done_ev); but I think this situation is not specific to timers, and tied more to the code that creates the timer; therefore it's easier to handle it outside common code. If you only need to synchronize freeing against callbacks, you can use aio_bh_schedule_oneshot() and do the free in the bottom half. If instead you need the cleanup to be synchronous, the same idea of the bottom half can be used, via aio_wait_bh_oneshot(). Paolo > --- > v2: rebased to the right branch and removed > Google specific tags from the commit message. > > include/qemu/timer.h | 4 ++++ > util/qemu-timer.c | 21 +++++++++++++++++++++ > 2 files changed, 25 insertions(+) > > diff --git a/include/qemu/timer.h b/include/qemu/timer.h > index 5ce83c7911..efd0e95d20 100644 > --- a/include/qemu/timer.h > +++ b/include/qemu/timer.h > @@ -3,6 +3,7 @@ > > #include "qemu/bitops.h" > #include "qemu/notify.h" > +#include "qemu/thread.h" > #include "qemu/host-utils.h" > > #define NANOSECONDS_PER_SECOND 1000000000LL > @@ -86,9 +87,12 @@ struct QEMUTimer { > QEMUTimerList *timer_list; > QEMUTimerCB *cb; > void *opaque; > + QemuMutex opaque_lock; > + QemuCond cb_done; > QEMUTimer *next; > int attributes; > int scale; > + bool cb_running; > }; > > extern QEMUTimerListGroup main_loop_tlg; > diff --git a/util/qemu-timer.c b/util/qemu-timer.c > index 213114be68..95af255519 100644 > --- a/util/qemu-timer.c > +++ b/util/qemu-timer.c > @@ -369,7 +369,10 @@ void timer_init_full(QEMUTimer *ts, > ts->opaque = opaque; > ts->scale = scale; > ts->attributes = attributes; > + qemu_mutex_init(&ts->opaque_lock); > + qemu_cond_init(&ts->cb_done); > ts->expire_time = -1; > + ts->cb_running = false; > } > > void timer_deinit(QEMUTimer *ts) > @@ -394,6 +397,12 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) > } > pt = &t->next; > } > + > + qemu_mutex_lock(&ts->opaque_lock); > + while (ts->cb_running) { > + qemu_cond_wait(&ts->cb_done, &ts->opaque_lock); > + } > + qemu_mutex_unlock(&ts->opaque_lock); > } > > static bool timer_mod_ns_locked(QEMUTimerList *timer_list, > @@ -571,11 +580,23 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) > cb = ts->cb; > opaque = ts->opaque; > > + /* Mark the callback as running to prevent > + * destroying `opaque` in another thread. > + */ > + qemu_mutex_lock(&ts->opaque_lock); > + ts->cb_running = true; > + qemu_mutex_unlock(&ts->opaque_lock); > + > /* run the callback (the timer list can be modified) */ > qemu_mutex_unlock(&timer_list->active_timers_lock); > cb(opaque); > qemu_mutex_lock(&timer_list->active_timers_lock); > > + qemu_mutex_lock(&ts->opaque_lock); > + ts->cb_running = false; > + qemu_cond_broadcast(&ts->cb_done); > + qemu_mutex_unlock(&ts->opaque_lock); > + > progress = true; > } > qemu_mutex_unlock(&timer_list->active_timers_lock);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 5ce83c7911..efd0e95d20 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -3,6 +3,7 @@ #include "qemu/bitops.h" #include "qemu/notify.h" +#include "qemu/thread.h" #include "qemu/host-utils.h" #define NANOSECONDS_PER_SECOND 1000000000LL @@ -86,9 +87,12 @@ struct QEMUTimer { QEMUTimerList *timer_list; QEMUTimerCB *cb; void *opaque; + QemuMutex opaque_lock; + QemuCond cb_done; QEMUTimer *next; int attributes; int scale; + bool cb_running; }; extern QEMUTimerListGroup main_loop_tlg; diff --git a/util/qemu-timer.c b/util/qemu-timer.c index 213114be68..95af255519 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -369,7 +369,10 @@ void timer_init_full(QEMUTimer *ts, ts->opaque = opaque; ts->scale = scale; ts->attributes = attributes; + qemu_mutex_init(&ts->opaque_lock); + qemu_cond_init(&ts->cb_done); ts->expire_time = -1; + ts->cb_running = false; } void timer_deinit(QEMUTimer *ts) @@ -394,6 +397,12 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts) } pt = &t->next; } + + qemu_mutex_lock(&ts->opaque_lock); + while (ts->cb_running) { + qemu_cond_wait(&ts->cb_done, &ts->opaque_lock); + } + qemu_mutex_unlock(&ts->opaque_lock); } static bool timer_mod_ns_locked(QEMUTimerList *timer_list, @@ -571,11 +580,23 @@ bool timerlist_run_timers(QEMUTimerList *timer_list) cb = ts->cb; opaque = ts->opaque; + /* Mark the callback as running to prevent + * destroying `opaque` in another thread. + */ + qemu_mutex_lock(&ts->opaque_lock); + ts->cb_running = true; + qemu_mutex_unlock(&ts->opaque_lock); + /* run the callback (the timer list can be modified) */ qemu_mutex_unlock(&timer_list->active_timers_lock); cb(opaque); qemu_mutex_lock(&timer_list->active_timers_lock); + qemu_mutex_lock(&ts->opaque_lock); + ts->cb_running = false; + qemu_cond_broadcast(&ts->cb_done); + qemu_mutex_unlock(&ts->opaque_lock); + progress = true; } qemu_mutex_unlock(&timer_list->active_timers_lock);
`timerlist_run_timers` provides no mechanism to make sure the data pointed by `opaque` is valid when calling timer's callback: there could be another thread running which is destroying timer's opaque data. With this change `timer_del` becomes blocking if timer's callback is running and it will be safe to destroy timer's data once `timer_del` returns. Signed-off-by: Roman Kiryanov <rkir@google.com> --- v2: rebased to the right branch and removed Google specific tags from the commit message. include/qemu/timer.h | 4 ++++ util/qemu-timer.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+)