diff mbox series

[v2,01/13] configure: add --enable-tsan flag + fiber annotations for coroutine-ucontext

Message ID 20200605173422.1490-2-robert.foley@linaro.org (mailing list archive)
State New, archived
Headers show
Series Add Thread Sanitizer support to QEMU | expand

Commit Message

Robert Foley June 5, 2020, 5:34 p.m. UTC
From: Lingfeng Yang <lfy@google.com>

We tried running QEMU under tsan in 2016, but tsan's lack of support for
longjmp-based fibers was a blocker:
  https://groups.google.com/forum/#!topic/thread-sanitizer/se0YuzfWazw

Fortunately, thread sanitizer gained fiber support in early 2019:
  https://reviews.llvm.org/D54889

This patch brings tsan support upstream by importing the patch that annotated
QEMU's coroutines as tsan fibers in Android's QEMU fork:
  https://android-review.googlesource.com/c/platform/external/qemu/+/844675

Tested with '--enable-tsan --cc=clang-9 --cxx=clang++-9 --disable-werror'
configure flags.

Signed-off-by: Lingfeng Yang <lfy@google.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
[cota: minor modifications + configure changes]
Signed-off-by: Robert Foley <robert.foley@linaro.org>
[RF: configure changes for warnings, error handling + minor modifications]
---
 configure                 | 47 ++++++++++++++++++-
 util/coroutine-ucontext.c | 97 +++++++++++++++++++++++++++++++++++----
 2 files changed, 134 insertions(+), 10 deletions(-)

Comments

Alex Bennée June 8, 2020, 1:39 p.m. UTC | #1
Robert Foley <robert.foley@linaro.org> writes:

> From: Lingfeng Yang <lfy@google.com>
>
> We tried running QEMU under tsan in 2016, but tsan's lack of support for
> longjmp-based fibers was a blocker:
>   https://groups.google.com/forum/#!topic/thread-sanitizer/se0YuzfWazw
>
> Fortunately, thread sanitizer gained fiber support in early 2019:
>   https://reviews.llvm.org/D54889
>
> This patch brings tsan support upstream by importing the patch that annotated
> QEMU's coroutines as tsan fibers in Android's QEMU fork:
>   https://android-review.googlesource.com/c/platform/external/qemu/+/844675
>
> Tested with '--enable-tsan --cc=clang-9 --cxx=clang++-9 --disable-werror'
> configure flags.
>
> Signed-off-by: Lingfeng Yang <lfy@google.com>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> [cota: minor modifications + configure changes]
> Signed-off-by: Robert Foley <robert.foley@linaro.org>
> [RF: configure changes for warnings, error handling + minor modifications]
<snip>
>  
> +#define UC_DEBUG 0
> +#if UC_DEBUG && defined(CONFIG_TSAN)
> +#define UC_TRACE(fmt, ...) fprintf(stderr, "%s:%d:%p " fmt "\n", \
> +    __func__, __LINE__, __tsan_get_current_fiber(), ##__VA_ARGS__);
> +#else
> +#define UC_TRACE(fmt, ...)
> +#endif
> +

We shouldn't be introducing new debug printfs if we can avoid it. I
suspect a separate patch could introduce some relevant trace points that
are outside the #if CONFIG_TSAN chunks.

>  /**
>   * Per-thread coroutine bookkeeping
>   */
> @@ -65,7 +80,20 @@ union cc_arg {
>      int i[2];
>  };
>  
> -static void finish_switch_fiber(void *fake_stack_save)
> +/* QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. */
> +static inline __attribute__((always_inline))
> +void on_new_fiber(CoroutineUContext *co)
> +{

We could put a tracepoint here at something like trace_new_fibre() but I
suspect for following what's going on you could probably just have
tracepoints in the higher coroutine functions and leave the fibre stuff
as purely a CONFIG_TSAN detail.

Plus we wouldn't have to argue about American vs British spelling for
the tracepoints ;-)

<snip>

Otherwise without the UC_TRACE verbiage:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Robert Foley June 8, 2020, 8:09 p.m. UTC | #2
On Mon, 8 Jun 2020 at 09:39, Alex Bennée <alex.bennee@linaro.org> wrote:
<snip>
> > -static void finish_switch_fiber(void *fake_stack_save)
> > +/* QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. */
> > +static inline __attribute__((always_inline))
> > +void on_new_fiber(CoroutineUContext *co)
> > +{
>
> We could put a tracepoint here at something like trace_new_fibre() but I
> suspect for following what's going on you could probably just have
> tracepoints in the higher coroutine functions and leave the fibre stuff
> as purely a CONFIG_TSAN detail.
>
> Plus we wouldn't have to argue about American vs British spelling for
> the tracepoints ;-)
>
> <snip>
>
> Otherwise without the UC_TRACE verbiage:
>
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

Thanks for the review.  Sounds good, we will remove the fibre related
traces. :-)

Thanks & Regards,
-Rob
>
> --
> Alex Bennée
diff mbox series

Patch

diff --git a/configure b/configure
index f087d2bcd1..9b50820366 100755
--- a/configure
+++ b/configure
@@ -395,6 +395,7 @@  gprof="no"
 debug_tcg="no"
 debug="no"
 sanitizers="no"
+tsan="no"
 fortify_source=""
 strip_opt="yes"
 tcg_interpreter="no"
@@ -1150,6 +1151,10 @@  for opt do
   ;;
   --disable-sanitizers) sanitizers="no"
   ;;
+  --enable-tsan) tsan="yes"
+  ;;
+  --disable-tsan) tsan="no"
+  ;;
   --enable-sparse) sparse="yes"
   ;;
   --disable-sparse) sparse="no"
@@ -1750,6 +1755,7 @@  Advanced options (experts only):
   --with-pkgversion=VERS   use specified string as sub-version of the package
   --enable-debug           enable common debug build options
   --enable-sanitizers      enable default sanitizers
+  --enable-tsan            enable thread sanitizer
   --disable-strip          disable stripping binaries
   --disable-werror         disable compilation abort on warning
   --disable-stack-protector disable compiler-provided stack protection
@@ -6192,6 +6198,30 @@  if test "$fuzzing" = "yes" ; then
   fi
 fi
 
+# Thread sanitizer is, for now, much noisier than the other sanitizers;
+# keep it separate until that is not the case.
+if test "$tsan" = "yes" && test "$sanitizers" = "yes"; then
+  error_exit "TSAN is not supported with other sanitiziers."
+fi
+have_tsan=no
+have_tsan_iface_fiber=no
+if test "$tsan" = "yes" ; then
+  write_c_skeleton
+  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
+      have_tsan=yes
+  fi
+  cat > $TMPC << EOF
+#include <sanitizer/tsan_interface.h>
+int main(void) {
+  __tsan_create_fiber(0);
+  return 0;
+}
+EOF
+  if compile_prog "$CPU_CFLAGS -Werror -fsanitize=thread" "" ; then
+      have_tsan_iface_fiber=yes
+  fi
+fi
+
 ##########################################
 # check for libpmem
 
@@ -6293,6 +6323,16 @@  if test "$have_asan" = "yes"; then
            "Without code annotation, the report may be inferior."
   fi
 fi
+if test "$have_tsan" = "yes" ; then
+  if test "$have_tsan_iface_fiber" = "yes" ; then
+    QEMU_CFLAGS="-fsanitize=thread $QEMU_CFLAGS"
+    QEMU_LDFLAGS="-fsanitize=thread $QEMU_LDFLAGS"
+  else
+    error_exit "Cannot enable TSAN due to missing fiber annotation interface."
+  fi
+elif test "$tsan" = "yes" ; then
+  error_exit "Cannot enable TSAN due to missing sanitize thread interface."
+fi
 if test "$have_ubsan" = "yes"; then
   QEMU_CFLAGS="-fsanitize=undefined $QEMU_CFLAGS"
   QEMU_LDFLAGS="-fsanitize=undefined $QEMU_LDFLAGS"
@@ -6328,7 +6368,8 @@  if test "$werror" = "yes"; then
     QEMU_CFLAGS="-Werror $QEMU_CFLAGS"
 fi
 
-if test "$solaris" = "no" ; then
+# Exclude --warn-common with TSan to suppress warnings from the TSan libraries.
+if test "$solaris" = "no" && test "$tsan" = "no"; then
     if $ld --version 2>/dev/null | grep "GNU ld" >/dev/null 2>/dev/null ; then
         QEMU_LDFLAGS="-Wl,--warn-common $QEMU_LDFLAGS"
     fi
@@ -7382,6 +7423,10 @@  if test "$have_asan_iface_fiber" = "yes" ; then
     echo "CONFIG_ASAN_IFACE_FIBER=y" >> $config_host_mak
 fi
 
+if test "$have_tsan" = "yes" && test "$have_tsan_iface_fiber" = "yes" ; then
+    echo "CONFIG_TSAN=y" >> $config_host_mak
+fi
+
 if test "$has_environ" = "yes" ; then
   echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi
diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c
index bd593e61bc..a3dc78e67a 100644
--- a/util/coroutine-ucontext.c
+++ b/util/coroutine-ucontext.c
@@ -37,18 +37,33 @@ 
 #endif
 #endif
 
+#ifdef CONFIG_TSAN
+#include <sanitizer/tsan_interface.h>
+#endif
+
 typedef struct {
     Coroutine base;
     void *stack;
     size_t stack_size;
     sigjmp_buf env;
 
+    void *tsan_co_fiber;
+    void *tsan_caller_fiber;
+
 #ifdef CONFIG_VALGRIND_H
     unsigned int valgrind_stack_id;
 #endif
 
 } CoroutineUContext;
 
+#define UC_DEBUG 0
+#if UC_DEBUG && defined(CONFIG_TSAN)
+#define UC_TRACE(fmt, ...) fprintf(stderr, "%s:%d:%p " fmt "\n", \
+    __func__, __LINE__, __tsan_get_current_fiber(), ##__VA_ARGS__);
+#else
+#define UC_TRACE(fmt, ...)
+#endif
+
 /**
  * Per-thread coroutine bookkeeping
  */
@@ -65,7 +80,20 @@  union cc_arg {
     int i[2];
 };
 
-static void finish_switch_fiber(void *fake_stack_save)
+/* QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. */
+static inline __attribute__((always_inline))
+void on_new_fiber(CoroutineUContext *co)
+{
+#ifdef CONFIG_TSAN
+    co->tsan_co_fiber = __tsan_create_fiber(0); /* flags: sync on switch */
+    co->tsan_caller_fiber = __tsan_get_current_fiber();
+    UC_TRACE("Create new TSAN co fiber. co: %p co fiber: %p caller fiber: %p ",
+             co, co->tsan_co_fiber, co->tsan_caller_fiber);
+#endif
+}
+
+static inline __attribute__((always_inline))
+void finish_switch_fiber(void *fake_stack_save)
 {
 #ifdef CONFIG_ASAN
     const void *bottom_old;
@@ -78,18 +106,40 @@  static void finish_switch_fiber(void *fake_stack_save)
         leader.stack_size = size_old;
     }
 #endif
+#ifdef CONFIG_TSAN
+    if (fake_stack_save) {
+        __tsan_release(fake_stack_save);
+        __tsan_switch_to_fiber(fake_stack_save, 0);  /* 0=synchronize */
+    }
+#endif
 }
 
-static void start_switch_fiber(void **fake_stack_save,
-                               const void *bottom, size_t size)
+static inline __attribute__((always_inline)) void start_switch_fiber(
+    CoroutineAction action, void **fake_stack_save,
+    const void *bottom, size_t size, void *new_fiber)
 {
 #ifdef CONFIG_ASAN
-    __sanitizer_start_switch_fiber(fake_stack_save, bottom, size);
+    if (action == COROUTINE_TERMINATE) {
+        __sanitizer_start_switch_fiber(
+            action == COROUTINE_TERMINATE ? NULL : fake_stack_save,
+            bottom, size);
+    }
+#endif
+#ifdef CONFIG_TSAN
+    void *curr_fiber =
+        __tsan_get_current_fiber();
+    __tsan_acquire(curr_fiber);
+
+    UC_TRACE("Current fiber: %p.", curr_fiber);
+    *fake_stack_save = curr_fiber;
+    UC_TRACE("Switch to fiber %p", new_fiber);
+    __tsan_switch_to_fiber(new_fiber, 0);  /* 0=synchronize */
 #endif
 }
 
 static void coroutine_trampoline(int i0, int i1)
 {
+    UC_TRACE("Start trampoline");
     union cc_arg arg;
     CoroutineUContext *self;
     Coroutine *co;
@@ -104,21 +154,34 @@  static void coroutine_trampoline(int i0, int i1)
 
     /* Initialize longjmp environment and switch back the caller */
     if (!sigsetjmp(self->env, 0)) {
-        start_switch_fiber(&fake_stack_save,
-                           leader.stack, leader.stack_size);
+        UC_TRACE("Current fiber: %p. Set co %p to env 0x%lx",
+                 __tsan_get_current_fiber(), self, (unsigned long)self->env);
+        start_switch_fiber(
+            COROUTINE_YIELD,
+            &fake_stack_save,
+            leader.stack,
+            leader.stack_size,
+            self->tsan_caller_fiber);
+        UC_TRACE("Jump to co %p caller fiber %p env 0x%lx",
+                 co, self->tsan_caller_fiber, *(unsigned long *)co->entry_arg);
         siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
     }
 
+    UC_TRACE("After first siglongjmp");
+
     finish_switch_fiber(fake_stack_save);
 
     while (true) {
         co->entry(co->entry_arg);
+        UC_TRACE("switch from co %p to caller co %p fiber %p\n",
+                 co, co->caller, self->tsan_caller_fiber);
         qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
     }
 }
 
 Coroutine *qemu_coroutine_new(void)
 {
+    UC_TRACE("Start new coroutine");
     CoroutineUContext *co;
     ucontext_t old_uc, uc;
     sigjmp_buf old_env;
@@ -154,12 +217,16 @@  Coroutine *qemu_coroutine_new(void)
 
     arg.p = co;
 
+    on_new_fiber(co);
     makecontext(&uc, (void (*)(void))coroutine_trampoline,
                 2, arg.i[0], arg.i[1]);
 
     /* swapcontext() in, siglongjmp() back out */
     if (!sigsetjmp(old_env, 0)) {
-        start_switch_fiber(&fake_stack_save, co->stack, co->stack_size);
+        start_switch_fiber(
+            COROUTINE_YIELD,
+            &fake_stack_save,
+            co->stack, co->stack_size, co->tsan_co_fiber);
         swapcontext(&old_uc, &uc);
     }
 
@@ -185,6 +252,7 @@  static inline void valgrind_stack_deregister(CoroutineUContext *co)
 
 void qemu_coroutine_delete(Coroutine *co_)
 {
+    UC_TRACE("Nuking co %p from orbit", co_);
     CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
 
 #ifdef CONFIG_VALGRIND_H
@@ -209,6 +277,10 @@  qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 {
     CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
     CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    UC_TRACE("from to: %p %p uc: %p %p. fibers: %p %p caller fibers: %p %p\n",
+            from_, to_, from, to,
+            from->tsan_co_fiber, to->tsan_co_fiber,
+            from->tsan_caller_fiber, to->tsan_caller_fiber);
     int ret;
     void *fake_stack_save = NULL;
 
@@ -216,8 +288,8 @@  qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
 
     ret = sigsetjmp(from->env, 0);
     if (ret == 0) {
-        start_switch_fiber(action == COROUTINE_TERMINATE ?
-                           NULL : &fake_stack_save, to->stack, to->stack_size);
+        start_switch_fiber(action, &fake_stack_save,
+                           to->stack, to->stack_size, to->tsan_co_fiber);
         siglongjmp(to->env, action);
     }
 
@@ -231,6 +303,13 @@  Coroutine *qemu_coroutine_self(void)
     if (!current) {
         current = &leader.base;
     }
+#ifdef CONFIG_TSAN
+    if (!leader.tsan_co_fiber) {
+        leader.tsan_co_fiber = __tsan_get_current_fiber();
+        UC_TRACE("For co %p set leader co fiber to %p",
+                 current, leader.tsan_co_fiber);
+    }
+#endif
     return current;
 }