diff mbox series

[v2,2/2] trace-cmd: Fix a possible race condition and deadlock in trace-cmd

Message ID 20190605111937.17894-2-tstoyanov@vmware.com (mailing list archive)
State Accepted
Delegated to: Steven Rostedt
Headers show
Series [v2,1/2] trace-cmd: Fix crash when trace-cmd is executed with args "profile -F sleep 1" | expand

Commit Message

Tzvetomir Stoyanov June 5, 2019, 11:19 a.m. UTC
When pipes are used for communication between trace-cmd main
thread and per-cpu recorder threads, there is a possible race
condition in stop_threads(), which can cause a deadlock between
the main thread and cpu recorder thread:
   In trace_stream_read(), the select() call can return 0 if threads
   have no data to send. This will force stop_threads() to stop reading
   the thread's pipes and enter a waitpid() loop, to wait for all threads
   to be terminated. However, there is a case when some threads are still
   flushing its data - tracecmd_flush_recording() tries a blocking write()
   to the pipe. A dead lock appears - the cpu thread is blocked in write(),
   as its buffer is full and no one is reading it. The main thread is blocked
   in waitpid(), to wait the same thread to exit.
The deadlock can be (randomly) observed with the command
"trace-cmd profile -p function -F sleep 10"

The proposed fix removes the select timeout, makes the call blocking,
to ensure the threads are flushed its data before going in waitpid() loop.

Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
---
 tracecmd/trace-record.c | 3 +--
 tracecmd/trace-stream.c | 6 ++++++
 2 files changed, 7 insertions(+), 2 deletions(-)

Comments

Slavomir Kaslev June 7, 2019, 6:48 a.m. UTC | #1
On Wed, Jun 5, 2019 at 2:21 PM Tzvetomir Stoyanov <tstoyanov@vmware.com> wrote:
>
> When pipes are used for communication between trace-cmd main
> thread and per-cpu recorder threads, there is a possible race
> condition in stop_threads(), which can cause a deadlock between
> the main thread and cpu recorder thread:
>    In trace_stream_read(), the select() call can return 0 if threads
>    have no data to send. This will force stop_threads() to stop reading
>    the thread's pipes and enter a waitpid() loop, to wait for all threads
>    to be terminated. However, there is a case when some threads are still
>    flushing its data - tracecmd_flush_recording() tries a blocking write()
>    to the pipe. A dead lock appears - the cpu thread is blocked in write(),
>    as its buffer is full and no one is reading it. The main thread is blocked
>    in waitpid(), to wait the same thread to exit.
> The deadlock can be (randomly) observed with the command
> "trace-cmd profile -p function -F sleep 10"
>
> The proposed fix removes the select timeout, makes the call blocking,
> to ensure the threads are flushed its data before going in waitpid() loop.
>
> Signed-off-by: Tzvetomir Stoyanov <tstoyanov@vmware.com>
> ---
>  tracecmd/trace-record.c | 3 +--
>  tracecmd/trace-stream.c | 6 ++++++
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 4523128..2f5fbd9 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -626,7 +626,6 @@ static void delete_thread_data(void)
>
>  static void stop_threads(enum trace_type type)
>  {
> -       struct timeval tv = { 0, 0 };
>         int ret;
>         int i;
>
> @@ -643,7 +642,7 @@ static void stop_threads(enum trace_type type)
>         /* Flush out the pipes */
>         if (type & TRACE_TYPE_STREAM) {
>                 do {
> -                       ret = trace_stream_read(pids, recorder_threads, &tv);
> +                       ret = trace_stream_read(pids, recorder_threads, NULL);
>                 } while (ret > 0);
>         }
>
> diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c
> index dad3466..3814a35 100644
> --- a/tracecmd/trace-stream.c
> +++ b/tracecmd/trace-stream.c
> @@ -92,6 +92,7 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
>         struct pid_record_data *last_pid;
>         fd_set rfds;
>         int top_rfd = 0;
> +       int nr_fd;
>         int ret;
>         int i;
>
> @@ -119,18 +120,23 @@ int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
>                 return 1;
>         }
>
> +       nr_fd = 0;
>         FD_ZERO(&rfds);
>
>         for (i = 0; i < nr_pids; i++) {
>                 /* Do not process closed pipes */
>                 if (pids[i].closed)
>                         continue;
> +               nr_fd++;
>                 if (pids[i].brass[0] > top_rfd)
>                         top_rfd = pids[i].brass[0];
>
>                 FD_SET(pids[i].brass[0], &rfds);
>         }
>
> +       if (!nr_fd)
> +               return 0;
> +
>         ret = select(top_rfd + 1, &rfds, NULL, NULL, tv);
>
>         if (ret > 0)
> --
> 2.21.0
>

Looks good to me.

Reviewed-by: Slavomir Kaslev <kaslevs@vmware.com>

Cheers,

-- Slavi
diff mbox series

Patch

diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 4523128..2f5fbd9 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -626,7 +626,6 @@  static void delete_thread_data(void)
 
 static void stop_threads(enum trace_type type)
 {
-	struct timeval tv = { 0, 0 };
 	int ret;
 	int i;
 
@@ -643,7 +642,7 @@  static void stop_threads(enum trace_type type)
 	/* Flush out the pipes */
 	if (type & TRACE_TYPE_STREAM) {
 		do {
-			ret = trace_stream_read(pids, recorder_threads, &tv);
+			ret = trace_stream_read(pids, recorder_threads, NULL);
 		} while (ret > 0);
 	}
 
diff --git a/tracecmd/trace-stream.c b/tracecmd/trace-stream.c
index dad3466..3814a35 100644
--- a/tracecmd/trace-stream.c
+++ b/tracecmd/trace-stream.c
@@ -92,6 +92,7 @@  int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
 	struct pid_record_data *last_pid;
 	fd_set rfds;
 	int top_rfd = 0;
+	int nr_fd;
 	int ret;
 	int i;
 
@@ -119,18 +120,23 @@  int trace_stream_read(struct pid_record_data *pids, int nr_pids, struct timeval
 		return 1;
 	}
 
+	nr_fd = 0;
 	FD_ZERO(&rfds);
 
 	for (i = 0; i < nr_pids; i++) {
 		/* Do not process closed pipes */
 		if (pids[i].closed)
 			continue;
+		nr_fd++;
 		if (pids[i].brass[0] > top_rfd)
 			top_rfd = pids[i].brass[0];
 
 		FD_SET(pids[i].brass[0], &rfds);
 	}
 
+	if (!nr_fd)
+		return 0;
+
 	ret = select(top_rfd + 1, &rfds, NULL, NULL, tv);
 
 	if (ret > 0)