diff mbox

[1/1] Protect stderr from non-blocking mode

Message ID 0b88a3432f2fcfee1e57abfa9420ca6c5c386b49.1453954698.git.sam.bobroff@au1.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sam Bobroff Jan. 28, 2016, 4:18 a.m. UTC
On Linux, if QEMU is run from a shell with -d to enable debug logging
but without directing it to a file (e.g. -D is not used, and
qemu_logfile is set to stderr), and no shell redirection is used, it
is possible for log messages to be lost under load.

This is caused by a combination of several factors:

* The shell (e.g. bash) may provide stdin, stdout and stderr as
  duplicates of a single open file, so they share file status flags
  including O_NONBLOCK.

* As character devices are registered (see qemu_chr_open_stdio()),
  stdin and stdout are set non-blocking.

* The printf() family of functions, including fprintf(), are not
  "non-blocking" aware: if they receive EAGAIN after partially
  writing their output, they immediately return EAGAIN with no way to
  discover how much output was written, if any.

So O_NONBLOCK is set on stdin/stdout, which causes O_NONBLOCK to be
set on stderr, and qemu_logfile is set to stderr. Then, under load,
fprintf()s to qemu_logfile return EAGAIN, which is ignored and data is
lost. This can't be fixed by handling EAGAIN because an unknown amount
of data has been written.

This patch works around the issue by re-opening the underlying tty
file, which is available in Linux as /proc/self/fd/2, and duplicating
it into fd 2 which causes stderr to refer to a new, unshared, "file
description". Additionally, if we end up logging to a non-blocking
file descriptor on a non-Linux platform, where no workaround has been
implemented, we display a warning message.

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
---
This is somewhat of an RFC as the issue probably hasn't been noticed
very often (ever?) and there are several approaches to addressing it.
The discussion was a while ago, here:

http://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00761.html

 include/sysemu/os-posix.h |  2 +-
 include/sysemu/os-win32.h |  2 +-
 os-posix.c                | 30 +++++++++++++++++++++++++++++-
 os-win32.c                |  2 +-
 vl.c                      |  2 +-
 5 files changed, 33 insertions(+), 5 deletions(-)

Comments

Stefan Hajnoczi Jan. 28, 2016, 12:11 p.m. UTC | #1
On Thu, Jan 28, 2016 at 03:18:23PM +1100, Sam Bobroff wrote:
> On Linux, if QEMU is run from a shell with -d to enable debug logging
> but without directing it to a file (e.g. -D is not used, and
> qemu_logfile is set to stderr), and no shell redirection is used, it
> is possible for log messages to be lost under load.
> 
> This is caused by a combination of several factors:
> 
> * The shell (e.g. bash) may provide stdin, stdout and stderr as
>   duplicates of a single open file, so they share file status flags
>   including O_NONBLOCK.
> 
> * As character devices are registered (see qemu_chr_open_stdio()),
>   stdin and stdout are set non-blocking.
> 
> * The printf() family of functions, including fprintf(), are not
>   "non-blocking" aware: if they receive EAGAIN after partially
>   writing their output, they immediately return EAGAIN with no way to
>   discover how much output was written, if any.
> 
> So O_NONBLOCK is set on stdin/stdout, which causes O_NONBLOCK to be
> set on stderr, and qemu_logfile is set to stderr. Then, under load,
> fprintf()s to qemu_logfile return EAGAIN, which is ignored and data is
> lost. This can't be fixed by handling EAGAIN because an unknown amount
> of data has been written.
> 
> This patch works around the issue by re-opening the underlying tty
> file, which is available in Linux as /proc/self/fd/2, and duplicating
> it into fd 2 which causes stderr to refer to a new, unshared, "file
> description". Additionally, if we end up logging to a non-blocking
> file descriptor on a non-Linux platform, where no workaround has been
> implemented, we display a warning message.
> 
> Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
> ---
> This is somewhat of an RFC as the issue probably hasn't been noticed
> very often (ever?) and there are several approaches to addressing it.
> The discussion was a while ago, here:
> 
> http://lists.nongnu.org/archive/html/qemu-devel/2015-12/msg00761.html
> 
>  include/sysemu/os-posix.h |  2 +-
>  include/sysemu/os-win32.h |  2 +-
>  os-posix.c                | 30 +++++++++++++++++++++++++++++-
>  os-win32.c                |  2 +-
>  vl.c                      |  2 +-
>  5 files changed, 33 insertions(+), 5 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
diff mbox

Patch

diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
index f131521..7c7a36b 100644
--- a/include/sysemu/os-posix.h
+++ b/include/sysemu/os-posix.h
@@ -28,7 +28,7 @@ 
 
 #include <sys/time.h>
 
-void os_set_line_buffering(void);
+void os_setup_stdio(void);
 void os_set_proc_name(const char *s);
 void os_setup_signal_handling(void);
 void os_daemonize(void);
diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index 400e098..b55c5d4 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -84,7 +84,7 @@  struct tm *localtime_r(const time_t *timep, struct tm *result);
 static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
 static inline void os_setup_post(void) {}
-void os_set_line_buffering(void);
+void os_setup_stdio(void);
 static inline void os_set_proc_name(const char *dummy) {}
 
 int getpagesize(void);
diff --git a/os-posix.c b/os-posix.c
index e4da406..d9afa6d 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -40,6 +40,7 @@ 
 #include "net/slirp.h"
 #include "qemu-options.h"
 #include "qemu/rcu.h"
+#include "qemu/log.h"
 
 #ifdef CONFIG_LINUX
 #include <sys/prctl.h>
@@ -256,6 +257,17 @@  void os_setup_post(void)
 {
     int fd = 0;
 
+    if (qemu_logfile == stderr) {
+        if (fcntl(2, F_GETFL) & O_NONBLOCK) {
+            /* See os_setup_stdio() for an explanation. */
+            fprintf(stderr,
+"Note: stderr has been set non-blocking which can cause log messages to be\n"
+"lost under load. This can be avoided by directing log messages to a file\n"
+"(using -D) or by redirecting stderr using your shell.\n"
+            );
+        }
+    }
+
     if (daemonize) {
         if (chdir("/")) {
             perror("not able to chdir to /");
@@ -289,8 +301,24 @@  void os_setup_post(void)
     }
 }
 
-void os_set_line_buffering(void)
+void os_setup_stdio(void)
 {
+#if defined(__linux__)
+/* In some situations (e.g. running QEMU from a bash command line and not using
+ * any redirection) the shell may have set up stdin, stdout and stderr as
+ * duplicates of a single open file (i.e. they share a "file description"). If
+ * this is the case then later when char drivers are registered and stdin and
+ * stdout are set non-blocking, stderr will also become non-blocking. Because
+ * printf (and variants) are not "non-blocking aware" this can cause debugging
+ * information to be lost (typically when using "-d" without "-D" and producing
+ * a lot of debug output). To avoid this, we re-open stderr to create a
+ * separate "file description" with it's own file status flags. */
+    int fd = open("/proc/self/fd/2", O_WRONLY);
+    if (fd != -1) {
+        dup2(fd, 2);
+        close(fd);
+    }
+#endif
     setvbuf(stdout, NULL, _IOLBF, 0);
 }
 
diff --git a/os-win32.c b/os-win32.c
index cc09196..2fcbc72 100644
--- a/os-win32.c
+++ b/os-win32.c
@@ -88,7 +88,7 @@  char *os_find_datadir(void)
     return qemu_get_exec_dir();
 }
 
-void os_set_line_buffering(void)
+void os_setup_stdio(void)
 {
     setbuf(stdout, NULL);
     setbuf(stderr, NULL);
diff --git a/vl.c b/vl.c
index f043009..04a6893 100644
--- a/vl.c
+++ b/vl.c
@@ -4379,7 +4379,7 @@  int main(int argc, char **argv, char **envp)
         semihosting_arg_fallback(kernel_filename, kernel_cmdline);
     }
 
-    os_set_line_buffering();
+    os_setup_stdio();
 
 #ifdef CONFIG_SPICE
     /* spice needs the timers to be initialized by this point */