diff mbox series

[2/3] gdbstub: Implement follow-fork-mode child

Message ID 20240131205031.144607-3-iii@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series gdbstub: Implement follow-fork-mode child | expand

Commit Message

Ilya Leoshkevich Jan. 31, 2024, 8:43 p.m. UTC
Currently it's not possible to use gdbstub for debugging linux-user
code that runs in a forked child, which is normally done using the `set
follow-fork-mode child` GDB command. Purely on the protocol level, the
missing piece is the fork-events feature.

However, a deeper problem is supporting $Hg switching between different
processes - right now it can do only threads. Implementing this for the
general case would be quite complicated, but, fortunately, for the
follow-fork-mode case there are a few factors that greatly simplify
things: fork() happens in the exclusive section, there are only two
processes involved, and before one of them is resumed, the second one
is detached.

This makes it possible to implement a simplified scheme: the parent and
the child share the gdbserver socket, it's used only by one of them at
any given time, which is coordinated through a separate socketpair. The
processes can read from the gdbserver socket only one byte at a time,
which is not great for performance, but, fortunately, the
follow-fork-mode involves only a few messages.

Add the hooks for the user-specific handling of $qSupported, $Hg, and
$D. Advertise the fork-events support, and remember whether GDB has it
as well. Implement the state machine that is initialized on fork(),
decides the current owner of the gdbserver socket, and is terminated
when one of the two processes is detached. The logic for the parent and
the child is the same, only the initial state is different.

Handle the `stepi` of a syscall corner case by disabling the
single-stepping in detached processes.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
---
 gdbstub/gdbstub.c   |  29 ++++--
 gdbstub/internals.h |   3 +
 gdbstub/user.c      | 210 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 234 insertions(+), 8 deletions(-)

Comments

Alex Bennée Feb. 1, 2024, 12:11 p.m. UTC | #1
Ilya Leoshkevich <iii@linux.ibm.com> writes:

> Currently it's not possible to use gdbstub for debugging linux-user
> code that runs in a forked child, which is normally done using the `set
> follow-fork-mode child` GDB command. Purely on the protocol level, the
> missing piece is the fork-events feature.
>
> However, a deeper problem is supporting $Hg switching between different
> processes - right now it can do only threads. Implementing this for the
> general case would be quite complicated, but, fortunately, for the
> follow-fork-mode case there are a few factors that greatly simplify
> things: fork() happens in the exclusive section, there are only two
> processes involved, and before one of them is resumed, the second one
> is detached.
>
> This makes it possible to implement a simplified scheme: the parent and
> the child share the gdbserver socket, it's used only by one of them at
> any given time, which is coordinated through a separate socketpair. The
> processes can read from the gdbserver socket only one byte at a time,
> which is not great for performance, but, fortunately, the
> follow-fork-mode involves only a few messages.
>
> Add the hooks for the user-specific handling of $qSupported, $Hg, and
> $D. Advertise the fork-events support, and remember whether GDB has it
> as well. Implement the state machine that is initialized on fork(),
> decides the current owner of the gdbserver socket, and is terminated
> when one of the two processes is detached. The logic for the parent and
> the child is the same, only the initial state is different.
>
> Handle the `stepi` of a syscall corner case by disabling the
> single-stepping in detached processes.
>
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>  gdbstub/gdbstub.c   |  29 ++++--
>  gdbstub/internals.h |   3 +
>  gdbstub/user.c      | 210 +++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 234 insertions(+), 8 deletions(-)
>
> diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
> index 7e73e916bdc..46f5dd47e9e 100644
> --- a/gdbstub/gdbstub.c
> +++ b/gdbstub/gdbstub.c
> @@ -991,6 +991,12 @@ static void handle_detach(GArray *params, void *user_ctx)
>          pid = get_param(params, 0)->val_ul;
>      }
>  
> +#ifdef CONFIG_USER_ONLY
> +    if (gdb_handle_detach_user(pid)) {
> +        return;
> +    }
> +#endif
> +
>      process = gdb_get_process(pid);
>      gdb_process_breakpoint_remove_all(process);
>      process->attached = false;
> @@ -1066,6 +1072,7 @@ static void handle_cont_with_sig(GArray *params, void *user_ctx)
>  
>  static void handle_set_thread(GArray *params, void *user_ctx)
>  {
> +    uint32_t pid, tid;
>      CPUState *cpu;
>  
>      if (params->len != 2) {
> @@ -1083,8 +1090,14 @@ static void handle_set_thread(GArray *params, void *user_ctx)
>          return;
>      }
>  
> -    cpu = gdb_get_cpu(get_param(params, 1)->thread_id.pid,
> -                      get_param(params, 1)->thread_id.tid);
> +    pid = get_param(params, 1)->thread_id.pid;
> +    tid = get_param(params, 1)->thread_id.tid;
> +#ifdef CONFIG_USER_ONLY
> +    if (gdb_handle_set_thread_user(pid, tid)) {
> +        return;
> +    }
> +#endif
> +    cpu = gdb_get_cpu(pid, tid);
>      if (!cpu) {
>          gdb_put_packet("E22");
>          return;
> @@ -1599,6 +1612,7 @@ static void handle_query_thread_extra(GArray *params, void *user_ctx)
>  
>  static void handle_query_supported(GArray *params, void *user_ctx)
>  {
> +    const char *gdb_supported;
>      CPUClass *cc;
>  
>      g_string_printf(gdbserver_state.str_buf, "PacketSize=%x", MAX_PACKET_LENGTH);
> @@ -1622,9 +1636,14 @@ static void handle_query_supported(GArray *params, void *user_ctx)
>      g_string_append(gdbserver_state.str_buf, ";qXfer:exec-file:read+");
>  #endif
>  
> -    if (params->len &&
> -        strstr(get_param(params, 0)->data, "multiprocess+")) {
> -        gdbserver_state.multiprocess = true;
> +    if (params->len) {
> +        gdb_supported = get_param(params, 0)->data;
> +        if (strstr(gdb_supported, "multiprocess+")) {
> +            gdbserver_state.multiprocess = true;
> +        }
> +#if defined(CONFIG_USER_ONLY)
> +        gdb_handle_query_supported_user(gdb_supported);
> +#endif
>      }
>  
>      g_string_append(gdbserver_state.str_buf, ";vContSupported+;multiprocess+");
> diff --git a/gdbstub/internals.h b/gdbstub/internals.h
> index 56b7c13b750..b4724598384 100644
> --- a/gdbstub/internals.h
> +++ b/gdbstub/internals.h
> @@ -196,6 +196,9 @@ void gdb_handle_v_file_pread(GArray *params, void *user_ctx); /* user */
>  void gdb_handle_v_file_readlink(GArray *params, void *user_ctx); /* user */
>  void gdb_handle_query_xfer_exec_file(GArray *params, void *user_ctx); /* user */
>  void gdb_handle_set_catch_syscalls(GArray *params, void *user_ctx); /* user */
> +void gdb_handle_query_supported_user(const char *gdb_supported); /* user */
> +bool gdb_handle_set_thread_user(uint32_t pid, uint32_t tid); /* user */
> +bool gdb_handle_detach_user(uint32_t pid); /* user */
>  
>  void gdb_handle_query_attached(GArray *params, void *user_ctx); /* both */
>  
> diff --git a/gdbstub/user.c b/gdbstub/user.c
> index 120eb7fc117..962f4cb74e7 100644
> --- a/gdbstub/user.c
> +++ b/gdbstub/user.c
> @@ -10,6 +10,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include <sys/syscall.h>
>  #include "qemu/bitops.h"
>  #include "qemu/cutils.h"
>  #include "qemu/sockets.h"
> @@ -25,6 +26,41 @@
>  #define GDB_NR_SYSCALLS 1024
>  typedef unsigned long GDBSyscallsMask[BITS_TO_LONGS(GDB_NR_SYSCALLS)];
>  
> +/*
> + * Forked child talks to its parent in order to let GDB enforce the
> + * follow-fork-mode. This happens inside a start_exclusive() section, so that
> + * the other threads, which may be forking too, do not interfere. The
> + * implementation relies on GDB not sending $vCont until it has detached
> + * either from the parent (follow-fork-mode child) or from the child
> + * (follow-fork-mode parent).
> + *
> + * The parent and the child share the GDB socket; at any given time only one
> + * of them is allowed to use it, as is reflected in the respective fork_state.
> + * This is negotiated via the fork_sockets pair as a reaction to $Hg.
> + */
> +enum GDBForkState {
> +    /* Fully owning the GDB socket. */
> +    GDB_FORK_ENABLED,
> +    /* Working with the GDB socket; the peer is inactive. */
> +    GDB_FORK_ACTIVE,
> +    /* Handing off the GDB socket to the peer. */
> +    GDB_FORK_DEACTIVATING,
> +    /* The peer is working with the GDB socket. */
> +    GDB_FORK_INACTIVE,
> +    /* Asking the peer to close its GDB socket fd. */
> +    GDB_FORK_ENABLING,
> +    /* Asking the peer to take over, closing our GDB socket fd. */
> +    GDB_FORK_DISABLING,
> +    /* The peer has taken over, our GDB socket fd is closed. */
> +    GDB_FORK_DISABLED,
> +};

gulp - thats a potentially fairly complex state diagram. Do we just work
through the states sequentially?

> +
> +enum GDBForkMessage {
> +    GDB_FORK_ACTIVATE = 'a',
> +    GDB_FORK_ENABLE = 'e',
> +    GDB_FORK_DISABLE = 'd',
> +};
> +
>  /* User-mode specific state */
>  typedef struct {
>      int fd;
> @@ -36,6 +72,10 @@ typedef struct {
>       */
>      bool catch_all_syscalls;
>      GDBSyscallsMask catch_syscalls_mask;
> +    bool fork_events;
> +    enum GDBForkState fork_state;
> +    int fork_sockets[2];
> +    pid_t fork_peer_pid, fork_peer_tid;
>  } GDBUserState;
>  
>  static GDBUserState gdbserver_user_state;
> @@ -358,6 +398,18 @@ int gdbserver_start(const char *port_or_path)
>  
>  void gdbserver_fork_start(void)
>  {
> +    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
> +        return;
> +    }
> +    if (!gdbserver_user_state.fork_events ||
> +            qemu_socketpair(AF_UNIX, SOCK_STREAM, 0,
> +                            gdbserver_user_state.fork_sockets) < 0) {
> +        gdbserver_user_state.fork_state = GDB_FORK_DISABLED;
> +        return;
> +    }
> +    gdbserver_user_state.fork_state = GDB_FORK_INACTIVE;
> +    gdbserver_user_state.fork_peer_pid = getpid();
> +    gdbserver_user_state.fork_peer_tid = qemu_get_thread_id();
>  }
>  
>  static void disable_gdbstub(void)
> @@ -369,16 +421,168 @@ static void disable_gdbstub(void)
>      CPU_FOREACH(cpu) {
>          cpu_breakpoint_remove_all(cpu, BP_GDB);
>          /* no cpu_watchpoint_remove_all for user-mode */
> +        cpu_single_step(cpu, 0);
> +        tb_flush(cpu);
>      }
>  }
>  
> -/* Disable gdb stub for child processes.  */
>  void gdbserver_fork_end(pid_t pid)
>  {
> -    if (pid != 0 || !gdbserver_state.init || gdbserver_user_state.fd < 0) {
> +    char b;
> +    int fd;
> +
> +    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
> +        return;
> +    }
> +
> +    if (pid == -1) {
> +        if (gdbserver_user_state.fork_state != GDB_FORK_DISABLED) {
> +            g_assert(gdbserver_user_state.fork_state == GDB_FORK_INACTIVE);
> +            close(gdbserver_user_state.fork_sockets[0]);
> +            close(gdbserver_user_state.fork_sockets[1]);
> +        }
>          return;
>      }
> -    disable_gdbstub();
> +
> +    if (gdbserver_user_state.fork_state == GDB_FORK_DISABLED) {
> +        if (pid == 0) {
> +            disable_gdbstub();
> +        }
> +        return;
> +    }
> +
> +    if (pid == 0) {
> +        close(gdbserver_user_state.fork_sockets[0]);
> +        fd = gdbserver_user_state.fork_sockets[1];
> +        g_assert(gdbserver_state.process_num == 1);
> +        g_assert(gdbserver_state.processes[0].pid ==
> +                     gdbserver_user_state.fork_peer_pid);
> +        g_assert(gdbserver_state.processes[0].attached);
> +        gdbserver_state.processes[0].pid = getpid();
> +    } else {
> +        close(gdbserver_user_state.fork_sockets[1]);
> +        fd = gdbserver_user_state.fork_sockets[0];
> +        gdbserver_user_state.fork_state = GDB_FORK_ACTIVE;
> +        gdbserver_user_state.fork_peer_pid = pid;
> +        gdbserver_user_state.fork_peer_tid = pid;
> +
> +        if (!gdbserver_state.allow_stop_reply) {
> +            goto fail;
> +        }
> +        g_string_printf(gdbserver_state.str_buf,
> +                        "T%02xfork:p%02x.%02x;thread:p%02x.%02x;",
> +                        gdb_target_signal_to_gdb(gdb_target_sigtrap()),
> +                        pid, pid, (int)getpid(),
> qemu_get_thread_id());

I don't think I messed up the merge but:

../../gdbstub/user.c: In function ‘gdbserver_fork_end’:
../../gdbstub/user.c:461:50: error: implicit declaration of function ‘gdb_target_sigtrap’ [-Werror=implicit-function-declaration]
  461 |                         gdb_target_signal_to_gdb(gdb_target_sigtrap()),
      |                                                  ^~~~~~~~~~~~~~~~~~
../../gdbstub/user.c:461:50: error: nested extern declaration of ‘gdb_target_sigtrap’ [-Werror=nested-externs]
cc1: all warnings being treated as errors

I cant see where gdb_target_sigtrap is from?
Ilya Leoshkevich Feb. 1, 2024, 4:22 p.m. UTC | #2
On Thu, 2024-02-01 at 12:11 +0000, Alex Bennée wrote:
> Ilya Leoshkevich <iii@linux.ibm.com> writes:
> 
> > Currently it's not possible to use gdbstub for debugging linux-user
> > code that runs in a forked child, which is normally done using the
> > `set
> > follow-fork-mode child` GDB command. Purely on the protocol level,
> > the
> > missing piece is the fork-events feature.
> > 
> > However, a deeper problem is supporting $Hg switching between
> > different
> > processes - right now it can do only threads. Implementing this for
> > the
> > general case would be quite complicated, but, fortunately, for the
> > follow-fork-mode case there are a few factors that greatly simplify
> > things: fork() happens in the exclusive section, there are only two
> > processes involved, and before one of them is resumed, the second
> > one
> > is detached.
> > 
> > This makes it possible to implement a simplified scheme: the parent
> > and
> > the child share the gdbserver socket, it's used only by one of them
> > at
> > any given time, which is coordinated through a separate socketpair.
> > The
> > processes can read from the gdbserver socket only one byte at a
> > time,
> > which is not great for performance, but, fortunately, the
> > follow-fork-mode involves only a few messages.
> > 
> > Add the hooks for the user-specific handling of $qSupported, $Hg,
> > and
> > $D. Advertise the fork-events support, and remember whether GDB has
> > it
> > as well. Implement the state machine that is initialized on fork(),
> > decides the current owner of the gdbserver socket, and is
> > terminated
> > when one of the two processes is detached. The logic for the parent
> > and
> > the child is the same, only the initial state is different.
> > 
> > Handle the `stepi` of a syscall corner case by disabling the
> > single-stepping in detached processes.
> > 
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >  gdbstub/gdbstub.c   |  29 ++++--
> >  gdbstub/internals.h |   3 +
> >  gdbstub/user.c      | 210
> > +++++++++++++++++++++++++++++++++++++++++++-
> >  3 files changed, 234 insertions(+), 8 deletions(-)

[...]
 
> > diff --git a/gdbstub/user.c b/gdbstub/user.c
> > index 120eb7fc117..962f4cb74e7 100644
> > --- a/gdbstub/user.c
> > +++ b/gdbstub/user.c
> > @@ -10,6 +10,7 @@
> >   */
> >  
> >  #include "qemu/osdep.h"
> > +#include <sys/syscall.h>
> >  #include "qemu/bitops.h"
> >  #include "qemu/cutils.h"
> >  #include "qemu/sockets.h"
> > @@ -25,6 +26,41 @@
> >  #define GDB_NR_SYSCALLS 1024
> >  typedef unsigned long
> > GDBSyscallsMask[BITS_TO_LONGS(GDB_NR_SYSCALLS)];
> >  
> > +/*
> > + * Forked child talks to its parent in order to let GDB enforce
> > the
> > + * follow-fork-mode. This happens inside a start_exclusive()
> > section, so that
> > + * the other threads, which may be forking too, do not interfere.
> > The
> > + * implementation relies on GDB not sending $vCont until it has
> > detached
> > + * either from the parent (follow-fork-mode child) or from the
> > child
> > + * (follow-fork-mode parent).
> > + *
> > + * The parent and the child share the GDB socket; at any given
> > time only one
> > + * of them is allowed to use it, as is reflected in the respective
> > fork_state.
> > + * This is negotiated via the fork_sockets pair as a reaction to
> > $Hg.
> > + */
> > +enum GDBForkState {
> > +    /* Fully owning the GDB socket. */
> > +    GDB_FORK_ENABLED,
> > +    /* Working with the GDB socket; the peer is inactive. */
> > +    GDB_FORK_ACTIVE,
> > +    /* Handing off the GDB socket to the peer. */
> > +    GDB_FORK_DEACTIVATING,
> > +    /* The peer is working with the GDB socket. */
> > +    GDB_FORK_INACTIVE,
> > +    /* Asking the peer to close its GDB socket fd. */
> > +    GDB_FORK_ENABLING,
> > +    /* Asking the peer to take over, closing our GDB socket fd. */
> > +    GDB_FORK_DISABLING,
> > +    /* The peer has taken over, our GDB socket fd is closed. */
> > +    GDB_FORK_DISABLED,
> > +};
> 
> gulp - thats a potentially fairly complex state diagram. Do we just
> work
> through the states sequentially?

Unfortunately no. I had less states at some point, but then realized
it was better to have these things laid out explicitly. Let me try to
summarize the possible transitions:

GDB_FORK_ENABLED: Terminal state; GDB follows the current process.
GDB_FORK_DISABLED: Terminal state; GDB follows the other process.
GDB_FORK_ACTIVE -> GDB_FORK_DEACTIVATING: On $Hg.
GDB_FORK_ACTIVE -> GDB_FORK_ENABLING: On $D.
GDB_FORK_ACTIVE -> GDB_FORK_DISABLING: On $D.
GDB_FORK_ACTIVE -> GDB_FORK_DISABLED: On communication error.
GDB_FORK_DEACTIVATING -> GDB_FORK_INACTIVE: On gdb_read_byte() return.
GDB_FORK_DEACTIVATING -> GDB_FORK_DISABLED: On communication error.
GDB_FORK_INACTIVE -> GDB_FORK_ACTIVE: On $Hg in peer.
GDB_FORK_INACTIVE -> GDB_FORK_ENABLE: On $D in peer.
GDB_FORK_INACTIVE -> GDB_FORK_DISABLE: On $D in peer.
GDB_FORK_INACTIVE -> GDB_FORK_DISABLED: On communication error.
GDB_FORK_ENABLING -> GDB_FORK_ENABLED: On gdb_read_byte() return.
GDB_FORK_ENABLING -> GDB_FORK_DISABLED: On communication error.
GDB_FORK_DISABLING -> GDB_FORK_DISABLED: On gdb_read_byte() return.

Some states have only one meaningful transition:

GDB_FORK_DEACTIVATING -> GDB_FORK_INACTIVE
GDB_FORK_ENABLING -> GDB_FORK_ENABLED

and can in theory be squashed, but then the socketpair communication
would have to be moved to the respective user-hook, which would
complicate the error handling.

[...]

> > @@ -369,16 +421,168 @@ static void disable_gdbstub(void)
> >      CPU_FOREACH(cpu) {
> >          cpu_breakpoint_remove_all(cpu, BP_GDB);
> >          /* no cpu_watchpoint_remove_all for user-mode */
> > +        cpu_single_step(cpu, 0);
> > +        tb_flush(cpu);
> >      }
> >  }
> >  
> > -/* Disable gdb stub for child processes.  */
> >  void gdbserver_fork_end(pid_t pid)
> >  {
> > -    if (pid != 0 || !gdbserver_state.init ||
> > gdbserver_user_state.fd < 0) {
> > +    char b;
> > +    int fd;
> > +
> > +    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
> > +        return;
> > +    }
> > +
> > +    if (pid == -1) {
> > +        if (gdbserver_user_state.fork_state != GDB_FORK_DISABLED)
> > {
> > +            g_assert(gdbserver_user_state.fork_state ==
> > GDB_FORK_INACTIVE);
> > +            close(gdbserver_user_state.fork_sockets[0]);
> > +            close(gdbserver_user_state.fork_sockets[1]);
> > +        }
> >          return;
> >      }
> > -    disable_gdbstub();
> > +
> > +    if (gdbserver_user_state.fork_state == GDB_FORK_DISABLED) {
> > +        if (pid == 0) {
> > +            disable_gdbstub();
> > +        }
> > +        return;
> > +    }
> > +
> > +    if (pid == 0) {
> > +        close(gdbserver_user_state.fork_sockets[0]);
> > +        fd = gdbserver_user_state.fork_sockets[1];
> > +        g_assert(gdbserver_state.process_num == 1);
> > +        g_assert(gdbserver_state.processes[0].pid ==
> > +                     gdbserver_user_state.fork_peer_pid);
> > +        g_assert(gdbserver_state.processes[0].attached);
> > +        gdbserver_state.processes[0].pid = getpid();
> > +    } else {
> > +        close(gdbserver_user_state.fork_sockets[1]);
> > +        fd = gdbserver_user_state.fork_sockets[0];
> > +        gdbserver_user_state.fork_state = GDB_FORK_ACTIVE;
> > +        gdbserver_user_state.fork_peer_pid = pid;
> > +        gdbserver_user_state.fork_peer_tid = pid;
> > +
> > +        if (!gdbserver_state.allow_stop_reply) {
> > +            goto fail;
> > +        }
> > +        g_string_printf(gdbserver_state.str_buf,
> > +                        "T%02xfork:p%02x.%02x;thread:p%02x.%02x;",
> > +                       
> > gdb_target_signal_to_gdb(gdb_target_sigtrap()),
> > +                        pid, pid, (int)getpid(),
> > qemu_get_thread_id());
> 
> I don't think I messed up the merge but:
> 
> ../../gdbstub/user.c: In function ‘gdbserver_fork_end’:
> ../../gdbstub/user.c:461:50: error: implicit declaration of function
> ‘gdb_target_sigtrap’ [-Werror=implicit-function-declaration]
>   461 |                        
> gdb_target_signal_to_gdb(gdb_target_sigtrap()),
>       |                                                 
> ^~~~~~~~~~~~~~~~~~
> ../../gdbstub/user.c:461:50: error: nested extern declaration of
> ‘gdb_target_sigtrap’ [-Werror=nested-externs]
> cc1: all warnings being treated as errors
> 
> I cant see where gdb_target_sigtrap is from?

This is from [1], which this series is Based-on. I can make one series
with both features if it's more convenient to review.

[1] https://patchew.org/QEMU/20240116094411.216665-1-iii@linux.ibm.com/
Alex Bennée Feb. 1, 2024, 4:48 p.m. UTC | #3
Ilya Leoshkevich <iii@linux.ibm.com> writes:

> On Thu, 2024-02-01 at 12:11 +0000, Alex Bennée wrote:
>> Ilya Leoshkevich <iii@linux.ibm.com> writes:
>> 
>> > Currently it's not possible to use gdbstub for debugging linux-user
>> > code that runs in a forked child, which is normally done using the
>> > `set
>> > follow-fork-mode child` GDB command. Purely on the protocol level,
>> > the
>> > missing piece is the fork-events feature.
>> > 
>> > However, a deeper problem is supporting $Hg switching between
>> > different
>> > processes - right now it can do only threads. Implementing this for
>> > the
>> > general case would be quite complicated, but, fortunately, for the
>> > follow-fork-mode case there are a few factors that greatly simplify
>> > things: fork() happens in the exclusive section, there are only two
>> > processes involved, and before one of them is resumed, the second
>> > one
>> > is detached.
>> > 
>> > This makes it possible to implement a simplified scheme: the parent
>> > and
>> > the child share the gdbserver socket, it's used only by one of them
>> > at
>> > any given time, which is coordinated through a separate socketpair.
>> > The
>> > processes can read from the gdbserver socket only one byte at a
>> > time,
>> > which is not great for performance, but, fortunately, the
>> > follow-fork-mode involves only a few messages.
>> > 
>> > Add the hooks for the user-specific handling of $qSupported, $Hg,
>> > and
>> > $D. Advertise the fork-events support, and remember whether GDB has
>> > it
>> > as well. Implement the state machine that is initialized on fork(),
>> > decides the current owner of the gdbserver socket, and is
>> > terminated
>> > when one of the two processes is detached. The logic for the parent
>> > and
>> > the child is the same, only the initial state is different.
>> > 
>> > Handle the `stepi` of a syscall corner case by disabling the
>> > single-stepping in detached processes.
>> > 
>> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
>> > ---
>> >  gdbstub/gdbstub.c   |  29 ++++--
>> >  gdbstub/internals.h |   3 +
>> >  gdbstub/user.c      | 210
>> > +++++++++++++++++++++++++++++++++++++++++++-
>> >  3 files changed, 234 insertions(+), 8 deletions(-)
>
> [...]
>  
>> > diff --git a/gdbstub/user.c b/gdbstub/user.c
>> > index 120eb7fc117..962f4cb74e7 100644
>> > --- a/gdbstub/user.c
>> > +++ b/gdbstub/user.c
>> > @@ -10,6 +10,7 @@
>> >   */
>> >  
>> >  #include "qemu/osdep.h"
>> > +#include <sys/syscall.h>
>> >  #include "qemu/bitops.h"
>> >  #include "qemu/cutils.h"
>> >  #include "qemu/sockets.h"
>> > @@ -25,6 +26,41 @@
>> >  #define GDB_NR_SYSCALLS 1024
>> >  typedef unsigned long
>> > GDBSyscallsMask[BITS_TO_LONGS(GDB_NR_SYSCALLS)];
>> >  
>> > +/*
>> > + * Forked child talks to its parent in order to let GDB enforce
>> > the
>> > + * follow-fork-mode. This happens inside a start_exclusive()
>> > section, so that
>> > + * the other threads, which may be forking too, do not interfere.
>> > The
>> > + * implementation relies on GDB not sending $vCont until it has
>> > detached
>> > + * either from the parent (follow-fork-mode child) or from the
>> > child
>> > + * (follow-fork-mode parent).
>> > + *
>> > + * The parent and the child share the GDB socket; at any given
>> > time only one
>> > + * of them is allowed to use it, as is reflected in the respective
>> > fork_state.
>> > + * This is negotiated via the fork_sockets pair as a reaction to
>> > $Hg.
>> > + */
>> > +enum GDBForkState {
>> > +    /* Fully owning the GDB socket. */
>> > +    GDB_FORK_ENABLED,
>> > +    /* Working with the GDB socket; the peer is inactive. */
>> > +    GDB_FORK_ACTIVE,
>> > +    /* Handing off the GDB socket to the peer. */
>> > +    GDB_FORK_DEACTIVATING,
>> > +    /* The peer is working with the GDB socket. */
>> > +    GDB_FORK_INACTIVE,
>> > +    /* Asking the peer to close its GDB socket fd. */
>> > +    GDB_FORK_ENABLING,
>> > +    /* Asking the peer to take over, closing our GDB socket fd. */
>> > +    GDB_FORK_DISABLING,
>> > +    /* The peer has taken over, our GDB socket fd is closed. */
>> > +    GDB_FORK_DISABLED,
>> > +};
>> 
>> gulp - thats a potentially fairly complex state diagram. Do we just
>> work
>> through the states sequentially?
>
> Unfortunately no. I had less states at some point, but then realized
> it was better to have these things laid out explicitly. Let me try to
> summarize the possible transitions:
>
> GDB_FORK_ENABLED: Terminal state; GDB follows the current process.
> GDB_FORK_DISABLED: Terminal state; GDB follows the other process.
> GDB_FORK_ACTIVE -> GDB_FORK_DEACTIVATING: On $Hg.
> GDB_FORK_ACTIVE -> GDB_FORK_ENABLING: On $D.
> GDB_FORK_ACTIVE -> GDB_FORK_DISABLING: On $D.
> GDB_FORK_ACTIVE -> GDB_FORK_DISABLED: On communication error.
> GDB_FORK_DEACTIVATING -> GDB_FORK_INACTIVE: On gdb_read_byte() return.
> GDB_FORK_DEACTIVATING -> GDB_FORK_DISABLED: On communication error.
> GDB_FORK_INACTIVE -> GDB_FORK_ACTIVE: On $Hg in peer.
> GDB_FORK_INACTIVE -> GDB_FORK_ENABLE: On $D in peer.
> GDB_FORK_INACTIVE -> GDB_FORK_DISABLE: On $D in peer.
> GDB_FORK_INACTIVE -> GDB_FORK_DISABLED: On communication error.
> GDB_FORK_ENABLING -> GDB_FORK_ENABLED: On gdb_read_byte() return.
> GDB_FORK_ENABLING -> GDB_FORK_DISABLED: On communication error.
> GDB_FORK_DISABLING -> GDB_FORK_DISABLED: On gdb_read_byte() return.
>
> Some states have only one meaningful transition:
>
> GDB_FORK_DEACTIVATING -> GDB_FORK_INACTIVE
> GDB_FORK_ENABLING -> GDB_FORK_ENABLED
>
> and can in theory be squashed, but then the socketpair communication
> would have to be moved to the respective user-hook, which would
> complicate the error handling.
>
> [...]
>
>> > @@ -369,16 +421,168 @@ static void disable_gdbstub(void)
>> >      CPU_FOREACH(cpu) {
>> >          cpu_breakpoint_remove_all(cpu, BP_GDB);
>> >          /* no cpu_watchpoint_remove_all for user-mode */
>> > +        cpu_single_step(cpu, 0);
>> > +        tb_flush(cpu);
>> >      }
>> >  }
>> >  
>> > -/* Disable gdb stub for child processes.  */
>> >  void gdbserver_fork_end(pid_t pid)
>> >  {
>> > -    if (pid != 0 || !gdbserver_state.init ||
>> > gdbserver_user_state.fd < 0) {
>> > +    char b;
>> > +    int fd;
>> > +
>> > +    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
>> > +        return;
>> > +    }
>> > +
>> > +    if (pid == -1) {
>> > +        if (gdbserver_user_state.fork_state != GDB_FORK_DISABLED)
>> > {
>> > +            g_assert(gdbserver_user_state.fork_state ==
>> > GDB_FORK_INACTIVE);
>> > +            close(gdbserver_user_state.fork_sockets[0]);
>> > +            close(gdbserver_user_state.fork_sockets[1]);
>> > +        }
>> >          return;
>> >      }
>> > -    disable_gdbstub();
>> > +
>> > +    if (gdbserver_user_state.fork_state == GDB_FORK_DISABLED) {
>> > +        if (pid == 0) {
>> > +            disable_gdbstub();
>> > +        }
>> > +        return;
>> > +    }
>> > +
>> > +    if (pid == 0) {
>> > +        close(gdbserver_user_state.fork_sockets[0]);
>> > +        fd = gdbserver_user_state.fork_sockets[1];
>> > +        g_assert(gdbserver_state.process_num == 1);
>> > +        g_assert(gdbserver_state.processes[0].pid ==
>> > +                     gdbserver_user_state.fork_peer_pid);
>> > +        g_assert(gdbserver_state.processes[0].attached);
>> > +        gdbserver_state.processes[0].pid = getpid();
>> > +    } else {
>> > +        close(gdbserver_user_state.fork_sockets[1]);
>> > +        fd = gdbserver_user_state.fork_sockets[0];
>> > +        gdbserver_user_state.fork_state = GDB_FORK_ACTIVE;
>> > +        gdbserver_user_state.fork_peer_pid = pid;
>> > +        gdbserver_user_state.fork_peer_tid = pid;
>> > +
>> > +        if (!gdbserver_state.allow_stop_reply) {
>> > +            goto fail;
>> > +        }
>> > +        g_string_printf(gdbserver_state.str_buf,
>> > +                        "T%02xfork:p%02x.%02x;thread:p%02x.%02x;",
>> > +                       
>> > gdb_target_signal_to_gdb(gdb_target_sigtrap()),
>> > +                        pid, pid, (int)getpid(),
>> > qemu_get_thread_id());
>> 
>> I don't think I messed up the merge but:
>> 
>> ../../gdbstub/user.c: In function ‘gdbserver_fork_end’:
>> ../../gdbstub/user.c:461:50: error: implicit declaration of function
>> ‘gdb_target_sigtrap’ [-Werror=implicit-function-declaration]
>>   461 |                        
>> gdb_target_signal_to_gdb(gdb_target_sigtrap()),
>>       |                                                 
>> ^~~~~~~~~~~~~~~~~~
>> ../../gdbstub/user.c:461:50: error: nested extern declaration of
>> ‘gdb_target_sigtrap’ [-Werror=nested-externs]
>> cc1: all warnings being treated as errors
>> 
>> I cant see where gdb_target_sigtrap is from?
>
> This is from [1], which this series is Based-on. I can make one series
> with both features if it's more convenient to review.
>
> [1]
> https://patchew.org/QEMU/20240116094411.216665-1-iii@linux.ibm.com/

Oops missed that one, looking at it now.
diff mbox series

Patch

diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c
index 7e73e916bdc..46f5dd47e9e 100644
--- a/gdbstub/gdbstub.c
+++ b/gdbstub/gdbstub.c
@@ -991,6 +991,12 @@  static void handle_detach(GArray *params, void *user_ctx)
         pid = get_param(params, 0)->val_ul;
     }
 
+#ifdef CONFIG_USER_ONLY
+    if (gdb_handle_detach_user(pid)) {
+        return;
+    }
+#endif
+
     process = gdb_get_process(pid);
     gdb_process_breakpoint_remove_all(process);
     process->attached = false;
@@ -1066,6 +1072,7 @@  static void handle_cont_with_sig(GArray *params, void *user_ctx)
 
 static void handle_set_thread(GArray *params, void *user_ctx)
 {
+    uint32_t pid, tid;
     CPUState *cpu;
 
     if (params->len != 2) {
@@ -1083,8 +1090,14 @@  static void handle_set_thread(GArray *params, void *user_ctx)
         return;
     }
 
-    cpu = gdb_get_cpu(get_param(params, 1)->thread_id.pid,
-                      get_param(params, 1)->thread_id.tid);
+    pid = get_param(params, 1)->thread_id.pid;
+    tid = get_param(params, 1)->thread_id.tid;
+#ifdef CONFIG_USER_ONLY
+    if (gdb_handle_set_thread_user(pid, tid)) {
+        return;
+    }
+#endif
+    cpu = gdb_get_cpu(pid, tid);
     if (!cpu) {
         gdb_put_packet("E22");
         return;
@@ -1599,6 +1612,7 @@  static void handle_query_thread_extra(GArray *params, void *user_ctx)
 
 static void handle_query_supported(GArray *params, void *user_ctx)
 {
+    const char *gdb_supported;
     CPUClass *cc;
 
     g_string_printf(gdbserver_state.str_buf, "PacketSize=%x", MAX_PACKET_LENGTH);
@@ -1622,9 +1636,14 @@  static void handle_query_supported(GArray *params, void *user_ctx)
     g_string_append(gdbserver_state.str_buf, ";qXfer:exec-file:read+");
 #endif
 
-    if (params->len &&
-        strstr(get_param(params, 0)->data, "multiprocess+")) {
-        gdbserver_state.multiprocess = true;
+    if (params->len) {
+        gdb_supported = get_param(params, 0)->data;
+        if (strstr(gdb_supported, "multiprocess+")) {
+            gdbserver_state.multiprocess = true;
+        }
+#if defined(CONFIG_USER_ONLY)
+        gdb_handle_query_supported_user(gdb_supported);
+#endif
     }
 
     g_string_append(gdbserver_state.str_buf, ";vContSupported+;multiprocess+");
diff --git a/gdbstub/internals.h b/gdbstub/internals.h
index 56b7c13b750..b4724598384 100644
--- a/gdbstub/internals.h
+++ b/gdbstub/internals.h
@@ -196,6 +196,9 @@  void gdb_handle_v_file_pread(GArray *params, void *user_ctx); /* user */
 void gdb_handle_v_file_readlink(GArray *params, void *user_ctx); /* user */
 void gdb_handle_query_xfer_exec_file(GArray *params, void *user_ctx); /* user */
 void gdb_handle_set_catch_syscalls(GArray *params, void *user_ctx); /* user */
+void gdb_handle_query_supported_user(const char *gdb_supported); /* user */
+bool gdb_handle_set_thread_user(uint32_t pid, uint32_t tid); /* user */
+bool gdb_handle_detach_user(uint32_t pid); /* user */
 
 void gdb_handle_query_attached(GArray *params, void *user_ctx); /* both */
 
diff --git a/gdbstub/user.c b/gdbstub/user.c
index 120eb7fc117..962f4cb74e7 100644
--- a/gdbstub/user.c
+++ b/gdbstub/user.c
@@ -10,6 +10,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include <sys/syscall.h>
 #include "qemu/bitops.h"
 #include "qemu/cutils.h"
 #include "qemu/sockets.h"
@@ -25,6 +26,41 @@ 
 #define GDB_NR_SYSCALLS 1024
 typedef unsigned long GDBSyscallsMask[BITS_TO_LONGS(GDB_NR_SYSCALLS)];
 
+/*
+ * Forked child talks to its parent in order to let GDB enforce the
+ * follow-fork-mode. This happens inside a start_exclusive() section, so that
+ * the other threads, which may be forking too, do not interfere. The
+ * implementation relies on GDB not sending $vCont until it has detached
+ * either from the parent (follow-fork-mode child) or from the child
+ * (follow-fork-mode parent).
+ *
+ * The parent and the child share the GDB socket; at any given time only one
+ * of them is allowed to use it, as is reflected in the respective fork_state.
+ * This is negotiated via the fork_sockets pair as a reaction to $Hg.
+ */
+enum GDBForkState {
+    /* Fully owning the GDB socket. */
+    GDB_FORK_ENABLED,
+    /* Working with the GDB socket; the peer is inactive. */
+    GDB_FORK_ACTIVE,
+    /* Handing off the GDB socket to the peer. */
+    GDB_FORK_DEACTIVATING,
+    /* The peer is working with the GDB socket. */
+    GDB_FORK_INACTIVE,
+    /* Asking the peer to close its GDB socket fd. */
+    GDB_FORK_ENABLING,
+    /* Asking the peer to take over, closing our GDB socket fd. */
+    GDB_FORK_DISABLING,
+    /* The peer has taken over, our GDB socket fd is closed. */
+    GDB_FORK_DISABLED,
+};
+
+enum GDBForkMessage {
+    GDB_FORK_ACTIVATE = 'a',
+    GDB_FORK_ENABLE = 'e',
+    GDB_FORK_DISABLE = 'd',
+};
+
 /* User-mode specific state */
 typedef struct {
     int fd;
@@ -36,6 +72,10 @@  typedef struct {
      */
     bool catch_all_syscalls;
     GDBSyscallsMask catch_syscalls_mask;
+    bool fork_events;
+    enum GDBForkState fork_state;
+    int fork_sockets[2];
+    pid_t fork_peer_pid, fork_peer_tid;
 } GDBUserState;
 
 static GDBUserState gdbserver_user_state;
@@ -358,6 +398,18 @@  int gdbserver_start(const char *port_or_path)
 
 void gdbserver_fork_start(void)
 {
+    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
+        return;
+    }
+    if (!gdbserver_user_state.fork_events ||
+            qemu_socketpair(AF_UNIX, SOCK_STREAM, 0,
+                            gdbserver_user_state.fork_sockets) < 0) {
+        gdbserver_user_state.fork_state = GDB_FORK_DISABLED;
+        return;
+    }
+    gdbserver_user_state.fork_state = GDB_FORK_INACTIVE;
+    gdbserver_user_state.fork_peer_pid = getpid();
+    gdbserver_user_state.fork_peer_tid = qemu_get_thread_id();
 }
 
 static void disable_gdbstub(void)
@@ -369,16 +421,168 @@  static void disable_gdbstub(void)
     CPU_FOREACH(cpu) {
         cpu_breakpoint_remove_all(cpu, BP_GDB);
         /* no cpu_watchpoint_remove_all for user-mode */
+        cpu_single_step(cpu, 0);
+        tb_flush(cpu);
     }
 }
 
-/* Disable gdb stub for child processes.  */
 void gdbserver_fork_end(pid_t pid)
 {
-    if (pid != 0 || !gdbserver_state.init || gdbserver_user_state.fd < 0) {
+    char b;
+    int fd;
+
+    if (!gdbserver_state.init || gdbserver_user_state.fd < 0) {
+        return;
+    }
+
+    if (pid == -1) {
+        if (gdbserver_user_state.fork_state != GDB_FORK_DISABLED) {
+            g_assert(gdbserver_user_state.fork_state == GDB_FORK_INACTIVE);
+            close(gdbserver_user_state.fork_sockets[0]);
+            close(gdbserver_user_state.fork_sockets[1]);
+        }
         return;
     }
-    disable_gdbstub();
+
+    if (gdbserver_user_state.fork_state == GDB_FORK_DISABLED) {
+        if (pid == 0) {
+            disable_gdbstub();
+        }
+        return;
+    }
+
+    if (pid == 0) {
+        close(gdbserver_user_state.fork_sockets[0]);
+        fd = gdbserver_user_state.fork_sockets[1];
+        g_assert(gdbserver_state.process_num == 1);
+        g_assert(gdbserver_state.processes[0].pid ==
+                     gdbserver_user_state.fork_peer_pid);
+        g_assert(gdbserver_state.processes[0].attached);
+        gdbserver_state.processes[0].pid = getpid();
+    } else {
+        close(gdbserver_user_state.fork_sockets[1]);
+        fd = gdbserver_user_state.fork_sockets[0];
+        gdbserver_user_state.fork_state = GDB_FORK_ACTIVE;
+        gdbserver_user_state.fork_peer_pid = pid;
+        gdbserver_user_state.fork_peer_tid = pid;
+
+        if (!gdbserver_state.allow_stop_reply) {
+            goto fail;
+        }
+        g_string_printf(gdbserver_state.str_buf,
+                        "T%02xfork:p%02x.%02x;thread:p%02x.%02x;",
+                        gdb_target_signal_to_gdb(gdb_target_sigtrap()),
+                        pid, pid, (int)getpid(), qemu_get_thread_id());
+        gdb_put_strbuf();
+    }
+
+    gdbserver_state.state = RS_IDLE;
+    gdbserver_state.allow_stop_reply = false;
+    gdbserver_user_state.running_state = 0;
+    for (;;) {
+        switch (gdbserver_user_state.fork_state) {
+        case GDB_FORK_ENABLED:
+            if (gdbserver_user_state.running_state) {
+                return;
+            }
+            QEMU_FALLTHROUGH;
+        case GDB_FORK_ACTIVE:
+            if (read(gdbserver_user_state.fd, &b, 1) != 1) {
+                goto fail;
+            }
+            gdb_read_byte(b);
+            break;
+        case GDB_FORK_DEACTIVATING:
+            b = GDB_FORK_ACTIVATE;
+            if (write(fd, &b, 1) != 1) {
+                goto fail;
+            }
+            gdbserver_user_state.fork_state = GDB_FORK_INACTIVE;
+            break;
+        case GDB_FORK_INACTIVE:
+            if (read(fd, &b, 1) != 1) {
+                goto fail;
+            }
+            switch (b) {
+            case GDB_FORK_ACTIVATE:
+                gdbserver_user_state.fork_state = GDB_FORK_ACTIVE;
+                break;
+            case GDB_FORK_ENABLE:
+                close(fd);
+                gdbserver_user_state.fork_state = GDB_FORK_ENABLED;
+                break;
+            case GDB_FORK_DISABLE:
+                gdbserver_user_state.fork_state = GDB_FORK_DISABLED;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+            break;
+        case GDB_FORK_ENABLING:
+            b = GDB_FORK_DISABLE;
+            if (write(fd, &b, 1) != 1) {
+                goto fail;
+            }
+            close(fd);
+            gdbserver_user_state.fork_state = GDB_FORK_ENABLED;
+            break;
+        case GDB_FORK_DISABLING:
+            b = GDB_FORK_ENABLE;
+            if (write(fd, &b, 1) != 1) {
+                goto fail;
+            }
+            gdbserver_user_state.fork_state = GDB_FORK_DISABLED;
+            break;
+        case GDB_FORK_DISABLED:
+            close(fd);
+            disable_gdbstub();
+            return;
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+fail:
+    close(fd);
+    if (pid == 0) {
+        disable_gdbstub();
+    }
+}
+
+void gdb_handle_query_supported_user(const char *gdb_supported)
+{
+    if (strstr(gdb_supported, "fork-events+")) {
+        gdbserver_user_state.fork_events = true;
+    }
+    g_string_append(gdbserver_state.str_buf, ";fork-events+");
+}
+
+bool gdb_handle_set_thread_user(uint32_t pid, uint32_t tid)
+{
+    if (gdbserver_user_state.fork_state == GDB_FORK_ACTIVE &&
+            pid == gdbserver_user_state.fork_peer_pid &&
+            tid == gdbserver_user_state.fork_peer_tid) {
+        gdbserver_user_state.fork_state = GDB_FORK_DEACTIVATING;
+        gdb_put_packet("OK");
+        return true;
+    }
+    return false;
+}
+
+bool gdb_handle_detach_user(uint32_t pid)
+{
+    bool enable;
+
+    if (gdbserver_user_state.fork_state == GDB_FORK_ACTIVE) {
+        enable = pid == gdbserver_user_state.fork_peer_pid;
+        if (enable || pid == getpid()) {
+            gdbserver_user_state.fork_state = enable ? GDB_FORK_ENABLING :
+                                                       GDB_FORK_DISABLING;
+            gdb_put_packet("OK");
+            return true;
+        }
+    }
+    return false;
 }
 
 /*