diff mbox series

[v2,01/19] tools: Add gendwarfksyms

Message ID 20240815173903.4172139-22-samitolvanen@google.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Aug. 15, 2024, 5:39 p.m. UTC
Add a basic DWARF parser, which uses libdw to traverse the debugging
information in an object file and looks for functions and variables.
In follow-up patches, this will be expanded to produce symbol versions
for CONFIG_MODVERSIONS from DWARF.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 kernel/module/Kconfig                 |   8 ++
 scripts/Makefile                      |   1 +
 scripts/gendwarfksyms/.gitignore      |   2 +
 scripts/gendwarfksyms/Makefile        |   7 ++
 scripts/gendwarfksyms/dwarf.c         |  87 +++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.c | 146 ++++++++++++++++++++++++++
 scripts/gendwarfksyms/gendwarfksyms.h |  78 ++++++++++++++
 7 files changed, 329 insertions(+)
 create mode 100644 scripts/gendwarfksyms/.gitignore
 create mode 100644 scripts/gendwarfksyms/Makefile
 create mode 100644 scripts/gendwarfksyms/dwarf.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
 create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h

Comments

Greg Kroah-Hartman Aug. 16, 2024, 7:14 a.m. UTC | #1
On Thu, Aug 15, 2024 at 05:39:05PM +0000, Sami Tolvanen wrote:
> --- /dev/null
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -0,0 +1,87 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later

Sorry, but I have to ask, do you _REALLY_ mean "or later" here and in
other places in this series?  If so, great, but I want to be sure, as I
know:

> + * Copyright (C) 2024 Google LLC

Has some issues with the types of licenses that marking will cover.

thanks,

greg k-h
Petr Pavlu Aug. 26, 2024, 5:41 p.m. UTC | #2
On 8/15/24 19:39, Sami Tolvanen wrote:
> Add a basic DWARF parser, which uses libdw to traverse the debugging
> information in an object file and looks for functions and variables.
> In follow-up patches, this will be expanded to produce symbol versions
> for CONFIG_MODVERSIONS from DWARF.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  kernel/module/Kconfig                 |   8 ++
>  scripts/Makefile                      |   1 +
>  scripts/gendwarfksyms/.gitignore      |   2 +
>  scripts/gendwarfksyms/Makefile        |   7 ++
>  scripts/gendwarfksyms/dwarf.c         |  87 +++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.c | 146 ++++++++++++++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.h |  78 ++++++++++++++
>  7 files changed, 329 insertions(+)
>  create mode 100644 scripts/gendwarfksyms/.gitignore
>  create mode 100644 scripts/gendwarfksyms/Makefile
>  create mode 100644 scripts/gendwarfksyms/dwarf.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
> 
> [...]
> +static int parse_options(int argc, const char **argv)
> +{
> +	for (int i = 1; i < argc; i++) {
> +		bool flag = false;
> +
> +		for (int j = 0; j < ARRAY_SIZE(options); j++) {
> +			if (strcmp(argv[i], options[j].arg))
> +				continue;
> +
> +			*options[j].flag = true;
> +
> +			if (options[j].param) {
> +				if (++i >= argc) {
> +					error("%s needs an argument",
> +					      options[j].arg);
> +					return -1;
> +				}
> +
> +				*options[j].param = argv[i];
> +			}
> +
> +			flag = true;
> +			break;
> +		}
> +
> +		if (!flag)
> +			object_files[object_count++] = argv[i];

I would rather add a check that this doesn't produce an out-of-bounds
access.

> [...]
> +int main(int argc, const char **argv)
> +{
> +	unsigned int n;
> +
> +	if (parse_options(argc, argv) < 0)
> +		return usage();
> +
> +	for (n = 0; n < object_count; n++) {
> +		Dwfl *dwfl;
> +		int fd;
> +
> +		fd = open(object_files[n], O_RDONLY);
> +		if (fd == -1) {
> +			error("open failed for '%s': %s", object_files[n],
> +			      strerror(errno));
> +			return -1;
> +		}
> +
> +		dwfl = dwfl_begin(&callbacks);
> +		if (!dwfl) {
> +			error("dwfl_begin failed for '%s': %s", object_files[n],
> +			      dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
> +					 fd)) {
> +			error("dwfl_report_offline failed for '%s': %s",
> +			      object_files[n], dwarf_errmsg(-1));
> +			return -1;
> +		}
> +
> +		dwfl_report_end(dwfl, NULL, NULL);
> +
> +		if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
> +			error("dwfl_getmodules failed for '%s'",
> +			      object_files[n]);
> +			return -1;
> +		}

I see that libdwfl has also directly function dwfl_nextcu(). Would it
make sense to use it to simplify the code?

> +
> +		dwfl_end(dwfl);
> +		close(fd);

Isn't fd consumed by dwfl_report_offline() on success? I'm seeing EBADF
from this close() call.

> +	}
> +
> +	return 0;
> +}
Sami Tolvanen Aug. 26, 2024, 6:47 p.m. UTC | #3
Hi Petr,

On Mon, Aug 26, 2024 at 10:42 AM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 8/15/24 19:39, Sami Tolvanen wrote:
> > +static int parse_options(int argc, const char **argv)
> > +{
> > +     for (int i = 1; i < argc; i++) {
> > +             bool flag = false;
> > +
> > +             for (int j = 0; j < ARRAY_SIZE(options); j++) {
> > +                     if (strcmp(argv[i], options[j].arg))
> > +                             continue;
> > +
> > +                     *options[j].flag = true;
> > +
> > +                     if (options[j].param) {
> > +                             if (++i >= argc) {
> > +                                     error("%s needs an argument",
> > +                                           options[j].arg);
> > +                                     return -1;
> > +                             }
> > +
> > +                             *options[j].param = argv[i];
> > +                     }
> > +
> > +                     flag = true;
> > +                     break;
> > +             }
> > +
> > +             if (!flag)
> > +                     object_files[object_count++] = argv[i];
>
> I would rather add a check that this doesn't produce an out-of-bounds
> access.

True, this could overflow object_files with a sufficient number of
arguments. I'll add a check.

> > [...]
> > +int main(int argc, const char **argv)
> > +{
> > +     unsigned int n;
> > +
> > +     if (parse_options(argc, argv) < 0)
> > +             return usage();
> > +
> > +     for (n = 0; n < object_count; n++) {
> > +             Dwfl *dwfl;
> > +             int fd;
> > +
> > +             fd = open(object_files[n], O_RDONLY);
> > +             if (fd == -1) {
> > +                     error("open failed for '%s': %s", object_files[n],
> > +                           strerror(errno));
> > +                     return -1;
> > +             }
> > +
> > +             dwfl = dwfl_begin(&callbacks);
> > +             if (!dwfl) {
> > +                     error("dwfl_begin failed for '%s': %s", object_files[n],
> > +                           dwarf_errmsg(-1));
> > +                     return -1;
> > +             }
> > +
> > +             if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
> > +                                      fd)) {
> > +                     error("dwfl_report_offline failed for '%s': %s",
> > +                           object_files[n], dwarf_errmsg(-1));
> > +                     return -1;
> > +             }
> > +
> > +             dwfl_report_end(dwfl, NULL, NULL);
> > +
> > +             if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
> > +                     error("dwfl_getmodules failed for '%s'",
> > +                           object_files[n]);
> > +                     return -1;
> > +             }
>
> I see that libdwfl has also directly function dwfl_nextcu(). Would it
> make sense to use it to simplify the code?

How do you propose using the function? This loop goes through multiple
input files, should we need them, and we iterate through all the CUs
in process_modules.

> > +
> > +             dwfl_end(dwfl);
> > +             close(fd);
>
> Isn't fd consumed by dwfl_report_offline() on success? I'm seeing EBADF
> from this close() call.

Good catch! I'll drop this in v3.

Sami
Sami Tolvanen Aug. 27, 2024, 4:44 p.m. UTC | #4
Hi Greg,

On Fri, Aug 16, 2024 at 12:14 AM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Thu, Aug 15, 2024 at 05:39:05PM +0000, Sami Tolvanen wrote:
> > --- /dev/null
> > +++ b/scripts/gendwarfksyms/dwarf.c
> > @@ -0,0 +1,87 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
>
> Sorry, but I have to ask, do you _REALLY_ mean "or later" here and in
> other places in this series?  If so, great, but I want to be sure, as I
> know:
>
> > + * Copyright (C) 2024 Google LLC
>
> Has some issues with the types of licenses that marking will cover.

Thanks for bringing this up. The license is inherited from the
genksyms code that's included in this series, but I'll consult with
our licensing folks and adjust this in v3 for the other parts if
needed.

Sami
Petr Pavlu Aug. 28, 2024, 12:31 p.m. UTC | #5
On 8/26/24 20:47, Sami Tolvanen wrote:
> On Mon, Aug 26, 2024 at 10:42 AM Petr Pavlu <petr.pavlu@suse.com> wrote:
>> On 8/15/24 19:39, Sami Tolvanen wrote:
>>> [...]
>>> +int main(int argc, const char **argv)
>>> +{
>>> +     unsigned int n;
>>> +
>>> +     if (parse_options(argc, argv) < 0)
>>> +             return usage();
>>> +
>>> +     for (n = 0; n < object_count; n++) {
>>> +             Dwfl *dwfl;
>>> +             int fd;
>>> +
>>> +             fd = open(object_files[n], O_RDONLY);
>>> +             if (fd == -1) {
>>> +                     error("open failed for '%s': %s", object_files[n],
>>> +                           strerror(errno));
>>> +                     return -1;
>>> +             }
>>> +
>>> +             dwfl = dwfl_begin(&callbacks);
>>> +             if (!dwfl) {
>>> +                     error("dwfl_begin failed for '%s': %s", object_files[n],
>>> +                           dwarf_errmsg(-1));
>>> +                     return -1;
>>> +             }
>>> +
>>> +             if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
>>> +                                      fd)) {
>>> +                     error("dwfl_report_offline failed for '%s': %s",
>>> +                           object_files[n], dwarf_errmsg(-1));
>>> +                     return -1;
>>> +             }
>>> +
>>> +             dwfl_report_end(dwfl, NULL, NULL);
>>> +
>>> +             if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
>>> +                     error("dwfl_getmodules failed for '%s'",
>>> +                           object_files[n]);
>>> +                     return -1;
>>> +             }
>>
>> I see that libdwfl has also directly function dwfl_nextcu(). Would it
>> make sense to use it to simplify the code?
> 
> How do you propose using the function? This loop goes through multiple
> input files, should we need them, and we iterate through all the CUs
> in process_modules.

I was thinking it could be possible to replace the code to traverse
modules and their CUs, that is functions process_modules() and
process_module(), with dwfl_nextcu(). However, I now notice that more
work is added in subsequent patches to process_modules() so this
wouldn't quite work.

I would then only suggest to change some function names in the current
code. Function process_modules() is a callback to process a single
module and so it would be better to name it process_module(). The
present function process_module() actually processes a compilation unit
DIE so I would rename it to something like process_cu().

On 8/15/24 19:39, Sami Tolvanen wrote:
> +int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
> +{
> +	struct state state = { .mod = mod, .dbg = dbg };
> +
> +	return check(process_die_container(
> +		&state, cudie, process_exported_symbols, match_all));
> +}

Mostly a minor suggestion too. Looking at the entire series, state.mod
ends up unused and state.dbg is only used in process_cached() where it
could be possibly replaced by doing dwarf_cu_getdwarf(die->cu)?

Removing these two members from the state struct would then allow to
instantiate a new state in process_exported_symbols() for each processed
symbol. That looks cleaner than changing state.sym and resetting some
parts of the state as the function walks over the exported symbols.
Masahiro Yamada Aug. 28, 2024, 5:45 p.m. UTC | #6
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> Add a basic DWARF parser, which uses libdw to traverse the debugging
> information in an object file and looks for functions and variables.
> In follow-up patches, this will be expanded to produce symbol versions
> for CONFIG_MODVERSIONS from DWARF.



I do not think it would make sense to split the patch
until this tool starts to do something useful.






>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  kernel/module/Kconfig                 |   8 ++
>  scripts/Makefile                      |   1 +
>  scripts/gendwarfksyms/.gitignore      |   2 +
>  scripts/gendwarfksyms/Makefile        |   7 ++
>  scripts/gendwarfksyms/dwarf.c         |  87 +++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.c | 146 ++++++++++++++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.h |  78 ++++++++++++++
>  7 files changed, 329 insertions(+)
>  create mode 100644 scripts/gendwarfksyms/.gitignore
>  create mode 100644 scripts/gendwarfksyms/Makefile
>  create mode 100644 scripts/gendwarfksyms/dwarf.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
>
> diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
> index 4047b6d48255..a506d4ac660f 100644
> --- a/kernel/module/Kconfig
> +++ b/kernel/module/Kconfig
> @@ -168,6 +168,14 @@ config MODVERSIONS
>           make them incompatible with the kernel you are running.  If
>           unsure, say N.
>
> +config GENDWARFKSYMS
> +       bool
> +       depends on DEBUG_INFO
> +       # Requires full debugging information, split DWARF not supported.
> +       depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
> +       # Requires ELF object files.
> +       depends on !LTO
> +
>  config ASM_MODVERSIONS
>         bool
>         default HAVE_ASM_MODVERSIONS && MODVERSIONS
> diff --git a/scripts/Makefile b/scripts/Makefile
> index dccef663ca82..2fd0199662e9 100644
> --- a/scripts/Makefile
> +++ b/scripts/Makefile
> @@ -54,6 +54,7 @@ targets += module.lds
>
>  subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
>  subdir-$(CONFIG_MODVERSIONS) += genksyms
> +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
>  subdir-$(CONFIG_SECURITY_SELINUX) += selinux
>
>  # Let clean descend into subdirs
> diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
> new file mode 100644
> index 000000000000..ab8c763b3afe
> --- /dev/null
> +++ b/scripts/gendwarfksyms/.gitignore
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +/gendwarfksyms
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> new file mode 100644
> index 000000000000..c1389c161f9c
> --- /dev/null
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -0,0 +1,7 @@
> +hostprogs-always-y += gendwarfksyms
> +
> +gendwarfksyms-objs += gendwarfksyms.o
> +gendwarfksyms-objs += dwarf.o
> +
> +HOST_EXTRACFLAGS := -I $(srctree)/tools/include
> +HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> new file mode 100644
> index 000000000000..65a29d0bd8f4
> --- /dev/null
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -0,0 +1,87 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +/*
> + * Type string processing
> + */
> +static int process(struct state *state, const char *s)
> +{
> +       s = s ?: "<null>";
> +
> +       if (debug)
> +               fputs(s, stderr);
> +
> +       return 0;
> +}
> +
> +bool match_all(Dwarf_Die *die)
> +{
> +       return true;
> +}
> +
> +int process_die_container(struct state *state, Dwarf_Die *die,
> +                         die_callback_t func, die_match_callback_t match)
> +{
> +       Dwarf_Die current;
> +       int res;
> +
> +       res = checkp(dwarf_child(die, &current));
> +       while (!res) {
> +               if (match(&current))
> +                       check(func(state, &current));
> +               res = checkp(dwarf_siblingof(&current, &current));
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Symbol processing
> + */
> +static int process_subprogram(struct state *state, Dwarf_Die *die)
> +{
> +       return check(process(state, "subprogram;\n"));
> +}
> +
> +static int process_variable(struct state *state, Dwarf_Die *die)
> +{
> +       return check(process(state, "variable;\n"));
> +}
> +
> +static int process_exported_symbols(struct state *state, Dwarf_Die *die)
> +{
> +       int tag = dwarf_tag(die);
> +
> +       switch (tag) {
> +       /* Possible containers of exported symbols */
> +       case DW_TAG_namespace:
> +       case DW_TAG_class_type:
> +       case DW_TAG_structure_type:
> +               return check(process_die_container(
> +                       state, die, process_exported_symbols, match_all));
> +
> +       /* Possible exported symbols */
> +       case DW_TAG_subprogram:
> +       case DW_TAG_variable:
> +               if (tag == DW_TAG_subprogram)
> +                       check(process_subprogram(state, die));
> +               else
> +                       check(process_variable(state, die));
> +
> +               return 0;
> +       default:
> +               return 0;
> +       }
> +}
> +
> +int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
> +{
> +       struct state state = { .mod = mod, .dbg = dbg };
> +
> +       return check(process_die_container(
> +               &state, cudie, process_exported_symbols, match_all));
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> new file mode 100644
> index 000000000000..27f2d6423c45
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -0,0 +1,146 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include "gendwarfksyms.h"
> +
> +/*
> + * Options
> + */
> +
> +/* Print out debugging information to stderr */
> +bool debug;
> +
> +static const struct {
> +       const char *arg;
> +       bool *flag;
> +       const char **param;
> +} options[] = {
> +       { "--debug", &debug, NULL },
> +};
> +
> +static int usage(void)
> +{
> +       error("usage: gendwarfksyms [options] elf-object-file ...");



Description for each option, please.

See
scripts/genksyms/genksyms.c
as an example.


> +       return -1;
> +}
> +
> +static const char *object_files[MAX_INPUT_FILES];
> +static unsigned int object_count;
> +
> +static int parse_options(int argc, const char **argv)



Why not getopt_long()?


See
scripts/kallsyms.c
scripts/genksyms/genksyms.c
as examples.









> +{
> +       for (int i = 1; i < argc; i++) {
> +               bool flag = false;
> +
> +               for (int j = 0; j < ARRAY_SIZE(options); j++) {
> +                       if (strcmp(argv[i], options[j].arg))
> +                               continue;
> +
> +                       *options[j].flag = true;
> +
> +                       if (options[j].param) {
> +                               if (++i >= argc) {
> +                                       error("%s needs an argument",
> +                                             options[j].arg);
> +                                       return -1;
> +                               }
> +
> +                               *options[j].param = argv[i];
> +                       }
> +
> +                       flag = true;
> +                       break;
> +               }
> +
> +               if (!flag)
> +                       object_files[object_count++] = argv[i];
> +       }
> +
> +       return object_count ? 0 : -1;
> +}
> +
> +static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
> +                          Dwarf_Addr base, void *arg)
> +{
> +       Dwarf_Addr dwbias;
> +       Dwarf_Die cudie;
> +       Dwarf_CU *cu = NULL;
> +       Dwarf *dbg;
> +       int res;
> +
> +       debug("%s", name);
> +       dbg = dwfl_module_getdwarf(mod, &dwbias);
> +
> +       do {
> +               res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
> +               if (res < 0) {
> +                       error("dwarf_get_units failed: no debugging information?");
> +                       return -1;
> +               } else if (res == 1) {
> +                       break; /* No more units */
> +               }
> +
> +               check(process_module(mod, dbg, &cudie));
> +       } while (cu);
> +
> +       return DWARF_CB_OK;
> +}
> +
> +static const Dwfl_Callbacks callbacks = {
> +       .section_address = dwfl_offline_section_address,
> +       .find_debuginfo = dwfl_standard_find_debuginfo,
> +};
> +
> +int main(int argc, const char **argv)
> +{
> +       unsigned int n;
> +
> +       if (parse_options(argc, argv) < 0)
> +               return usage();
> +
> +       for (n = 0; n < object_count; n++) {


When does  "object_count >= 2" happen ?










> +               Dwfl *dwfl;
> +               int fd;
> +
> +               fd = open(object_files[n], O_RDONLY);
> +               if (fd == -1) {
> +                       error("open failed for '%s': %s", object_files[n],
> +                             strerror(errno));
> +                       return -1;
> +               }
> +
> +               dwfl = dwfl_begin(&callbacks);
> +               if (!dwfl) {
> +                       error("dwfl_begin failed for '%s': %s", object_files[n],
> +                             dwarf_errmsg(-1));
> +                       return -1;
> +               }
> +
> +               if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
> +                                        fd)) {
> +                       error("dwfl_report_offline failed for '%s': %s",
> +                             object_files[n], dwarf_errmsg(-1));
> +                       return -1;
> +               }
> +
> +               dwfl_report_end(dwfl, NULL, NULL);
> +
> +               if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
> +                       error("dwfl_getmodules failed for '%s'",
> +                             object_files[n]);
> +                       return -1;
> +               }
> +
> +               dwfl_end(dwfl);
> +               close(fd);
> +       }
> +
> +       return 0;
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> new file mode 100644
> index 000000000000..5ab7ce7d4efb
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <dwarf.h>
> +#include <elfutils/libdw.h>
> +#include <elfutils/libdwfl.h>
> +#include <linux/hashtable.h>
> +#include <inttypes.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +
> +#ifndef __GENDWARFKSYMS_H
> +#define __GENDWARFKSYMS_H
> +
> +/*
> + * Options -- in gendwarfksyms.c
> + */
> +extern bool debug;
> +
> +#define MAX_INPUT_FILES 128
> +
> +/*
> + * Output helpers
> + */
> +#define __PREFIX "gendwarfksyms: "
> +#define __println(prefix, format, ...)                                \
> +       fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \
> +               ##__VA_ARGS__)
> +
> +#define debug(format, ...)                                    \
> +       do {                                                  \
> +               if (debug)                                    \
> +                       __println("", format, ##__VA_ARGS__); \
> +       } while (0)
> +
> +#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__)
> +#define error(format, ...) __println("error: ", format, ##__VA_ARGS__)
> +
> +/*
> + * Error handling helpers
> + */
> +#define __check(expr, test, rv)                                 \
> +       ({                                                      \
> +               int __res = expr;                               \
> +               if (test) {                                     \
> +                       error("`%s` failed: %d", #expr, __res); \
> +                       return rv;                              \
> +               }                                               \
> +               __res;                                          \
> +       })
> +
> +/* Error == non-zero values */
> +#define check(expr) __check(expr, __res, -1)
> +/* Error == negative values */
> +#define checkp(expr) __check(expr, __res < 0, __res)
> +
> +/*
> + * dwarf.c
> + */
> +
> +struct state {
> +       Dwfl_Module *mod;
> +       Dwarf *dbg;
> +};
> +
> +typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
> +typedef bool (*die_match_callback_t)(Dwarf_Die *die);
> +extern bool match_all(Dwarf_Die *die);
> +
> +extern int process_die_container(struct state *state, Dwarf_Die *die,
> +                                die_callback_t func,
> +                                die_match_callback_t match);
> +
> +extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie);


No 'extern' for function declarations.





> +
> +#endif /* __GENDWARFKSYMS_H */
> --
> 2.46.0.184.g6999bdac58-goog
>


--
Best Regards

Masahiro Yamada
Sami Tolvanen Aug. 28, 2024, 9:28 p.m. UTC | #7
On Wed, Aug 28, 2024 at 02:31:05PM +0200, Petr Pavlu wrote:
> On 8/26/24 20:47, Sami Tolvanen wrote:
> > How do you propose using the function? This loop goes through multiple
> > input files, should we need them, and we iterate through all the CUs
> > in process_modules.
> 
> I was thinking it could be possible to replace the code to traverse
> modules and their CUs, that is functions process_modules() and
> process_module(), with dwfl_nextcu(). However, I now notice that more
> work is added in subsequent patches to process_modules() so this
> wouldn't quite work.
> 
> I would then only suggest to change some function names in the current
> code. Function process_modules() is a callback to process a single
> module and so it would be better to name it process_module(). The
> present function process_module() actually processes a compilation unit
> DIE so I would rename it to something like process_cu().

Sure, sounds reasonable. I'll rename these.

> On 8/15/24 19:39, Sami Tolvanen wrote:
> > +int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
> > +{
> > +	struct state state = { .mod = mod, .dbg = dbg };
> > +
> > +	return check(process_die_container(
> > +		&state, cudie, process_exported_symbols, match_all));
> > +}
> 
> Mostly a minor suggestion too.. Looking at the entire series, state.mod
> ends up unused and state.dbg is only used in process_cached() where it
> could be possibly replaced by doing dwarf_cu_getdwarf(die->cu)?

Ah yes, mod was a leftover from previous refactoring. I'll clean this
up.

> Removing these two members from the state struct would then allow to
> instantiate a new state in process_exported_symbols() for each processed
> symbol. That looks cleaner than changing state.sym and resetting some
> parts of the state as the function walks over the exported symbols.

Agreed, that makes sense.

Sami
Sami Tolvanen Aug. 28, 2024, 9:32 p.m. UTC | #8
Hi Masahiro,

On Thu, Aug 29, 2024 at 02:45:03AM +0900, Masahiro Yamada wrote:
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> > +static int usage(void)
> > +{
> > +       error("usage: gendwarfksyms [options] elf-object-file ...");
> 
> 
> 
> Description for each option, please.

Sure, will add.

> > +static int parse_options(int argc, const char **argv)
> 
> 
> 
> Why not getopt_long()?

Good point, I'll switch to getopt.

> > +int main(int argc, const char **argv)
> > +{
> > +       unsigned int n;
> > +
> > +       if (parse_options(argc, argv) < 0)
> > +               return usage();
> > +
> > +       for (n = 0; n < object_count; n++) {
> 
> 
> When does  "object_count >= 2" happen ?

Right now it doesn't, but if we want to support LTO, we'll need to also
process the temporary object files we build for stand-alone assembly to
find types for the symbols exported there.

> > +extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie);
> 
> 
> No 'extern' for function declarations.

Ack, I'll drop these. Thanks for the review!

Sami
Masahiro Yamada Sept. 5, 2024, 2:29 a.m. UTC | #9
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> Add a basic DWARF parser, which uses libdw to traverse the debugging
> information in an object file and looks for functions and variables.
> In follow-up patches, this will be expanded to produce symbol versions
> for CONFIG_MODVERSIONS from DWARF.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  kernel/module/Kconfig                 |   8 ++
>  scripts/Makefile                      |   1 +
>  scripts/gendwarfksyms/.gitignore      |   2 +
>  scripts/gendwarfksyms/Makefile        |   7 ++
>  scripts/gendwarfksyms/dwarf.c         |  87 +++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.c | 146 ++++++++++++++++++++++++++
>  scripts/gendwarfksyms/gendwarfksyms.h |  78 ++++++++++++++
>  7 files changed, 329 insertions(+)
>  create mode 100644 scripts/gendwarfksyms/.gitignore
>  create mode 100644 scripts/gendwarfksyms/Makefile
>  create mode 100644 scripts/gendwarfksyms/dwarf.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
>  create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
>
> diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
> index 4047b6d48255..a506d4ac660f 100644
> --- a/kernel/module/Kconfig
> +++ b/kernel/module/Kconfig
> @@ -168,6 +168,14 @@ config MODVERSIONS
>           make them incompatible with the kernel you are running.  If
>           unsure, say N.
>
> +config GENDWARFKSYMS
> +       bool
> +       depends on DEBUG_INFO
> +       # Requires full debugging information, split DWARF not supported.
> +       depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
> +       # Requires ELF object files.
> +       depends on !LTO
> +
>  config ASM_MODVERSIONS
>         bool
>         default HAVE_ASM_MODVERSIONS && MODVERSIONS
> diff --git a/scripts/Makefile b/scripts/Makefile
> index dccef663ca82..2fd0199662e9 100644
> --- a/scripts/Makefile
> +++ b/scripts/Makefile
> @@ -54,6 +54,7 @@ targets += module.lds
>
>  subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
>  subdir-$(CONFIG_MODVERSIONS) += genksyms
> +subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
>  subdir-$(CONFIG_SECURITY_SELINUX) += selinux
>
>  # Let clean descend into subdirs
> diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
> new file mode 100644
> index 000000000000..ab8c763b3afe
> --- /dev/null
> +++ b/scripts/gendwarfksyms/.gitignore
> @@ -0,0 +1,2 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +/gendwarfksyms
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> new file mode 100644
> index 000000000000..c1389c161f9c
> --- /dev/null
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -0,0 +1,7 @@
> +hostprogs-always-y += gendwarfksyms
> +
> +gendwarfksyms-objs += gendwarfksyms.o
> +gendwarfksyms-objs += dwarf.o
> +
> +HOST_EXTRACFLAGS := -I $(srctree)/tools/include
> +HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> new file mode 100644
> index 000000000000..65a29d0bd8f4
> --- /dev/null
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -0,0 +1,87 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +/*
> + * Type string processing
> + */
> +static int process(struct state *state, const char *s)
> +{
> +       s = s ?: "<null>";
> +
> +       if (debug)
> +               fputs(s, stderr);
> +
> +       return 0;
> +}
> +
> +bool match_all(Dwarf_Die *die)
> +{
> +       return true;
> +}
> +
> +int process_die_container(struct state *state, Dwarf_Die *die,
> +                         die_callback_t func, die_match_callback_t match)
> +{
> +       Dwarf_Die current;
> +       int res;
> +
> +       res = checkp(dwarf_child(die, &current));
> +       while (!res) {
> +               if (match(&current))
> +                       check(func(state, &current));
> +               res = checkp(dwarf_siblingof(&current, &current));
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Symbol processing
> + */
> +static int process_subprogram(struct state *state, Dwarf_Die *die)
> +{
> +       return check(process(state, "subprogram;\n"));
> +}
> +
> +static int process_variable(struct state *state, Dwarf_Die *die)
> +{
> +       return check(process(state, "variable;\n"));
> +}
> +
> +static int process_exported_symbols(struct state *state, Dwarf_Die *die)
> +{
> +       int tag = dwarf_tag(die);
> +
> +       switch (tag) {
> +       /* Possible containers of exported symbols */
> +       case DW_TAG_namespace:
> +       case DW_TAG_class_type:
> +       case DW_TAG_structure_type:
> +               return check(process_die_container(
> +                       state, die, process_exported_symbols, match_all));
> +
> +       /* Possible exported symbols */
> +       case DW_TAG_subprogram:
> +       case DW_TAG_variable:
> +               if (tag == DW_TAG_subprogram)
> +                       check(process_subprogram(state, die));
> +               else
> +                       check(process_variable(state, die));
> +
> +               return 0;
> +       default:
> +               return 0;
> +       }
> +}
> +
> +int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
> +{
> +       struct state state = { .mod = mod, .dbg = dbg };
> +
> +       return check(process_die_container(
> +               &state, cudie, process_exported_symbols, match_all));
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> new file mode 100644
> index 000000000000..27f2d6423c45
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -0,0 +1,146 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <stdarg.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include "gendwarfksyms.h"
> +
> +/*
> + * Options
> + */
> +
> +/* Print out debugging information to stderr */
> +bool debug;
> +
> +static const struct {
> +       const char *arg;
> +       bool *flag;
> +       const char **param;
> +} options[] = {
> +       { "--debug", &debug, NULL },
> +};
> +
> +static int usage(void)
> +{
> +       error("usage: gendwarfksyms [options] elf-object-file ...");
> +       return -1;
> +}
> +
> +static const char *object_files[MAX_INPUT_FILES];
> +static unsigned int object_count;
> +
> +static int parse_options(int argc, const char **argv)
> +{
> +       for (int i = 1; i < argc; i++) {
> +               bool flag = false;
> +
> +               for (int j = 0; j < ARRAY_SIZE(options); j++) {
> +                       if (strcmp(argv[i], options[j].arg))
> +                               continue;
> +
> +                       *options[j].flag = true;
> +
> +                       if (options[j].param) {
> +                               if (++i >= argc) {
> +                                       error("%s needs an argument",
> +                                             options[j].arg);
> +                                       return -1;
> +                               }
> +
> +                               *options[j].param = argv[i];
> +                       }
> +
> +                       flag = true;
> +                       break;
> +               }
> +
> +               if (!flag)
> +                       object_files[object_count++] = argv[i];
> +       }
> +
> +       return object_count ? 0 : -1;
> +}
> +
> +static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
> +                          Dwarf_Addr base, void *arg)
> +{
> +       Dwarf_Addr dwbias;
> +       Dwarf_Die cudie;
> +       Dwarf_CU *cu = NULL;
> +       Dwarf *dbg;
> +       int res;
> +
> +       debug("%s", name);
> +       dbg = dwfl_module_getdwarf(mod, &dwbias);
> +
> +       do {
> +               res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
> +               if (res < 0) {
> +                       error("dwarf_get_units failed: no debugging information?");
> +                       return -1;
> +               } else if (res == 1) {
> +                       break; /* No more units */
> +               }
> +
> +               check(process_module(mod, dbg, &cudie));
> +       } while (cu);
> +
> +       return DWARF_CB_OK;
> +}
> +
> +static const Dwfl_Callbacks callbacks = {
> +       .section_address = dwfl_offline_section_address,
> +       .find_debuginfo = dwfl_standard_find_debuginfo,
> +};
> +
> +int main(int argc, const char **argv)
> +{
> +       unsigned int n;
> +
> +       if (parse_options(argc, argv) < 0)
> +               return usage();
> +
> +       for (n = 0; n < object_count; n++) {
> +               Dwfl *dwfl;
> +               int fd;
> +
> +               fd = open(object_files[n], O_RDONLY);
> +               if (fd == -1) {
> +                       error("open failed for '%s': %s", object_files[n],
> +                             strerror(errno));
> +                       return -1;
> +               }
> +
> +               dwfl = dwfl_begin(&callbacks);
> +               if (!dwfl) {
> +                       error("dwfl_begin failed for '%s': %s", object_files[n],
> +                             dwarf_errmsg(-1));
> +                       return -1;
> +               }
> +
> +               if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
> +                                        fd)) {
> +                       error("dwfl_report_offline failed for '%s': %s",
> +                             object_files[n], dwarf_errmsg(-1));
> +                       return -1;
> +               }
> +
> +               dwfl_report_end(dwfl, NULL, NULL);
> +
> +               if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
> +                       error("dwfl_getmodules failed for '%s'",
> +                             object_files[n]);
> +                       return -1;
> +               }
> +
> +               dwfl_end(dwfl);
> +               close(fd);
> +       }
> +
> +       return 0;
> +}
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> new file mode 100644
> index 000000000000..5ab7ce7d4efb
> --- /dev/null
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include <dwarf.h>
> +#include <elfutils/libdw.h>
> +#include <elfutils/libdwfl.h>
> +#include <linux/hashtable.h>
> +#include <inttypes.h>
> +#include <stdlib.h>
> +#include <stdio.h>


Could you include external headers first,
then in-tree headers?
(and one blank line in-between).



Also, please consider using scripts/include/hashtable.h



How about this?


#include <dwarf.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>

#include <hashtable.h>






If necessary, you can use this patch too:
https://lore.kernel.org/linux-kbuild/20240904235500.700432-1-masahiroy@kernel.org/T/#u
Sami Tolvanen Sept. 5, 2024, 8:52 p.m. UTC | #10
Hi,

On Thu, Sep 5, 2024 at 2:30 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >
> > +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> > @@ -0,0 +1,78 @@
> > +/* SPDX-License-Identifier: GPL-2.0-or-later */
> > +/*
> > + * Copyright (C) 2024 Google LLC
> > + */
> > +
> > +#include <dwarf.h>
> > +#include <elfutils/libdw.h>
> > +#include <elfutils/libdwfl.h>
> > +#include <linux/hashtable.h>
> > +#include <inttypes.h>
> > +#include <stdlib.h>
> > +#include <stdio.h>
>
>
> Could you include external headers first,
> then in-tree headers?
> (and one blank line in-between).

Sure, I'll reorder this.

> Also, please consider using scripts/include/hashtable.h
>
>
>
> How about this?
>
>
> #include <dwarf.h>
> #include <elfutils/libdw.h>
> #include <elfutils/libdwfl.h>
> #include <inttypes.h>
> #include <stdlib.h>
> #include <stdio.h>
>
> #include <hashtable.h>
>
>
>
>
>
>
> If necessary, you can use this patch too:
> https://lore.kernel.org/linux-kbuild/20240904235500.700432-1-masahiroy@kernel.org/T/#u

Thanks for the patch! I think this would otherwise work, but I also
need jhash (or a similar hash function), and I can't combine the
tools/include version with this, because it ends up pulling in a
duplicate definition of struct list_head. Would you consider adding a
hash function as well?

Sami
Masahiro Yamada Sept. 10, 2024, 9:43 a.m. UTC | #11
On Fri, Sep 6, 2024 at 5:53 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> Hi,
>
> On Thu, Sep 5, 2024 at 2:30 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
> >
> > On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> > >
> > > +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> > > @@ -0,0 +1,78 @@
> > > +/* SPDX-License-Identifier: GPL-2.0-or-later */
> > > +/*
> > > + * Copyright (C) 2024 Google LLC
> > > + */
> > > +
> > > +#include <dwarf.h>
> > > +#include <elfutils/libdw.h>
> > > +#include <elfutils/libdwfl.h>
> > > +#include <linux/hashtable.h>
> > > +#include <inttypes.h>
> > > +#include <stdlib.h>
> > > +#include <stdio.h>
> >
> >
> > Could you include external headers first,
> > then in-tree headers?
> > (and one blank line in-between).
>
> Sure, I'll reorder this.
>
> > Also, please consider using scripts/include/hashtable.h
> >
> >
> >
> > How about this?
> >
> >
> > #include <dwarf.h>
> > #include <elfutils/libdw.h>
> > #include <elfutils/libdwfl.h>
> > #include <inttypes.h>
> > #include <stdlib.h>
> > #include <stdio.h>
> >
> > #include <hashtable.h>
> >
> >
> >
> >
> >
> >
> > If necessary, you can use this patch too:
> > https://lore.kernel.org/linux-kbuild/20240904235500.700432-1-masahiroy@kernel.org/T/#u
>
> Thanks for the patch! I think this would otherwise work, but I also
> need jhash (or a similar hash function), and I can't combine the
> tools/include version with this, because it ends up pulling in a
> duplicate definition of struct list_head. Would you consider adding a
> hash function as well?


I did it as part of my kconfig work.

Check scripts/include/hash.h added by the following patches.

https://lore.kernel.org/linux-kbuild/20240908124352.1828890-1-masahiroy@kernel.org/T/#mea41ff4c5b6c77aaaae1ed9dac6723bc2f705107
https://lore.kernel.org/linux-kbuild/20240908124352.1828890-1-masahiroy@kernel.org/T/#m9050a270fedb7df9a54e843674bc9ad8fd068f57


I think simple helpers are enough for name_hash and addr_hash,
but please let me know if you encounter a problem.
Sami Tolvanen Sept. 10, 2024, 9:09 p.m. UTC | #12
On Tue, Sep 10, 2024 at 2:44 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Sep 6, 2024 at 5:53 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >
> > Thanks for the patch! I think this would otherwise work, but I also
> > need jhash (or a similar hash function), and I can't combine the
> > tools/include version with this, because it ends up pulling in a
> > duplicate definition of struct list_head. Would you consider adding a
> > hash function as well?
>
>
> I did it as part of my kconfig work.
>
> Check scripts/include/hash.h added by the following patches.
>
> https://lore.kernel.org/linux-kbuild/20240908124352.1828890-1-masahiroy@kernel.org/T/#mea41ff4c5b6c77aaaae1ed9dac6723bc2f705107
> https://lore.kernel.org/linux-kbuild/20240908124352.1828890-1-masahiroy@kernel.org/T/#m9050a270fedb7df9a54e843674bc9ad8fd068f57
>
>
> I think simple helpers are enough for name_hash and addr_hash,
> but please let me know if you encounter a problem.

Great, thanks! Looks like this should have everything I need.

Sami
diff mbox series

Patch

diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 4047b6d48255..a506d4ac660f 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -168,6 +168,14 @@  config MODVERSIONS
 	  make them incompatible with the kernel you are running.  If
 	  unsure, say N.
 
+config GENDWARFKSYMS
+	bool
+	depends on DEBUG_INFO
+	# Requires full debugging information, split DWARF not supported.
+	depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
+	# Requires ELF object files.
+	depends on !LTO
+
 config ASM_MODVERSIONS
 	bool
 	default HAVE_ASM_MODVERSIONS && MODVERSIONS
diff --git a/scripts/Makefile b/scripts/Makefile
index dccef663ca82..2fd0199662e9 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -54,6 +54,7 @@  targets += module.lds
 
 subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
 subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
 subdir-$(CONFIG_SECURITY_SELINUX) += selinux
 
 # Let clean descend into subdirs
diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
new file mode 100644
index 000000000000..ab8c763b3afe
--- /dev/null
+++ b/scripts/gendwarfksyms/.gitignore
@@ -0,0 +1,2 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+/gendwarfksyms
diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
new file mode 100644
index 000000000000..c1389c161f9c
--- /dev/null
+++ b/scripts/gendwarfksyms/Makefile
@@ -0,0 +1,7 @@ 
+hostprogs-always-y += gendwarfksyms
+
+gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += dwarf.o
+
+HOST_EXTRACFLAGS := -I $(srctree)/tools/include
+HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
new file mode 100644
index 000000000000..65a29d0bd8f4
--- /dev/null
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -0,0 +1,87 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+/*
+ * Type string processing
+ */
+static int process(struct state *state, const char *s)
+{
+	/* Output is only produced in debug mode; @state is not used yet. */
+	if (!debug)
+		return 0;
+
+	/* A NULL string is printed as a visible placeholder. */
+	fputs(s ? s : "<null>", stderr);
+	return 0;
+}
+
+bool match_all(Dwarf_Die *die)
+{
+	return true; /* accepts every DIE; @die is not inspected */
+}
+
+/*
+ * Call @func for each direct child of @die that @match accepts.
+ *
+ * Returns 0 once all children have been visited. A negative result from
+ * dwarf_child()/dwarf_siblingof() is returned as is by checkp(), and a
+ * non-zero result from @func is turned into -1 by check().
+ */
+int process_die_container(struct state *state, Dwarf_Die *die,
+			  die_callback_t func, die_match_callback_t match)
+{
+	Dwarf_Die current;
+	int res;
+
+	/* Any non-zero result that is not an error: nothing to walk. */
+	res = checkp(dwarf_child(die, &current));
+	if (res)
+		return 0;
+
+	do {
+		if (match(&current))
+			check(func(state, &current));
+		res = checkp(dwarf_siblingof(&current, &current));
+	} while (!res);
+
+	return 0;
+}
+
+/*
+ * Symbol processing
+ */
+static int process_subprogram(struct state *state, Dwarf_Die *die)
+{
+	return check(process(state, "subprogram;\n")); /* marker only; @die unused */
+}
+
+static int process_variable(struct state *state, Dwarf_Die *die)
+{
+	return check(process(state, "variable;\n")); /* marker only; @die unused */
+}
+
+/*
+ * Dispatch on the DIE tag: descend into DIEs that may contain exported
+ * symbols, hand functions and variables to their handlers, and ignore
+ * every other tag. Returns 0 unless one of the check()ed calls fails.
+ */
+static int process_exported_symbols(struct state *state, Dwarf_Die *die)
+{
+	switch (dwarf_tag(die)) {
+	/* Possible containers of exported symbols */
+	case DW_TAG_namespace:
+	case DW_TAG_class_type:
+	case DW_TAG_structure_type:
+		return check(process_die_container(
+			state, die, process_exported_symbols, match_all));
+
+	/* Possible exported symbols */
+	case DW_TAG_subprogram:
+		check(process_subprogram(state, die));
+		break;
+	case DW_TAG_variable:
+		check(process_variable(state, die));
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
+{
+	struct state state = { .mod = mod, .dbg = dbg }; /* context handed to the DIE callbacks */
+
+	return check(process_die_container(
+		&state, cudie, process_exported_symbols, match_all)); /* visits every child of @cudie */
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
new file mode 100644
index 000000000000..27f2d6423c45
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -0,0 +1,146 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include "gendwarfksyms.h"
+
+/*
+ * Options
+ */
+
+/* Print out debugging information to stderr */
+bool debug;
+
+static const struct {
+	const char *arg; /* option string, compared against argv[i] */
+	bool *flag; /* set to true when the option is seen */
+	const char **param; /* if non-NULL, receives the following argument */
+} options[] = {
+	{ "--debug", &debug, NULL },
+};
+
+static int usage(void)
+{
+	error("usage: gendwarfksyms [options] elf-object-file ...");
+	return -1; /* always -1, so main() can "return usage();" */
+}
+
+static const char *object_files[MAX_INPUT_FILES];
+static unsigned int object_count;
+
+/*
+ * Parse the command line.
+ *
+ * An argument that matches an entry in options[] sets that entry's flag
+ * and, if the entry takes a parameter, consumes the next argument as its
+ * value. Every other argument is stored in object_files[] as an input
+ * file name.
+ *
+ * Returns 0 if at least one input file was given. Returns -1 if an option
+ * is missing its argument, if more than MAX_INPUT_FILES input files were
+ * given, or if there are no input files at all.
+ */
+static int parse_options(int argc, const char **argv)
+{
+	for (int i = 1; i < argc; i++) {
+		bool flag = false;
+
+		for (unsigned int j = 0; j < ARRAY_SIZE(options); j++) {
+			if (strcmp(argv[i], options[j].arg))
+				continue;
+
+			*options[j].flag = true;
+
+			if (options[j].param) {
+				if (++i >= argc) {
+					error("%s needs an argument",
+					      options[j].arg);
+					return -1;
+				}
+
+				*options[j].param = argv[i];
+			}
+
+			flag = true;
+			break;
+		}
+
+		if (flag)
+			continue;
+
+		/* object_files[] has a fixed size; never write past its end. */
+		if (object_count >= MAX_INPUT_FILES) {
+			error("too many input files (maximum is %d)",
+			      MAX_INPUT_FILES);
+			return -1;
+		}
+
+		object_files[object_count++] = argv[i];
+	}
+
+	return object_count ? 0 : -1;
+}
+
+/*
+ * Module callback passed to dwfl_getmodules(): runs process_module() on
+ * each unit that dwarf_get_units() yields for the module's debugging
+ * information. Returns DWARF_CB_OK on success and -1 on any failure.
+ */
+static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
+			   Dwarf_Addr base, void *arg)
+{
+	Dwarf_Addr dwbias;
+	Dwarf_Die cudie;
+	Dwarf_CU *cu = NULL;
+	Dwarf *dbg;
+	int res;
+
+	debug("%s", name);
+	dbg = dwfl_module_getdwarf(mod, &dwbias);
+
+	for (;;) {
+		/* The previous unit is passed back in to get the next one. */
+		res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
+		if (res == 1)
+			break; /* No more units */
+
+		if (res < 0) {
+			error("dwarf_get_units failed: no debugging information?");
+			return -1;
+		}
+
+		check(process_module(mod, dbg, &cudie));
+
+		if (!cu)
+			break;
+	}
+
+	return DWARF_CB_OK;
+}
+
+static const Dwfl_Callbacks callbacks = { /* handed to dwfl_begin() in main() */
+	.section_address = dwfl_offline_section_address,
+	.find_debuginfo = dwfl_standard_find_debuginfo,
+};
+
+/*
+ * Entry point: parse the command line, then open each input object file
+ * and walk its debugging information with process_modules().
+ *
+ * Returns 0 on success. Processing stops at the first file that fails,
+ * and -1 is returned after the error has been reported on stderr.
+ */
+int main(int argc, const char **argv)
+{
+	unsigned int n;
+
+	if (parse_options(argc, argv) < 0)
+		return usage();
+
+	for (n = 0; n < object_count; n++) {
+		Dwfl *dwfl;
+		int fd;
+
+		fd = open(object_files[n], O_RDONLY);
+		if (fd == -1) {
+			error("open failed for '%s': %s", object_files[n],
+			      strerror(errno));
+			return -1;
+		}
+
+		dwfl = dwfl_begin(&callbacks);
+		if (!dwfl) {
+			/* libdwfl failures are described by dwfl_errmsg() */
+			error("dwfl_begin failed for '%s': %s", object_files[n],
+			      dwfl_errmsg(-1));
+			close(fd);
+			return -1;
+		}
+
+		if (!dwfl_report_offline(dwfl, object_files[n], object_files[n],
+					 fd)) {
+			error("dwfl_report_offline failed for '%s': %s",
+			      object_files[n], dwfl_errmsg(-1));
+			/* The library only takes over fd on success. */
+			close(fd);
+			dwfl_end(dwfl);
+			return -1;
+		}
+
+		dwfl_report_end(dwfl, NULL, NULL);
+
+		if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
+			error("dwfl_getmodules failed for '%s'",
+			      object_files[n]);
+			dwfl_end(dwfl);
+			return -1;
+		}
+
+		/*
+		 * fd was consumed by dwfl_report_offline() and is released
+		 * together with the session, so it must not be closed again
+		 * here.
+		 */
+		dwfl_end(dwfl);
+	}
+
+	return 0;
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
new file mode 100644
index 000000000000..5ab7ce7d4efb
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -0,0 +1,78 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <linux/hashtable.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef __GENDWARFKSYMS_H
+#define __GENDWARFKSYMS_H
+
+/*
+ * Options -- in gendwarfksyms.c
+ */
+extern bool debug;
+
+#define MAX_INPUT_FILES 128
+
+/*
+ * Output helpers
+ */
+#define __PREFIX "gendwarfksyms: " /* tool name prepended to every message */
+#define __println(prefix, format, ...)                                \
+	fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \
+		##__VA_ARGS__) /* prefix and format must be string literals: they are concatenated */
+
+#define debug(format, ...)                                    \
+	do {                                                  \
+		if (debug) /* the bool option, not this macro */ \
+			__println("", format, ##__VA_ARGS__); \
+	} while (0)
+
+#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__) /* always printed */
+#define error(format, ...) __println("error: ", format, ##__VA_ARGS__) /* prints only; does not exit */
+
+/*
+ * Error handling helpers
+ */
+#define __check(expr, test, rv)                                 \
+	({                                                      \
+		int __res = expr; /* expr is evaluated once */  \
+		if (test) { /* test and rv may use __res */     \
+			error("`%s` failed: %d", #expr, __res); \
+			return rv; /* leaves the CALLING function */ \
+		}                                               \
+		__res; /* value of the whole expression */      \
+	})
+
+/* Error == non-zero values */
+#define check(expr) __check(expr, __res, -1)
+/* Error == negative values */
+#define checkp(expr) __check(expr, __res < 0, __res)
+
+/*
+ * dwarf.c
+ */
+
+struct state {
+	Dwfl_Module *mod; /* module given to process_module() */
+	Dwarf *dbg; /* Dwarf handle given to process_module() */
+};
+
+typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die); /* non-zero return is an error */
+typedef bool (*die_match_callback_t)(Dwarf_Die *die); /* true: pass the DIE to the callback */
+extern bool match_all(Dwarf_Die *die); /* matches every DIE */
+
+extern int process_die_container(struct state *state, Dwarf_Die *die,
+				 die_callback_t func,
+				 die_match_callback_t match); /* calls func on each matching child of die */
+
+extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); /* called once per unit DIE */
+
+#endif /* __GENDWARFKSYMS_H */