diff mbox series

[v2,04/19] gendwarfksyms: Add support for type pointers

Message ID 20240815173903.4172139-25-samitolvanen@google.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Aug. 15, 2024, 5:39 p.m. UTC
The compiler may choose not to emit type information in DWARF for
external symbols. Clang, for example, does this for symbols not
defined in the current TU.

To provide a way to work around this issue, add support for
__gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
the necessary type information in DWARF also for the missing symbols.

Example usage:

  #define GENDWARFKSYMS_PTR(sym) \
      static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
          __section(".discard.gendwarfksyms") = &sym;

  extern int external_symbol(void);
  GENDWARFKSYMS_PTR(external_symbol);

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/gendwarfksyms/dwarf.c         | 26 +++++++++++++++++++++++++-
 scripts/gendwarfksyms/gendwarfksyms.h |  6 ++++++
 scripts/gendwarfksyms/symbols.c       | 16 ++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

Comments

Masahiro Yamada Aug. 28, 2024, 6:50 a.m. UTC | #1
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> The compiler may choose not to emit type information in DWARF for
> external symbols. Clang, for example, does this for symbols not
> defined in the current TU.
>
> To provide a way to work around this issue, add support for
> __gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
> the necessary type information in DWARF also for the missing symbols.
>
> Example usage:
>
>   #define GENDWARFKSYMS_PTR(sym) \
>       static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
>           __section(".discard.gendwarfksyms") = &sym;
>
>   extern int external_symbol(void);
>   GENDWARFKSYMS_PTR(external_symbol);
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>




Commit ddb5cdbafaaad6b99d7007ae1740403124502d03
had a similar idea; it has a reference to each
export symbol, including the ones defined in different TUs,
but in assembly code.

Didn't it suffice your need?





> ---
>  scripts/gendwarfksyms/dwarf.c         | 26 +++++++++++++++++++++++++-
>  scripts/gendwarfksyms/gendwarfksyms.h |  6 ++++++
>  scripts/gendwarfksyms/symbols.c       | 16 ++++++++++++++++
>  3 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 71cfab0553da..956b30224316 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -94,6 +94,28 @@ static int process_variable(struct state *state, Dwarf_Die *die)
>         return check(process(state, "variable;\n"));
>  }
>
> +static int process_symbol_ptr(struct state *state, Dwarf_Die *die)
> +{
> +       Dwarf_Die ptr_type;
> +       Dwarf_Die type;
> +
> +       if (!get_ref_die_attr(die, DW_AT_type, &ptr_type) ||
> +           dwarf_tag(&ptr_type) != DW_TAG_pointer_type) {
> +               error("%s must be a pointer type!", get_name(die));
> +               return -1;
> +       }
> +
> +       if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) {
> +               error("%s pointer missing a type attribute?", get_name(die));
> +               return -1;
> +       }
> +
> +       if (dwarf_tag(&type) == DW_TAG_subroutine_type)
> +               return check(process_subprogram(state, &type));
> +       else
> +               return check(process_variable(state, &ptr_type));
> +}
> +
>  static int process_exported_symbols(struct state *state, Dwarf_Die *die)
>  {
>         int tag = dwarf_tag(die);
> @@ -114,7 +136,9 @@ static int process_exported_symbols(struct state *state, Dwarf_Die *die)
>
>                 debug("%s", state->sym->name);
>
> -               if (tag == DW_TAG_subprogram)
> +               if (is_symbol_ptr(get_name(&state->die)))
> +                       check(process_symbol_ptr(state, &state->die));
> +               else if (tag == DW_TAG_subprogram)
>                         check(process_subprogram(state, &state->die));
>                 else
>                         check(process_variable(state, &state->die));
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index cb9106dfddb9..8f6acd1b8f8f 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -61,6 +61,11 @@ extern bool debug;
>  /*
>   * symbols.c
>   */
> +
> +/* See symbols.c:is_symbol_ptr */
> +#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
> +#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
> +
>  struct symbol_addr {
>         uint32_t section;
>         Elf64_Addr address;
> @@ -78,6 +83,7 @@ struct symbol {
>         struct hlist_node name_hash;
>  };
>
> +extern bool is_symbol_ptr(const char *name);
>  extern int symbol_read_exports(FILE *file);
>  extern int symbol_read_symtab(int fd);
>  extern struct symbol *symbol_get(const char *name);
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index f96acb941196..d6d016458ae1 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -41,6 +41,20 @@ static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
>         return processed;
>  }
>
> +/*
> + * For symbols without debugging information (e.g. symbols defined in other
> + * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the
> + * kernel uses to ensure type information is present in the TU that exports
> + * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the
> + * exported symbol, e.g.:
> + *
> + *   typeof(symname) *__gendwarf_ptr_symname = &symname;
> + */
> +bool is_symbol_ptr(const char *name)
> +{
> +       return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN);
> +}
> +
>  static int for_each(const char *name, bool name_only, symbol_callback_t func,
>                     void *data)
>  {
> @@ -49,6 +63,8 @@ static int for_each(const char *name, bool name_only, symbol_callback_t func,
>
>         if (!name || !*name)
>                 return 0;
> +       if (is_symbol_ptr(name))
> +               name += SYMBOL_PTR_PREFIX_LEN;
>
>         hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
>                                     name_hash(name)) {
> --
> 2.46.0.184.g6999bdac58-goog
>
Masahiro Yamada Aug. 28, 2024, 7:15 a.m. UTC | #2
On Wed, Aug 28, 2024 at 3:50 PM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >
> > The compiler may choose not to emit type information in DWARF for
> > external symbols. Clang, for example, does this for symbols not
> > defined in the current TU.
> >
> > To provide a way to work around this issue, add support for
> > __gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
> > the necessary type information in DWARF also for the missing symbols.
> >
> > Example usage:
> >
> >   #define GENDWARFKSYMS_PTR(sym) \
> >       static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
> >           __section(".discard.gendwarfksyms") = &sym;
> >
> >   extern int external_symbol(void);
> >   GENDWARFKSYMS_PTR(external_symbol);
> >
> > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
>
>
>
>
> Commit ddb5cdbafaaad6b99d7007ae1740403124502d03
> had a similar idea; it has a reference to each
> export symbol, including the ones defined in different TUs,
> but in assembly code.
>
> Didn't it suffice your need?
>


Presumably, this is an unfortunate duplication, but I do not have an
idea to avoid it.

The symbol reference in assembly code works in *.S as well as *.c.

The C reference will pull-in the debug info, but it will not work in *.S





--
Best Regards
Masahiro Yamada
Sami Tolvanen Aug. 28, 2024, 9:58 p.m. UTC | #3
On Wed, Aug 28, 2024 at 04:15:03PM +0900, Masahiro Yamada wrote:
> On Wed, Aug 28, 2024 at 3:50 PM Masahiro Yamada <masahiroy@kernel.org> wrote:
> >
> > On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> > >
> > > The compiler may choose not to emit type information in DWARF for
> > > external symbols. Clang, for example, does this for symbols not
> > > defined in the current TU.
> > >
> > > To provide a way to work around this issue, add support for
> > > __gendwarfksyms_ptr_<symbol> pointers that force the compiler to emit
> > > the necessary type information in DWARF also for the missing symbols.
> > >
> > > Example usage:
> > >
> > >   #define GENDWARFKSYMS_PTR(sym) \
> > >       static typeof(sym) *__gendwarfksyms_ptr_##sym __used  \
> > >           __section(".discard.gendwarfksyms") = &sym;
> > >
> > >   extern int external_symbol(void);
> > >   GENDWARFKSYMS_PTR(external_symbol);
> > >
> > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> >
> >
> >
> >
> > Commit ddb5cdbafaaad6b99d7007ae1740403124502d03
> > had a similar idea; it has a reference to each
> > export symbol, including the ones defined in different TUs,
> > but in assembly code.
> >
> > Didn't it suffice your need?
> >
> 
> 
> Presumably, this is an unfortunate duplication, but I do not have an
> idea to avoid it.
> 
> The symbol reference in assembly code works in *.S as well as *.c.
> 
> The C reference will pull-in the debug info, but it will not work in *.S

Correct. I'm not a huge fan of the extra reference either, but I don't
see a cleaner way to ensure we always have all the types in DWARF.

Sami
diff mbox series

Patch

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 71cfab0553da..956b30224316 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -94,6 +94,28 @@  static int process_variable(struct state *state, Dwarf_Die *die)
 	return check(process(state, "variable;\n"));
 }
 
+static int process_symbol_ptr(struct state *state, Dwarf_Die *die)
+{
+	Dwarf_Die ptr_type;
+	Dwarf_Die type;
+
+	if (!get_ref_die_attr(die, DW_AT_type, &ptr_type) ||
+	    dwarf_tag(&ptr_type) != DW_TAG_pointer_type) {
+		error("%s must be a pointer type!", get_name(die));
+		return -1;
+	}
+
+	if (!get_ref_die_attr(&ptr_type, DW_AT_type, &type)) {
+		error("%s pointer missing a type attribute?", get_name(die));
+		return -1;
+	}
+
+	if (dwarf_tag(&type) == DW_TAG_subroutine_type)
+		return check(process_subprogram(state, &type));
+	else
+		return check(process_variable(state, &ptr_type));
+}
+
 static int process_exported_symbols(struct state *state, Dwarf_Die *die)
 {
 	int tag = dwarf_tag(die);
@@ -114,7 +136,9 @@  static int process_exported_symbols(struct state *state, Dwarf_Die *die)
 
 		debug("%s", state->sym->name);
 
-		if (tag == DW_TAG_subprogram)
+		if (is_symbol_ptr(get_name(&state->die)))
+			check(process_symbol_ptr(state, &state->die));
+		else if (tag == DW_TAG_subprogram)
 			check(process_subprogram(state, &state->die));
 		else
 			check(process_variable(state, &state->die));
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index cb9106dfddb9..8f6acd1b8f8f 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -61,6 +61,11 @@  extern bool debug;
 /*
  * symbols.c
  */
+
+/* See symbols.c:is_symbol_ptr */
+#define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
+#define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
+
 struct symbol_addr {
 	uint32_t section;
 	Elf64_Addr address;
@@ -78,6 +83,7 @@  struct symbol {
 	struct hlist_node name_hash;
 };
 
+extern bool is_symbol_ptr(const char *name);
 extern int symbol_read_exports(FILE *file);
 extern int symbol_read_symtab(int fd);
 extern struct symbol *symbol_get(const char *name);
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index f96acb941196..d6d016458ae1 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -41,6 +41,20 @@  static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
 	return processed;
 }
 
+/*
+ * For symbols without debugging information (e.g. symbols defined in other
+ * TUs), we also match __gendwarfksyms_ptr_<symbol_name> symbols, which the
+ * kernel uses to ensure type information is present in the TU that exports
+ * the symbol. A __gendwarfksyms_ptr pointer must have the same type as the
+ * exported symbol, e.g.:
+ *
+ *   typeof(symname) *__gendwarf_ptr_symname = &symname;
+ */
+bool is_symbol_ptr(const char *name)
+{
+	return name && !strncmp(name, SYMBOL_PTR_PREFIX, SYMBOL_PTR_PREFIX_LEN);
+}
+
 static int for_each(const char *name, bool name_only, symbol_callback_t func,
 		    void *data)
 {
@@ -49,6 +63,8 @@  static int for_each(const char *name, bool name_only, symbol_callback_t func,
 
 	if (!name || !*name)
 		return 0;
+	if (is_symbol_ptr(name))
+		name += SYMBOL_PTR_PREFIX_LEN;
 
 	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
 				    name_hash(name)) {