Message ID | 20240815173903.4172139-34-samitolvanen@google.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | Implement DWARF modversions | expand |
On 8/15/24 19:39, Sami Tolvanen wrote: > Add support for producing genksyms-style symtypes files. Process > die_map to find the longest expansions for each type, and use symtypes > references in type definitions. The basic file format is similar to > genksyms, with two notable exceptions: > > 1. Type names with spaces (common with Rust) in references are > wrapped in single quotes. E.g.: > > s#'core::result::Result<u8, core::num::error::ParseIntError>' > > 2. The actual type definition is the simple parsed DWARF format we > output with --dump-dies, not the preprocessed C-style format > genksyms produces. Thank you for adding this output to the tool. > > Signed-off-by: Sami Tolvanen <samitolvanen@google.com> > --- > scripts/gendwarfksyms/Makefile | 1 + > scripts/gendwarfksyms/die.c | 13 + > scripts/gendwarfksyms/dwarf.c | 14 +- > scripts/gendwarfksyms/gendwarfksyms.c | 28 +- > scripts/gendwarfksyms/gendwarfksyms.h | 21 +- > scripts/gendwarfksyms/symbols.c | 11 +- > scripts/gendwarfksyms/types.c | 439 ++++++++++++++++++++++++++ > 7 files changed, 517 insertions(+), 10 deletions(-) > create mode 100644 scripts/gendwarfksyms/types.c > > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile > index 681b42441840..4866a2fd0e46 100644 > --- a/scripts/gendwarfksyms/Makefile > +++ b/scripts/gendwarfksyms/Makefile > @@ -5,6 +5,7 @@ gendwarfksyms-objs += cache.o > gendwarfksyms-objs += die.o > gendwarfksyms-objs += dwarf.o > gendwarfksyms-objs += symbols.o > +gendwarfksyms-objs += types.o > > HOST_EXTRACFLAGS := -I $(srctree)/tools/include > HOSTLDLIBS_gendwarfksyms := -ldw -lelf > diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c > index fdd52df88fdd..e40f04b70f7f 100644 > --- a/scripts/gendwarfksyms/die.c > +++ b/scripts/gendwarfksyms/die.c > @@ -85,6 +85,19 @@ static void reset_die(struct die *cd) > cd->list = NULL; > } > > +int die_map_for_each(die_map_callback_t func, void *arg) > +{ > + struct die *cd; > + struct hlist_node *tmp; > + int i; > + > + hash_for_each_safe(die_map, i, tmp, cd, hash) { > + check(func(cd, arg)); > + } > + > + return 0; > +} > + > void die_map_free(void) > { > struct hlist_node *tmp; > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c > index 9bca21a71639..62241cc97a76 100644 > --- a/scripts/gendwarfksyms/dwarf.c > +++ b/scripts/gendwarfksyms/dwarf.c > @@ -60,11 +60,11 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die) > if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) > source = &origin; > > - state->sym = symbol_get(get_name(die)); > + state->sym = symbol_get_unprocessed(get_name(die)); > > /* Look up using the origin name if there are no matches. */ > if (!state->sym && source != die) > - state->sym = symbol_get(get_name(source)); > + state->sym = symbol_get_unprocessed(get_name(source)); > > state->die = *source; > return !!state->sym; > @@ -384,6 +384,7 @@ static int process_subroutine_type(struct state *state, struct die *cache, > return check(__process_subroutine_type(state, cache, die, > "subroutine_type")); > } > + > static int process_variant_type(struct state *state, struct die *cache, > Dwarf_Die *die) > { > @@ -695,14 +696,16 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) > static int process_subprogram(struct state *state, Dwarf_Die *die) > { > check(__process_subroutine_type(state, NULL, die, "subprogram")); > - return check(process(state, NULL, ";\n")); > + state->sym->state = MAPPED; > + return 0; > } > > static int process_variable(struct state *state, Dwarf_Die *die) > { > check(process(state, NULL, "variable ")); > check(process_type_attr(state, NULL, die)); > - return check(process(state, NULL, ";\n")); > + state->sym->state = MAPPED; > + return 0; > } > > static int process_symbol_ptr(struct state *state, Dwarf_Die *die) > @@ -757,6 +760,9 @@ static int process_exported_symbols(struct state *state, struct die *cache, > else > check(process_variable(state, &state->die)); > > + if (dump_dies) > + fputs("\n", stderr); > + > cache_clear_expanded(&state->expansion_cache); > return 0; > default: > diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c > index 1349e592783b..6a219a54c342 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.c > +++ b/scripts/gendwarfksyms/gendwarfksyms.c > @@ -20,6 +20,11 @@ bool debug; > bool dump_dies; > /* Print out inline debugging information about die_map changes */ > bool dump_die_map; > +/* Print out type_map contents */ > +bool dump_types; > +/* Produce a symtypes file */ > +bool symtypes; > +static const char *symtypes_file; > > static const struct { > const char *arg; > @@ -29,6 +34,8 @@ static const struct { > { "--debug", &debug, NULL }, > { "--dump-dies", &dump_dies, NULL }, > { "--dump-die-map", &dump_die_map, NULL }, > + { "--dump-types", &dump_types, NULL }, > + { "--symtypes", &symtypes, &symtypes_file }, > }; > > static int usage(void) > @@ -79,6 +86,7 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > Dwarf_Die cudie; > Dwarf_CU *cu = NULL; > Dwarf *dbg; > + FILE *symfile = arg; > int res; > > debug("%s", name); > @@ -100,6 +108,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > check(process_module(mod, dbg, &cudie)); > } while (cu); > > + /* > + * Use die_map to expand type strings and write them to `symfile`. > + */ > + check(generate_symtypes(symfile)); > die_map_free(); > > return DWARF_CB_OK; > @@ -112,6 +124,7 @@ static const Dwfl_Callbacks callbacks = { > > int main(int argc, const char **argv) > { > + FILE *symfile = NULL; > unsigned int n; > > if (parse_options(argc, argv) < 0) > @@ -122,6 +135,16 @@ int main(int argc, const char **argv) > > check(symbol_read_exports(stdin)); > > + if (symtypes_file) { > + symfile = fopen(symtypes_file, "w+"); The file is sufficient to open only for writing. > + > + if (!symfile) { > + error("fopen failed for '%s': %s", symtypes_file, > + strerror(errno)); > + return -1; > + } > + } > + > for (n = 0; n < object_count; n++) { > Dwfl *dwfl; > int fd; > @@ -151,7 +174,7 @@ int main(int argc, const char **argv) > > dwfl_report_end(dwfl, NULL, NULL); > > - if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) { > + if (dwfl_getmodules(dwfl, &process_modules, symfile, 0)) { > error("dwfl_getmodules failed for '%s'", > object_files[n]); > return -1; > @@ -161,5 +184,8 @@ int main(int argc, const char **argv) > close(fd); > } > > + if (symfile) > + fclose(symfile); > + > return 0; > } The fclose() call should be wrapped in check() to catch a situation when flushing the stream potentially failed. > diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h > index 7cd907e3d5e3..6edbd6478e0f 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.h > +++ b/scripts/gendwarfksyms/gendwarfksyms.h > @@ -22,6 +22,8 @@ > extern bool debug; > extern bool dump_dies; > extern bool dump_die_map; > +extern bool dump_types; > +extern bool symtypes; > > #define MAX_INPUT_FILES 128 > > @@ -89,6 +91,12 @@ extern bool dump_die_map; > #define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" > #define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) > > +/* See dwarf.c:is_declaration */ > +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_" > +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1) Nit: These defines should go into the patch 15/19 "gendwarfksyms: Add support for declaration-only data structures". > + > +enum symbol_state { UNPROCESSED, MAPPED }; > + > struct symbol_addr { > uint32_t section; > Elf64_Addr address; > @@ -109,12 +117,14 @@ struct symbol { > struct symbol_addr addr; > struct hlist_node addr_hash; > struct hlist_node name_hash; > + enum symbol_state state; > + uintptr_t die_addr; > }; > > extern bool is_symbol_ptr(const char *name); > extern int symbol_read_exports(FILE *file); > extern int symbol_read_symtab(int fd); > -extern struct symbol *symbol_get(const char *name); > +extern struct symbol *symbol_get_unprocessed(const char *name); > > /* > * die.c > @@ -157,12 +167,15 @@ struct die { > struct hlist_node hash; > }; > > +typedef int (*die_map_callback_t)(struct die *, void *arg); > + > extern int __die_map_get(uintptr_t addr, enum die_state state, > struct die **res); > extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res); > extern int die_map_add_string(struct die *pd, const char *str); > extern int die_map_add_linebreak(struct die *pd, int linebreak); > extern int die_map_add_die(struct die *pd, struct die *child); > +extern int die_map_for_each(die_map_callback_t func, void *arg); > extern void die_map_free(void); > > /* > @@ -222,4 +235,10 @@ extern int process_die_container(struct state *state, struct die *cache, > > extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); > > +/* > + * types.c > + */ > + > +extern int generate_symtypes(FILE *file); > + > #endif /* __GENDWARFKSYMS_H */ > diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c > index d6d016458ae1..8cc04e6295a7 100644 > --- a/scripts/gendwarfksyms/symbols.c > +++ b/scripts/gendwarfksyms/symbols.c > @@ -117,6 +117,7 @@ int symbol_read_exports(FILE *file) > > sym->name = name; > sym->addr.section = SHN_UNDEF; > + sym->state = UNPROCESSED; > name = NULL; > > hash_add(symbol_names, &sym->name_hash, name_hash(sym->name)); > @@ -132,19 +133,21 @@ int symbol_read_exports(FILE *file) > return 0; > } > > -static int get_symbol(struct symbol *sym, void *arg) > +static int get_unprocessed(struct symbol *sym, void *arg) > { > struct symbol **res = arg; > > - *res = sym; > + if (sym->state == UNPROCESSED) > + *res = sym; > + > return 0; > } > > -struct symbol *symbol_get(const char *name) > +struct symbol *symbol_get_unprocessed(const char *name) > { > struct symbol *sym = NULL; > > - for_each(name, false, get_symbol, &sym); > + for_each(name, false, get_unprocessed, &sym); > return sym; > } > > diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c > new file mode 100644 > index 000000000000..7b9997d8322d > --- /dev/null > +++ b/scripts/gendwarfksyms/types.c > @@ -0,0 +1,439 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2024 Google LLC > + */ > + > +#include "gendwarfksyms.h" > +#include "crc32.h" > + > +static struct expansion_cache expansion_cache; > + > +/* > + * A simple linked list of shared or owned strings to avoid copying strings > + * around when not necessary. > + */ > +struct type_list { > + const char *str; > + void *owned; > + struct type_list *next; > +}; > + > +static struct type_list *type_list_alloc(void) > +{ > + struct type_list *list; > + > + list = calloc(1, sizeof(struct type_list)); > + if (!list) > + error("calloc failed"); > + > + return list; > +} > + > +static void type_list_free(struct type_list *list) > +{ > + struct type_list *tmp; > + > + while (list) { > + if (list->owned) > + free(list->owned); > + > + tmp = list; > + list = list->next; > + free(tmp); > + } > +} > + > +static int type_list_append(struct type_list *list, const char *s, void *owned) > +{ > + if (!list || !s) > + return 0; > + > + while (list->next) > + list = list->next; > + > + if (list->str) { > + list->next = type_list_alloc(); > + > + if (!list->next) { > + error("type_list_alloc failed"); > + return -1; > + } > + > + list = list->next; > + } > + > + list->str = s; > + list->owned = owned; > + > + return strlen(list->str); > +} > + > +static int type_list_write(struct type_list *list, FILE *file) > +{ > + while (list) { > + if (list->str) > + checkp(fputs(list->str, file)); > + list = list->next; > + } > + > + return 0; > +} > + > +/* > + * An expanded type string in symtypes format. > + */ > +struct type_expansion { > + char *name; > + struct type_list *expanded; > + struct type_list *last; > + size_t len; > + struct hlist_node hash; > +}; I found the manipulation of type_expansion.expanded and type_expansion.last somewhat strange. The list starts already with one element in type_expansion_init(). This is apparently to make the last pointer valid. This element is however empty and gets only assigned on the first call to type_list_append(). Other elements are then added normally, always assigned. Perhaps consider using a regular list implementation, similarly to what was discussed under the patch 06/19 "gendwarfksyms: Add a cache for processed DIEs". > + > +static int type_expansion_init(struct type_expansion *type, bool alloc) > +{ > + memset(type, 0, sizeof(struct type_expansion)); > + if (alloc) { > + type->expanded = type_list_alloc(); > + if (!type->expanded) > + return -1; > + > + type->last = type->expanded; > + } > + return 0; > +} > + > +static inline void type_expansion_free(struct type_expansion *type) > +{ > + free(type->name); > + type_list_free(type->expanded); > + type_expansion_init(type, false); > +} > + > +static int type_expansion_append(struct type_expansion *type, const char *s, > + void *owned) > +{ > + type->len += checkp(type_list_append(type->last, s, owned)); > + > + if (type->last->next) > + type->last = type->last->next; > + > + return 0; > +} > + > +/* > + * type_map -- the longest expansions for each type. > + * > + * const char *name -> struct type_expansion * > + */ > +#define TYPE_HASH_BITS 16 > +static DEFINE_HASHTABLE(type_map, TYPE_HASH_BITS); > + > +static int type_map_get(const char *name, struct type_expansion **res) > +{ > + struct type_expansion *e; > + > + hash_for_each_possible(type_map, e, hash, name_hash(name)) { > + if (!strcmp(name, e->name)) { > + *res = e; > + return 0; > + } > + } > + > + return -1; > +} > + > +static int type_map_add(const char *name, struct type_expansion *type) > +{ > + struct type_expansion *e; > + > + if (type_map_get(name, &e)) { > + e = malloc(sizeof(struct type_expansion)); > + if (!e) { > + error("malloc failed"); > + return -1; > + } > + > + type_expansion_init(e, false); > + > + e->name = strdup(name); > + if (!e->name) { > + error("strdup failed"); > + return -1; > + } > + > + hash_add(type_map, &e->hash, name_hash(e->name)); > + > + if (dump_types) > + debug("adding %s", e->name); > + } else { > + /* Use the longest available expansion */ > + if (type->len <= e->len) > + return 0; > + > + type_list_free(e->expanded); > + > + if (dump_types) > + debug("replacing %s", e->name); > + } > + > + /* Take ownership of type->expanded */ > + e->expanded = type->expanded; > + e->last = type->last; > + e->len = type->len; > + type->expanded = NULL; > + type->last = NULL; > + type->len = 0; > + > + if (dump_types) { > + fputs(e->name, stderr); > + fputs(" ", stderr); > + type_list_write(e->expanded, stderr); > + fputs("\n", stderr); > + } > + > + return 0; > +} > + > +static int type_map_write(FILE *file) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + int i; > + > + if (!file) > + return 0; > + > + hash_for_each_safe(type_map, i, tmp, e, hash) { > + checkp(fputs(e->name, file)); > + checkp(fputs(" ", file)); > + type_list_write(e->expanded, file); > + checkp(fputs("\n", file)); > + } > + > + return 0; > +} > + > +static void type_map_free(void) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + int i; > + > + hash_for_each_safe(type_map, i, tmp, e, hash) { > + type_expansion_free(e); > + free(e); > + } > + > + hash_init(type_map); > +} > + > +/* > + * Type reference format: <prefix>#<name>, where prefix: > + * s -> structure > + * u -> union > + * e -> enum > + * t -> typedef > + * > + * Names with spaces are additionally wrapped in single quotes. > + */ > +static inline bool is_type_prefix(const char *s) > +{ > + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && > + s[1] == '#'; > +} > + > +static char get_type_prefix(int tag) > +{ > + switch (tag) { > + case DW_TAG_class_type: > + case DW_TAG_structure_type: > + return 's'; > + case DW_TAG_union_type: > + return 'u'; > + case DW_TAG_enumeration_type: > + return 'e'; > + case DW_TAG_typedef_type: > + return 't'; > + default: > + return 0; > + } > +} > + > +static char *get_type_name(struct die *cache) > +{ > + const char *format; > + char prefix; > + char *name; > + size_t len; > + > + if (cache->state == INCOMPLETE) { > + warn("found incomplete cache entry: %p", cache); > + return NULL; > + } > + if (!cache->fqn) > + return NULL; > + > + prefix = get_type_prefix(cache->tag); > + if (!prefix) > + return NULL; > + > + /* <prefix>#<type_name>\0 */ > + len = 2 + strlen(cache->fqn) + 1; > + > + /* Wrap names with spaces in single quotes */ > + if (strstr(cache->fqn, " ")) { > + format = "%c#'%s'"; > + len += 2; > + } else { > + format = "%c#%s"; > + } > + > + name = malloc(len); > + if (!name) { > + error("malloc failed"); > + return NULL; > + } > + > + if (snprintf(name, len, format, prefix, cache->fqn) >= len) { > + error("snprintf failed for '%s' (length %zu)", cache->fqn, > + len); > + free(name); > + return NULL; > + } This could be quite simplified: const char *quote = strstr(cache->fqn, " ") != NULL ? "'" : ""; if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) [...] > + > + return name; > +} > + > +static int __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive); > + > +static int type_expand_child(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct type_expansion child; > + char *name; > + > + name = get_type_name(cache); > + if (!name) > + return check(__type_expand(cache, type, recursive)); > + > + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { > + check(__cache_mark_expanded(&expansion_cache, cache->addr)); > + check(type_expansion_init(&child, true)); > + check(__type_expand(cache, &child, true)); > + check(type_map_add(name, &child)); > + type_expansion_free(&child); > + } > + > + check(type_expansion_append(type, name, name)); > + return 0; > +} > + > +static int __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct die_fragment *df = cache->list; > + struct die *child; > + > + while (df) { > + switch (df->type) { > + case STRING: > + check(type_expansion_append(type, df->data.str, NULL)); > + break; > + case DIE: > + /* Use a complete die_map expansion if available */ > + if (__die_map_get(df->data.addr, COMPLETE, &child) && > + __die_map_get(df->data.addr, UNEXPANDED, &child)) { > + error("unknown child: %" PRIxPTR, > + df->data.addr); > + return -1; > + } > + > + check(type_expand_child(child, type, recursive)); > + break; > + case LINEBREAK: > + /* > + * Keep whitespace in the symtypes format, but avoid > + * repeated spaces. > + */ > + if (!df->next || df->next->type != LINEBREAK) > + check(type_expansion_append(type, " ", NULL)); > + break; > + default: > + error("empty die_fragment in %p", cache); > + return -1; > + } > + > + df = df->next; > + } > + > + return 0; > +} > + > +static int type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + check(type_expansion_init(type, true)); > + check(__type_expand(cache, type, recursive)); > + cache_clear_expanded(&expansion_cache); > + return 0; > +} > + > +static int expand_type(struct die *cache, void *arg) > +{ > + struct type_expansion type; > + char *name; > + > + /* > + * Skip unexpanded die_map entries if there's a complete > + * expansion available for this DIE. > + */ > + if (cache->state == UNEXPANDED) > + __die_map_get(cache->addr, COMPLETE, &cache); > + > + if (cache->mapped) > + return 0; > + > + cache->mapped = true; > + > + name = get_type_name(cache); > + if (!name) > + return 0; > + > + debug("%s", name); > + check(type_expand(cache, &type, true)); > + check(type_map_add(name, &type)); > + > + type_expansion_free(&type); > + free(name); > + > + return 0; > +} > + > +int generate_symtypes(FILE *file) > +{ > + hash_init(expansion_cache.cache); > + > + /* > + * die_map processing: > + * > + * 1. die_map contains all types referenced in exported symbol > + * signatures, but can contain duplicates just like the original > + * DWARF, and some references may not be fully expanded depending > + * on how far we processed the DIE tree for that specific symbol. > + * > + * For each die_map entry, find the longest available expansion, > + * and add it to type_map. > + */ > + check(die_map_for_each(expand_type, NULL)); > + > + /* > + * 2. If a symtypes file is requested, write type_map contents to > + * the file. > + */ > + check(type_map_write(file)); > + type_map_free(); > + > + return 0; > +}
Hi Petr, On Tue, Sep 10, 2024 at 7:58 AM Petr Pavlu <petr.pavlu@suse.com> wrote: > > On 8/15/24 19:39, Sami Tolvanen wrote: > > @@ -122,6 +135,16 @@ int main(int argc, const char **argv) > > > > check(symbol_read_exports(stdin)); > > > > + if (symtypes_file) { > > + symfile = fopen(symtypes_file, "w+"); > > The file is sufficient to open only for writing. True, I'll drop the +. > > + if (symfile) > > + fclose(symfile); > > + > > return 0; > > } > > The fclose() call should be wrapped in check() to catch a situation when > flushing the stream potentially failed. Ack. > > +/* See dwarf.c:is_declaration */ > > +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_" > > +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1) > > Nit: These defines should go into the patch 15/19 "gendwarfksyms: Add > support for declaration-only data structures". Yeah, I noticed these too. Will fix in the next version. > > +struct type_expansion { > > + char *name; > > + struct type_list *expanded; > > + struct type_list *last; > > + size_t len; > > + struct hlist_node hash; > > +}; > > I found the manipulation of type_expansion.expanded and > type_expansion.last somewhat strange. > > The list starts already with one element in type_expansion_init(). This > is apparently to make the last pointer valid. This element is however > empty and gets only assigned on the first call to type_list_append(). > Other elements are then added normally, always assigned. > > Perhaps consider using a regular list implementation, similarly to what > was discussed under the patch 06/19 "gendwarfksyms: Add a cache for > processed DIEs". Agreed, I'll switch this to a regular list in v3. > > + /* Wrap names with spaces in single quotes */ > > + if (strstr(cache->fqn, " ")) { > > + format = "%c#'%s'"; > > + len += 2; > > + } else { > > + format = "%c#%s"; > > + } > > + > > + name = malloc(len); > > + if (!name) { > > + error("malloc failed"); > > + return NULL; > > + } > > + > > + if (snprintf(name, len, format, prefix, cache->fqn) >= len) { > > + error("snprintf failed for '%s' (length %zu)", cache->fqn, > > + len); > > + free(name); > > + return NULL; > > + } > > This could be quite simplified: > > const char *quote = strstr(cache->fqn, " ") != NULL ? "'" : ""; > if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) > [...] Good point, I'll change this too. Thanks for taking a look! Sami
diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index 681b42441840..4866a2fd0e46 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -5,6 +5,7 @@ gendwarfksyms-objs += cache.o gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o gendwarfksyms-objs += symbols.o +gendwarfksyms-objs += types.o HOST_EXTRACFLAGS := -I $(srctree)/tools/include HOSTLDLIBS_gendwarfksyms := -ldw -lelf diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c index fdd52df88fdd..e40f04b70f7f 100644 --- a/scripts/gendwarfksyms/die.c +++ b/scripts/gendwarfksyms/die.c @@ -85,6 +85,19 @@ static void reset_die(struct die *cd) cd->list = NULL; } +int die_map_for_each(die_map_callback_t func, void *arg) +{ + struct die *cd; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(die_map, i, tmp, cd, hash) { + check(func(cd, arg)); + } + + return 0; +} + void die_map_free(void) { struct hlist_node *tmp; diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index 9bca21a71639..62241cc97a76 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -60,11 +60,11 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die) if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) source = &origin; - state->sym = symbol_get(get_name(die)); + state->sym = symbol_get_unprocessed(get_name(die)); /* Look up using the origin name if there are no matches. */ if (!state->sym && source != die) - state->sym = symbol_get(get_name(source)); + state->sym = symbol_get_unprocessed(get_name(source)); state->die = *source; return !!state->sym; @@ -384,6 +384,7 @@ static int process_subroutine_type(struct state *state, struct die *cache, return check(__process_subroutine_type(state, cache, die, "subroutine_type")); } + static int process_variant_type(struct state *state, struct die *cache, Dwarf_Die *die) { @@ -695,14 +696,16 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) static int process_subprogram(struct state *state, Dwarf_Die *die) { check(__process_subroutine_type(state, NULL, die, "subprogram")); - return check(process(state, NULL, ";\n")); + state->sym->state = MAPPED; + return 0; } static int process_variable(struct state *state, Dwarf_Die *die) { check(process(state, NULL, "variable ")); check(process_type_attr(state, NULL, die)); - return check(process(state, NULL, ";\n")); + state->sym->state = MAPPED; + return 0; } static int process_symbol_ptr(struct state *state, Dwarf_Die *die) @@ -757,6 +760,9 @@ static int process_exported_symbols(struct state *state, struct die *cache, else check(process_variable(state, &state->die)); + if (dump_dies) + fputs("\n", stderr); + cache_clear_expanded(&state->expansion_cache); return 0; default: diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 1349e592783b..6a219a54c342 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -20,6 +20,11 @@ bool debug; bool dump_dies; /* Print out inline debugging information about die_map changes */ bool dump_die_map; +/* Print out type_map contents */ +bool dump_types; +/* Produce a symtypes file */ +bool symtypes; +static const char *symtypes_file; static const struct { const char *arg; @@ -29,6 +34,8 @@ static const struct { { "--debug", &debug, NULL }, { "--dump-dies", &dump_dies, NULL }, { "--dump-die-map", &dump_die_map, NULL }, + { "--dump-types", &dump_types, NULL }, + { "--symtypes", &symtypes, &symtypes_file }, }; static int usage(void) @@ -79,6 +86,7 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, Dwarf_Die cudie; Dwarf_CU *cu = NULL; Dwarf *dbg; + FILE *symfile = arg; int res; debug("%s", name); @@ -100,6 +108,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, check(process_module(mod, dbg, &cudie)); } while (cu); + /* + * Use die_map to expand type strings and write them to `symfile`. + */ + check(generate_symtypes(symfile)); die_map_free(); return DWARF_CB_OK; @@ -112,6 +124,7 @@ static const Dwfl_Callbacks callbacks = { int main(int argc, const char **argv) { + FILE *symfile = NULL; unsigned int n; if (parse_options(argc, argv) < 0) @@ -122,6 +135,16 @@ int main(int argc, const char **argv) check(symbol_read_exports(stdin)); + if (symtypes_file) { + symfile = fopen(symtypes_file, "w+"); + + if (!symfile) { + error("fopen failed for '%s': %s", symtypes_file, + strerror(errno)); + return -1; + } + } + for (n = 0; n < object_count; n++) { Dwfl *dwfl; int fd; @@ -151,7 +174,7 @@ int main(int argc, const char **argv) dwfl_report_end(dwfl, NULL, NULL); - if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) { + if (dwfl_getmodules(dwfl, &process_modules, symfile, 0)) { error("dwfl_getmodules failed for '%s'", object_files[n]); return -1; @@ -161,5 +184,8 @@ int main(int argc, const char **argv) close(fd); } + if (symfile) + fclose(symfile); + return 0; } diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 7cd907e3d5e3..6edbd6478e0f 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -22,6 +22,8 @@ extern bool debug; extern bool dump_dies; extern bool dump_die_map; +extern bool dump_types; +extern bool symtypes; #define MAX_INPUT_FILES 128 @@ -89,6 +91,12 @@ extern bool dump_die_map; #define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" #define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) +/* See dwarf.c:is_declaration */ +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_" +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1) + +enum symbol_state { UNPROCESSED, MAPPED }; + struct symbol_addr { uint32_t section; Elf64_Addr address; @@ -109,12 +117,14 @@ struct symbol { struct symbol_addr addr; struct hlist_node addr_hash; struct hlist_node name_hash; + enum symbol_state state; + uintptr_t die_addr; }; extern bool is_symbol_ptr(const char *name); extern int symbol_read_exports(FILE *file); extern int symbol_read_symtab(int fd); -extern struct symbol *symbol_get(const char *name); +extern struct symbol *symbol_get_unprocessed(const char *name); /* * die.c @@ -157,12 +167,15 @@ struct die { struct hlist_node hash; }; +typedef int (*die_map_callback_t)(struct die *, void *arg); + extern int __die_map_get(uintptr_t addr, enum die_state state, struct die **res); extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res); extern int die_map_add_string(struct die *pd, const char *str); extern int die_map_add_linebreak(struct die *pd, int linebreak); extern int die_map_add_die(struct die *pd, struct die *child); +extern int die_map_for_each(die_map_callback_t func, void *arg); extern void die_map_free(void); /* @@ -222,4 +235,10 @@ extern int process_die_container(struct state *state, struct die *cache, extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); +/* + * types.c + */ + +extern int generate_symtypes(FILE *file); + #endif /* __GENDWARFKSYMS_H */ diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index d6d016458ae1..8cc04e6295a7 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -117,6 +117,7 @@ int symbol_read_exports(FILE *file) sym->name = name; sym->addr.section = SHN_UNDEF; + sym->state = UNPROCESSED; name = NULL; hash_add(symbol_names, &sym->name_hash, name_hash(sym->name)); @@ -132,19 +133,21 @@ int symbol_read_exports(FILE *file) return 0; } -static int get_symbol(struct symbol *sym, void *arg) +static int get_unprocessed(struct symbol *sym, void *arg) { struct symbol **res = arg; - *res = sym; + if (sym->state == UNPROCESSED) + *res = sym; + return 0; } -struct symbol *symbol_get(const char *name) +struct symbol *symbol_get_unprocessed(const char *name) { struct symbol *sym = NULL; - for_each(name, false, get_symbol, &sym); + for_each(name, false, get_unprocessed, &sym); return sym; } diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c new file mode 100644 index 000000000000..7b9997d8322d --- /dev/null +++ b/scripts/gendwarfksyms/types.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2024 Google LLC + */ + +#include "gendwarfksyms.h" +#include "crc32.h" + +static struct expansion_cache expansion_cache; + +/* + * A simple linked list of shared or owned strings to avoid copying strings + * around when not necessary. + */ +struct type_list { + const char *str; + void *owned; + struct type_list *next; +}; + +static struct type_list *type_list_alloc(void) +{ + struct type_list *list; + + list = calloc(1, sizeof(struct type_list)); + if (!list) + error("calloc failed"); + + return list; +} + +static void type_list_free(struct type_list *list) +{ + struct type_list *tmp; + + while (list) { + if (list->owned) + free(list->owned); + + tmp = list; + list = list->next; + free(tmp); + } +} + +static int type_list_append(struct type_list *list, const char *s, void *owned) +{ + if (!list || !s) + return 0; + + while (list->next) + list = list->next; + + if (list->str) { + list->next = type_list_alloc(); + + if (!list->next) { + error("type_list_alloc failed"); + return -1; + } + + list = list->next; + } + + list->str = s; + list->owned = owned; + + return strlen(list->str); +} + +static int type_list_write(struct type_list *list, FILE *file) +{ + while (list) { + if (list->str) + checkp(fputs(list->str, file)); + list = list->next; + } + + return 0; +} + +/* + * An expanded type string in symtypes format. + */ +struct type_expansion { + char *name; + struct type_list *expanded; + struct type_list *last; + size_t len; + struct hlist_node hash; +}; + +static int type_expansion_init(struct type_expansion *type, bool alloc) +{ + memset(type, 0, sizeof(struct type_expansion)); + if (alloc) { + type->expanded = type_list_alloc(); + if (!type->expanded) + return -1; + + type->last = type->expanded; + } + return 0; +} + +static inline void type_expansion_free(struct type_expansion *type) +{ + free(type->name); + type_list_free(type->expanded); + type_expansion_init(type, false); +} + +static int type_expansion_append(struct type_expansion *type, const char *s, + void *owned) +{ + type->len += checkp(type_list_append(type->last, s, owned)); + + if (type->last->next) + type->last = type->last->next; + + return 0; +} + +/* + * type_map -- the longest expansions for each type. + * + * const char *name -> struct type_expansion * + */ +#define TYPE_HASH_BITS 16 +static DEFINE_HASHTABLE(type_map, TYPE_HASH_BITS); + +static int type_map_get(const char *name, struct type_expansion **res) +{ + struct type_expansion *e; + + hash_for_each_possible(type_map, e, hash, name_hash(name)) { + if (!strcmp(name, e->name)) { + *res = e; + return 0; + } + } + + return -1; +} + +static int type_map_add(const char *name, struct type_expansion *type) +{ + struct type_expansion *e; + + if (type_map_get(name, &e)) { + e = malloc(sizeof(struct type_expansion)); + if (!e) { + error("malloc failed"); + return -1; + } + + type_expansion_init(e, false); + + e->name = strdup(name); + if (!e->name) { + error("strdup failed"); + return -1; + } + + hash_add(type_map, &e->hash, name_hash(e->name)); + + if (dump_types) + debug("adding %s", e->name); + } else { + /* Use the longest available expansion */ + if (type->len <= e->len) + return 0; + + type_list_free(e->expanded); + + if (dump_types) + debug("replacing %s", e->name); + } + + /* Take ownership of type->expanded */ + e->expanded = type->expanded; + e->last = type->last; + e->len = type->len; + type->expanded = NULL; + type->last = NULL; + type->len = 0; + + if (dump_types) { + fputs(e->name, stderr); + fputs(" ", stderr); + type_list_write(e->expanded, stderr); + fputs("\n", stderr); + } + + return 0; +} + +static int type_map_write(FILE *file) +{ + struct type_expansion *e; + struct hlist_node *tmp; + int i; + + if (!file) + return 0; + + hash_for_each_safe(type_map, i, tmp, e, hash) { + checkp(fputs(e->name, file)); + checkp(fputs(" ", file)); + type_list_write(e->expanded, file); + checkp(fputs("\n", file)); + } + + return 0; +} + +static void type_map_free(void) +{ + struct type_expansion *e; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(type_map, i, tmp, e, hash) { + type_expansion_free(e); + free(e); + } + + hash_init(type_map); +} + +/* + * Type reference format: <prefix>#<name>, where prefix: + * s -> structure + * u -> union + * e -> enum + * t -> typedef + * + * Names with spaces are additionally wrapped in single quotes. + */ +static inline bool is_type_prefix(const char *s) +{ + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && + s[1] == '#'; +} + +static char get_type_prefix(int tag) +{ + switch (tag) { + case DW_TAG_class_type: + case DW_TAG_structure_type: + return 's'; + case DW_TAG_union_type: + return 'u'; + case DW_TAG_enumeration_type: + return 'e'; + case DW_TAG_typedef_type: + return 't'; + default: + return 0; + } +} + +static char *get_type_name(struct die *cache) +{ + const char *format; + char prefix; + char *name; + size_t len; + + if (cache->state == INCOMPLETE) { + warn("found incomplete cache entry: %p", cache); + return NULL; + } + if (!cache->fqn) + return NULL; + + prefix = get_type_prefix(cache->tag); + if (!prefix) + return NULL; + + /* <prefix>#<type_name>\0 */ + len = 2 + strlen(cache->fqn) + 1; + + /* Wrap names with spaces in single quotes */ + if (strstr(cache->fqn, " ")) { + format = "%c#'%s'"; + len += 2; + } else { + format = "%c#%s"; + } + + name = malloc(len); + if (!name) { + error("malloc failed"); + return NULL; + } + + if (snprintf(name, len, format, prefix, cache->fqn) >= len) { + error("snprintf failed for '%s' (length %zu)", cache->fqn, + len); + free(name); + return NULL; + } + + return name; +} + +static int __type_expand(struct die *cache, struct type_expansion *type, + bool recursive); + +static int type_expand_child(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct type_expansion child; + char *name; + + name = get_type_name(cache); + if (!name) + return check(__type_expand(cache, type, recursive)); + + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { + check(__cache_mark_expanded(&expansion_cache, cache->addr)); + check(type_expansion_init(&child, true)); + check(__type_expand(cache, &child, true)); + check(type_map_add(name, &child)); + type_expansion_free(&child); + } + + check(type_expansion_append(type, name, name)); + return 0; +} + +static int __type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + struct die_fragment *df = cache->list; + struct die *child; + + while (df) { + switch (df->type) { + case STRING: + check(type_expansion_append(type, df->data.str, NULL)); + break; + case DIE: + /* Use a complete die_map expansion if available */ + if (__die_map_get(df->data.addr, COMPLETE, &child) && + __die_map_get(df->data.addr, UNEXPANDED, &child)) { + error("unknown child: %" PRIxPTR, + df->data.addr); + return -1; + } + + check(type_expand_child(child, type, recursive)); + break; + case LINEBREAK: + /* + * Keep whitespace in the symtypes format, but avoid + * repeated spaces. + */ + if (!df->next || df->next->type != LINEBREAK) + check(type_expansion_append(type, " ", NULL)); + break; + default: + error("empty die_fragment in %p", cache); + return -1; + } + + df = df->next; + } + + return 0; +} + +static int type_expand(struct die *cache, struct type_expansion *type, + bool recursive) +{ + check(type_expansion_init(type, true)); + check(__type_expand(cache, type, recursive)); + cache_clear_expanded(&expansion_cache); + return 0; +} + +static int expand_type(struct die *cache, void *arg) +{ + struct type_expansion type; + char *name; + + /* + * Skip unexpanded die_map entries if there's a complete + * expansion available for this DIE. + */ + if (cache->state == UNEXPANDED) + __die_map_get(cache->addr, COMPLETE, &cache); + + if (cache->mapped) + return 0; + + cache->mapped = true; + + name = get_type_name(cache); + if (!name) + return 0; + + debug("%s", name); + check(type_expand(cache, &type, true)); + check(type_map_add(name, &type)); + + type_expansion_free(&type); + free(name); + + return 0; +} + +int generate_symtypes(FILE *file) +{ + hash_init(expansion_cache.cache); + + /* + * die_map processing: + * + * 1. die_map contains all types referenced in exported symbol + * signatures, but can contain duplicates just like the original + * DWARF, and some references may not be fully expanded depending + * on how far we processed the DIE tree for that specific symbol. + * + * For each die_map entry, find the longest available expansion, + * and add it to type_map. + */ + check(die_map_for_each(expand_type, NULL)); + + /* + * 2. If a symtypes file is requested, write type_map contents to + * the file. + */ + check(type_map_write(file)); + type_map_free(); + + return 0; +}
Add support for producing genksyms-style symtypes files. Process die_map to find the longest expansions for each type, and use symtypes references in type definitions. The basic file format is similar to genksyms, with two notable exceptions: 1. Type names with spaces (common with Rust) in references are wrapped in single quotes. E.g.: s#'core::result::Result<u8, core::num::error::ParseIntError>' 2. The actual type definition is the simple parsed DWARF format we output with --dump-dies, not the preprocessed C-style format genksyms produces. Signed-off-by: Sami Tolvanen <samitolvanen@google.com> --- scripts/gendwarfksyms/Makefile | 1 + scripts/gendwarfksyms/die.c | 13 + scripts/gendwarfksyms/dwarf.c | 14 +- scripts/gendwarfksyms/gendwarfksyms.c | 28 +- scripts/gendwarfksyms/gendwarfksyms.h | 21 +- scripts/gendwarfksyms/symbols.c | 11 +- scripts/gendwarfksyms/types.c | 439 ++++++++++++++++++++++++++ 7 files changed, 517 insertions(+), 10 deletions(-) create mode 100644 scripts/gendwarfksyms/types.c