@@ -93,16 +93,11 @@ struct elf_function {
struct btf_encoder_func_state state;
};
-struct var_info {
- uint64_t addr;
- const char *name;
- uint32_t sz;
-};
-
struct elf_secinfo {
uint64_t addr;
const char *name;
uint64_t sz;
+ uint32_t type;
};
/*
@@ -125,17 +120,11 @@ struct btf_encoder {
gen_floats,
skip_encoding_decl_tag,
tag_kfuncs,
- is_rel,
gen_distilled_base;
uint32_t array_index_id;
struct elf_secinfo *secinfo;
size_t seccnt;
- struct {
- struct var_info *vars;
- int var_cnt;
- int allocated;
- uint32_t shndx;
- } percpu;
+ size_t percpu_shndx;
int encode_vars;
struct {
struct elf_function *entries;
@@ -2098,111 +2087,18 @@ int btf_encoder__encode(struct btf_encoder *encoder)
return err;
}
-static int percpu_var_cmp(const void *_a, const void *_b)
-{
- const struct var_info *a = _a;
- const struct var_info *b = _b;
-
- if (a->addr == b->addr)
- return 0;
- return a->addr < b->addr ? -1 : 1;
-}
-
-static bool btf_encoder__percpu_var_exists(struct btf_encoder *encoder, uint64_t addr, uint32_t *sz, const char **name)
-{
- struct var_info key = { .addr = addr };
- const struct var_info *p = bsearch(&key, encoder->percpu.vars, encoder->percpu.var_cnt,
- sizeof(encoder->percpu.vars[0]), percpu_var_cmp);
- if (!p)
- return false;
-
- *sz = p->sz;
- *name = p->name;
- return true;
-}
-
-static int btf_encoder__collect_percpu_var(struct btf_encoder *encoder, GElf_Sym *sym, size_t sym_sec_idx)
-{
- const char *sym_name;
- uint64_t addr;
- uint32_t size;
-
- /* compare a symbol's shndx to determine if it's a percpu variable */
- if (sym_sec_idx != encoder->percpu.shndx)
- return 0;
- if (elf_sym__type(sym) != STT_OBJECT)
- return 0;
-
- addr = elf_sym__value(sym);
- size = elf_sym__size(sym);
- if (!size)
- return 0; /* ignore zero-sized symbols */
-
- sym_name = elf_sym__name(sym, encoder->symtab);
- if (!btf_name_valid(sym_name)) {
- dump_invalid_symbol("Found symbol of invalid name when encoding btf",
- sym_name, encoder->verbose, encoder->force);
- if (encoder->force)
- return 0;
- return -1;
- }
-
- if (encoder->verbose)
- printf("Found per-CPU symbol '%s' at address 0x%" PRIx64 "\n", sym_name, addr);
-
- /* Make sure addr is section-relative. For kernel modules (which are
- * ET_REL files) this is already the case. For vmlinux (which is an
- * ET_EXEC file) we need to subtract the section address.
- */
- if (!encoder->is_rel)
- addr -= encoder->secinfo[encoder->percpu.shndx].addr;
-
- if (encoder->percpu.var_cnt == encoder->percpu.allocated) {
- struct var_info *new;
-
- new = reallocarray_grow(encoder->percpu.vars,
- &encoder->percpu.allocated,
- sizeof(*encoder->percpu.vars));
- if (!new) {
- fprintf(stderr, "Failed to allocate memory for variables\n");
- return -1;
- }
- encoder->percpu.vars = new;
- }
- encoder->percpu.vars[encoder->percpu.var_cnt].addr = addr;
- encoder->percpu.vars[encoder->percpu.var_cnt].sz = size;
- encoder->percpu.vars[encoder->percpu.var_cnt].name = sym_name;
- encoder->percpu.var_cnt++;
-
- return 0;
-}
-
-static int btf_encoder__collect_symbols(struct btf_encoder *encoder, bool collect_percpu_vars)
+static int btf_encoder__collect_symbols(struct btf_encoder *encoder)
{
- Elf32_Word sym_sec_idx;
+ uint32_t sym_sec_idx;
uint32_t core_id;
GElf_Sym sym;
- /* cache variables' addresses, preparing for searching in symtab. */
- encoder->percpu.var_cnt = 0;
-
- /* search within symtab for percpu variables */
elf_symtab__for_each_symbol_index(encoder->symtab, core_id, sym, sym_sec_idx) {
- if (collect_percpu_vars && btf_encoder__collect_percpu_var(encoder, &sym, sym_sec_idx))
- return -1;
if (btf_encoder__collect_function(encoder, &sym))
return -1;
}
- if (collect_percpu_vars) {
- if (encoder->percpu.var_cnt)
- qsort(encoder->percpu.vars, encoder->percpu.var_cnt, sizeof(encoder->percpu.vars[0]), percpu_var_cmp);
-
- if (encoder->verbose)
- printf("Found %d per-CPU variables!\n", encoder->percpu.var_cnt);
- }
-
if (encoder->functions.cnt) {
qsort(encoder->functions.entries, encoder->functions.cnt, sizeof(encoder->functions.entries[0]),
functions_cmp);
@@ -2224,15 +2120,57 @@ static bool ftype__has_arg_names(const struct ftype *ftype)
return true;
}
+static size_t get_elf_section(struct btf_encoder *encoder, uint64_t addr)
+{
+ /* Start at index 1 to ignore initial SHT_NULL section */
+ for (size_t i = 1; i < encoder->seccnt; i++) {
+ /* Variables are only present in PROGBITS or NOBITS (.bss) */
+ if (encoder->secinfo[i].type == SHT_PROGBITS ||
+ encoder->secinfo[i].type == SHT_NOBITS)
+ continue;
+
+ if (encoder->secinfo[i].addr <= addr &&
+ (addr - encoder->secinfo[i].addr) < encoder->secinfo[i].sz)
+ return i;
+ }
+ return 0;
+}
+
+/*
+ * Filter out variables / symbol names with common prefixes and no useful
+ * values. Prefixes should be added sparingly, and it should be objectively
+ * obvious that they are not useful.
+ */
+static bool filter_variable_name(const char *name)
+{
+ static const struct { char *s; size_t len; } skip[] = {
+ #define X(str) {str, sizeof(str) - 1}
+ X("__UNIQUE_ID"),
+ X("__tpstrtab_"),
+ X("__exitcall_"),
+ X("__func_stack_frame_non_standard_")
+ #undef X
+ };
+ int i;
+
+ if (*name != '_')
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(skip); i++) {
+ if (strncmp(name, skip[i].s, skip[i].len) == 0)
+ return true;
+ }
+ return false;
+}
+
static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder)
{
struct cu *cu = encoder->cu;
uint32_t core_id;
struct tag *pos;
int err = -1;
- struct elf_secinfo *pcpu_scn = &encoder->secinfo[encoder->percpu.shndx];
- if (encoder->percpu.shndx == 0 || !encoder->symtab)
+ if (encoder->percpu_shndx == 0 || !encoder->symtab)
return 0;
if (encoder->verbose)
@@ -2240,59 +2178,69 @@ static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder)
cu__for_each_variable(cu, core_id, pos) {
struct variable *var = tag__variable(pos);
- uint32_t size, type, linkage;
- const char *name, *dwarf_name;
+ uint32_t type, linkage;
+ const char *name;
struct llvm_annotation *annot;
const struct tag *tag;
+ size_t shndx, size;
uint64_t addr;
int id;
+ /* Skip incomplete (non-defining) declarations */
if (var->declaration && !var->spec)
continue;
- /* percpu variables are allocated in global space */
- if (variable__scope(var) != VSCOPE_GLOBAL && !var->spec)
+ /*
+ * top_level: indicates that the variable is declared at the top
+ * level of the CU, and thus it is globally scoped.
+ * artificial: indicates that the variable is a compiler-generated
+ * "fake" variable that doesn't appear in the source.
+ * scope: set by pahole to indicate the type of storage the
+ * variable has. GLOBAL indicates it is stored in static
+ * memory (as opposed to a stack variable or register)
+ *
+ * Some variables are "top_level" but not GLOBAL:
+ * e.g. current_stack_pointer, which is a register variable,
+ * despite having global CU-declarations. We don't want that,
+ * since no code could actually find this variable.
+ * Some variables are GLOBAL but not top_level:
+ * e.g. function static variables
+ */
+ if (!var->top_level || var->artificial || var->scope != VSCOPE_GLOBAL)
continue;
/* addr has to be recorded before we follow spec */
addr = var->ip.addr;
- dwarf_name = variable__name(var);
- /* Make sure addr is section-relative. DWARF, unlike ELF,
- * always contains virtual symbol addresses, so subtract
- * the section address unconditionally.
- */
- if (addr < pcpu_scn->addr || addr >= pcpu_scn->addr + pcpu_scn->sz)
+ /* Get the ELF section info for the variable */
+ shndx = get_elf_section(encoder, addr);
+ if (shndx != encoder->percpu_shndx)
continue;
- addr -= pcpu_scn->addr;
- if (!btf_encoder__percpu_var_exists(encoder, addr, &size, &name))
- continue; /* not a per-CPU variable */
+ /* Convert addr to section relative */
+ addr -= encoder->secinfo[shndx].addr;
- /* A lot of "special" DWARF variables (e.g, __UNIQUE_ID___xxx)
- * have addr == 0, which is the same as, say, valid
- * fixed_percpu_data per-CPU variable. To distinguish between
- * them, additionally compare DWARF and ELF symbol names. If
- * DWARF doesn't provide proper name, pessimistically assume
- * bad variable.
- *
- * Examples of such special variables are:
- *
- * 1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
- * 2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
- * 3. __exitcall(fn), functions which are labeled as exit calls.
- *
- * This is relevant only for vmlinux image, as for kernel
- * modules per-CPU data section has non-zero offset so all
- * per-CPU symbols have non-zero values.
- */
- if (var->ip.addr == 0) {
- if (!dwarf_name || strcmp(dwarf_name, name))
+ /* DWARF specification reference should be followed, because
+ * information like the name & type may not be present on var */
+ if (var->spec)
+ var = var->spec;
+
+ name = variable__name(var);
+ if (!name)
+ continue;
+
+ /* Check for invalid BTF names */
+ if (!btf_name_valid(name)) {
+ dump_invalid_symbol("Found invalid variable name when encoding btf",
+ name, encoder->verbose, encoder->force);
+ if (encoder->force)
continue;
+ else
+ return -1;
}
- if (var->spec)
- var = var->spec;
+ if (filter_variable_name(name))
+ continue;
if (var->ip.tag.type == 0) {
fprintf(stderr, "error: found variable '%s' in CU '%s' that has void type\n",
@@ -2304,9 +2252,10 @@ static int btf_encoder__encode_cu_variables(struct btf_encoder *encoder)
}
tag = cu__type(cu, var->ip.tag.type);
- if (tag__size(tag, cu) == 0) {
+ size = tag__size(tag, cu);
+ if (size == 0) {
if (encoder->verbose)
- fprintf(stderr, "Ignoring zero-sized per-CPU variable '%s'...\n", dwarf_name ?: "<missing name>");
+ fprintf(stderr, "Ignoring zero-sized per-CPU variable '%s'...\n", name);
continue;
}
@@ -2388,8 +2337,6 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam
goto out_delete;
}
- encoder->is_rel = ehdr.e_type == ET_REL;
-
switch (ehdr.e_ident[EI_DATA]) {
case ELFDATA2LSB:
btf__set_endianness(encoder->btf, BTF_LITTLE_ENDIAN);
@@ -2430,15 +2377,16 @@ struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filenam
encoder->secinfo[shndx].addr = shdr.sh_addr;
encoder->secinfo[shndx].sz = shdr.sh_size;
encoder->secinfo[shndx].name = secname;
+ encoder->secinfo[shndx].type = shdr.sh_type;
if (strcmp(secname, PERCPU_SECTION) == 0)
- encoder->percpu.shndx = shndx;
+ encoder->percpu_shndx = shndx;
}
- if (!encoder->percpu.shndx && encoder->verbose)
+ if (!encoder->percpu_shndx && encoder->verbose)
printf("%s: '%s' doesn't have '%s' section\n", __func__, cu->filename, PERCPU_SECTION);
- if (btf_encoder__collect_symbols(encoder, encoder->encode_vars & BTF_VAR_PERCPU))
+ if (btf_encoder__collect_symbols(encoder))
goto out_delete;
if (encoder->verbose)
@@ -2480,9 +2428,6 @@ void btf_encoder__delete(struct btf_encoder *encoder)
encoder->functions.allocated = encoder->functions.cnt = 0;
free(encoder->functions.entries);
encoder->functions.entries = NULL;
- encoder->percpu.allocated = encoder->percpu.var_cnt = 0;
- free(encoder->percpu.vars);
- encoder->percpu.vars = NULL;
free(encoder);
}