Message ID | 20240614171428.968174-3-kris.van.hees@oracle.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Generate address range data for built-in modules | expand |
My apologies (esp. to Masahiro Yamada)... this patch was supposed to resolve the outstanding issue of needing to add gawk to the dependencies in the documentation and that part of the patch still didn't make it in. I've added it on my end for v5, and will absolutely ensure that it will be in the posted version. Again, sorry for having yet again overlooked that. Kris On Fri, Jun 14, 2024 at 01:14:27PM -0400, Kris Van Hees wrote: > The offset range data for builtin modules is generated using: > - modules.builtin: associates object files with module names > - vmlinux.map: provides load order of sections and offset of first member > per section > - vmlinux.o.map: provides offset of object file content per section > - .*.cmd: build cmd file with KBUILD_MODFILE and KBUILD_MODNAME > > The generated data will look like: > > .text 00000000-00000000 = _text > .text 0000baf0-0000cb10 amd_uncore > .text 0009bd10-0009c8e0 iosf_mbi > ... > .text 008e6660-008e9630 snd_soc_wcd_mbhc > .text 008e9630-008ea610 snd_soc_wcd9335 snd_soc_wcd934x snd_soc_wcd938x > .text 008ea610-008ea780 snd_soc_wcd9335 > ... > .data 00000000-00000000 = _sdata > .data 0000f020-0000f680 amd_uncore > > For each ELF section, it lists the offset of the first symbol. This can > be used to determine the base address of the section at runtime. > > Next, it lists (in strict ascending order) offset ranges in that section > that cover the symbols of one or more builtin modules. Multiple ranges > can apply to a single module, and ranges can be shared between modules. > > The CONFIG_BUILTIN_MODULE_RANGES option controls whether offset range data > is generated for kernel modules that are built into the kernel image. 
> > Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com> > Reviewed-by: Nick Alcock <nick.alcock@oracle.com> > Reviewed-by: Alan Maguire <alan.maguire@oracle.com> > --- > Changes since v3: > - Consolidated patches 2 through 5 into a single patch > - Move CONFIG_BUILTIN_MODULE_RANGES to Kconfig.debug > - Make CONFIG_BUILTIN_MODULE_RANGES select CONFIG_VMLINUX_MAP > - Disable CONFIG_BUILTIN_MODULE_RANGES if CONFIG_LTO_CLANG_(FULL|THIN)=y > - Support LLVM (lld) compiles in generate_builtin_ranges.awk > - Support CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y > > Changes since v2: > - Add explicit dependency on FTRACE for CONFIG_BUILTIN_MODULE_RANGES > - 1st arg to generate_builtin_ranges.awk is now modules.builtin.modinfo > - Switched from using modules.builtin.objs to parsing .*.cmd files > - Parse data from .*.cmd in generate_builtin_ranges.awk > - Use $(real-prereqs) rather than $(filter-out ...) > --- > lib/Kconfig.debug | 19 ++ > scripts/Makefile.vmlinux | 16 ++ > scripts/Makefile.vmlinux_o | 3 + > scripts/generate_builtin_ranges.awk | 284 ++++++++++++++++++++++++++++ > 4 files changed, 322 insertions(+) > create mode 100755 scripts/generate_builtin_ranges.awk > > diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug > index 291185f54ee4..03fddad67d59 100644 > --- a/lib/Kconfig.debug > +++ b/lib/Kconfig.debug > @@ -571,6 +571,25 @@ config VMLINUX_MAP > pieces of code get eliminated with > CONFIG_LD_DEAD_CODE_DATA_ELIMINATION. > > +config BUILTIN_MODULE_RANGES > + bool "Generate address range information for builtin modules" > + depends on !LTO_CLANG_FULL > + depends on !LTO_CLANG_THIN > + select VMLINUX_MAP > + help > + When modules are built into the kernel, there will be no module name > + associated with its symbols in /proc/kallsyms. Tracers may want to > + identify symbols by module name and symbol name regardless of whether > + the module is configured as loadable or not. 
> + > + This option generates modules.builtin.ranges in the build tree with > + offset ranges (per ELF section) for the module(s) they belong to. > + It also records an anchor symbol to determine the load address of the > + section. > + > + It is fully compatible with CONFIG_RANDOMIZE_BASE and similar late- > + address-modification options. > + > config DEBUG_FORCE_WEAK_PER_CPU > bool "Force weak per-cpu definitions" > depends on DEBUG_KERNEL > diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux > index c9f3e03124d7..5fd1f272ccde 100644 > --- a/scripts/Makefile.vmlinux > +++ b/scripts/Makefile.vmlinux > @@ -36,6 +36,22 @@ targets += vmlinux > vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE > +$(call if_changed_dep,link_vmlinux) > > +# module.builtin.ranges > +# --------------------------------------------------------------------------- > +ifdef CONFIG_BUILTIN_MODULE_RANGES > +__default: modules.builtin.ranges > + > +quiet_cmd_modules_builtin_ranges = GEN $@ > + cmd_modules_builtin_ranges = \ > + $(srctree)/scripts/generate_builtin_ranges.awk $(real-prereqs) > $@ > + > +vmlinux.map: vmlinux > + > +targets += modules.builtin.ranges > +modules.builtin.ranges: modules.builtin vmlinux.map vmlinux.o.map FORCE > + $(call if_changed,modules_builtin_ranges) > +endif > + > # Add FORCE to the prequisites of a target to force it to be always rebuilt. 
> # --------------------------------------------------------------------------- > > diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o > index 6de297916ce6..252505505e0e 100644 > --- a/scripts/Makefile.vmlinux_o > +++ b/scripts/Makefile.vmlinux_o > @@ -45,9 +45,12 @@ objtool-args = $(vmlinux-objtool-args-y) --link > # Link of vmlinux.o used for section mismatch analysis > # --------------------------------------------------------------------------- > > +vmlinux-o-ld-args-$(CONFIG_BUILTIN_MODULE_RANGES) += -Map=$@.map > + > quiet_cmd_ld_vmlinux.o = LD $@ > cmd_ld_vmlinux.o = \ > $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ > + $(vmlinux-o-ld-args-y) \ > $(addprefix -T , $(initcalls-lds)) \ > --whole-archive vmlinux.a --no-whole-archive \ > --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ > diff --git a/scripts/generate_builtin_ranges.awk b/scripts/generate_builtin_ranges.awk > new file mode 100755 > index 000000000000..ba7a5dcef284 > --- /dev/null > +++ b/scripts/generate_builtin_ranges.awk > @@ -0,0 +1,284 @@ > +#!/usr/bin/gawk -f > +# SPDX-License-Identifier: GPL-2.0 > +# generate_builtin_ranges.awk: Generate address range data for builtin modules > +# Written by Kris Van Hees <kris.van.hees@oracle.com> > +# > +# Usage: generate_builtin_ranges.awk modules.builtin vmlinux.map \ > +# vmlinux.o.map > modules.builtin.ranges > +# > + > +# Return the module name(s) (if any) associated with the given object. > +# > +# If we have seen this object before, return information from the cache. > +# Otherwise, retrieve it from the corresponding .cmd file. > +# > +function get_module_info(fn, mod, obj, mfn, s) { > + if (fn in omod) > + return omod[fn]; > + > + if (match(fn, /\/[^/]+$/) == 0) > + return ""; > + > + obj = fn; > + mod = ""; > + mfn = ""; > + fn = substr(fn, 1, RSTART) "." 
substr(fn, RSTART + 1) ".cmd"; > + if (getline s <fn == 1) { > + if (match(s, /DKBUILD_MODFILE=['"]+[^'"]+/) > 0) { > + mfn = substr(s, RSTART + 16, RLENGTH - 16); > + gsub(/['"]/, "", mfn); > + > + mod = mfn; > + gsub(/([^/ ]*\/)+/, "", mod); > + gsub(/-/, "_", mod); > + } > + } > + close(fn); > + > + # A single module (common case) also reflects objects that are not part > + # of a module. Some of those objects have names that are also a module > + # name (e.g. core). We check the associated module file name, and if > + # they do not match, the object is not part of a module. > + if (mod !~ / /) { > + if (!(mod in mods)) > + return ""; > + if (mods[mod] != mfn) > + return ""; > + } > + > + # At this point, mod is a single (valid) module name, or a list of > + # module names (that do not need validation). > + omod[obj] = mod; > + close(fn); > + > + return mod; > +} > + > +FNR == 1 { > + FC++; > +} > + > +# (1) Build a lookup map of built-in module names. > +# > +# The first file argument is used as input (modules.builtin). > +# > +# Lines will be like: > +# kernel/crypto/lzo-rle.ko > +# and we derive the built-in module name from this as "lzo_rle" and associate > +# it with object name "crypto/lzo-rle". > +# > +FC == 1 { > + sub(/kernel\//, ""); # strip off "kernel/" prefix > + sub(/\.ko$/, ""); # strip off .ko suffix > + > + mod = $1; > + sub(/([^/]*\/)+/, "", mod); # mod = basename($1) > + gsub(/-/, "_", mod); # Convert - to _ > + > + mods[mod] = $1; > + next; > +} > + > +# (2) Determine the load address for each section. > +# > +# The second file argument is used as input (vmlinux.map). > +# > +# Since some AWK implementations cannot handle large integers, we strip of the > +# first 4 hex digits from the address. This is safe because the kernel space > +# is not large enough for addresses to extend into those digits. > +# > + > +# First determine whether we are dealing with a GNU ld or LLVM lld linker map. 
> +# > +FC == 2 && FNR == 1 && NF == 7 && $1 == "VMA" && $7 == "Symbol" { > + map_is_lld = 1; > + next; > +} > + > +# (LLD) Convert a section record fronm lld format to ld format. > +# > +FC == 2 && map_is_lld && NF == 5 && /[0-9] \./ { > + $0 = $5 " 0x"$1 " dummy"; > +} > + > +# (LLD) Convert an anchor record from lld format to ld format. > +# > +FC == 2 && map_is_lld && !anchor && NF == 7 && raw_addr == "0x"$1 && $6 == "=" && $7 == "." { > + $0 = "0x"$1 " " $5 " = " $7; > +} > + > +# (LLD) Convert an object record from lld format to ld format. > +# > +FC == 2 && map_is_lld && NF == 5 && $5 ~ /:\(\./ { > + gsub(/\)/, ""); > + sub(/:\(/, " "); > + sub(/ vmlinux\.a\(/, " "); > + $0 = " "$6 " 0x"$1 " 0x"$3 " " $5; > +} > + > +FC == 2 && /^\./ && NF > 2 { > + if (type) > + delete sect_addend[type]; > + > + if ($1 ~ /\.percpu/) > + next; > + > + raw_addr = $2; > + addr_prefix = "^" substr($2, 1, 6); > + sub(addr_prefix, "0x", $2); > + base = strtonum($2); > + type = $1; > + tpat = "^ \\"type"[\\. ]"; > + anchor = 0; > + sect_base[type] = base; > + > + next; > +} > + > +!type { > + next; > +} > + > +# (3) We need to determine the base address of the section so that ranges can > +# be expressed based on offsets from the base address. This accommodates the > +# kernel sections getting loaded at different addresses than what is recorded > +# in vmlinux.map. > +# > +# At runtime, we will need to determine the base address of each section we are > +# interested in. We do that by recording the offset of the first symbol in the > +# section. Once we know the address of this symbol in the running kernel, we > +# can calculate the base address of the section. > +# > +# If possible, we use an explicit anchor symbol (sym = .) listed at the base > +# address (offset 0). > +# > +# If there is no such symbol, we record the first symbol in the section along > +# with its offset. 
> +# > +# We also determine the offset of the first member in the section in case the > +# final linking inserts some content between the start of the section and the > +# first member. I.e. in that case, vmlinux.map will list the first member at > +# a non-zero offset whereas vmlinux.o.map will list it at offset 0. We record > +# the addend so we can apply it when processing vmlinux.o.map (next). > +# > +FC == 2 && !anchor && raw_addr == $1 && $3 == "=" && $4 == "." { > + anchor = sprintf("%s %08x-%08x = %s", type, 0, 0, $2); > + sect_anchor[type] = anchor; > + > + next; > +} > + > +FC == 2 && !anchor && $1 ~ /^0x/ && $2 !~ /^0x/ && NF <= 4 { > + sub(addr_prefix, "0x", $1); > + addr = strtonum($1) - base; > + anchor = sprintf("%s %08x-%08x = %s", type, addr, addr, $2); > + sect_anchor[type] = anchor; > + > + next; > +} > + > +FC == 2 && /^ \./ && NF == 1 { > + # If the section name is long, the remainder of the entry is found on > + # the next line. > + s = $0; > + getline; > + $0 = s " " $0; > +} > + > +FC == 2 && base && $0 ~ tpat && NF == 4 { > + # If the first object is vmlinux.o then we need vmlinux.o.map to get > + # the offsets of the actual objects. That is valid because in this > + # case the vmlinux.o is linked into vmlinux verbatim (per section). 
> + if ($4 == "vmlinux.o") > + need_o_map = 1; > + > + sub(addr_prefix, "0x", $2); > + addr = strtonum($2); > + sect_addend[type] = addr - base; > + > + if (anchor) > + base = 0; > + if (need_o_map) > + type = 0; > + > + next; > +} > + > +FC == 2 && !need_o_map && $0 ~ tpat && NF == 4 { > + if ($1 ~ /\.percpu/ || !(type in sect_addend)) > + next; > + > + sub(addr_prefix, "0x", $2); > + addr = strtonum($2) - sect_base[type]; > + > + mod = get_module_info($4); > + if (mod == mod_name) > + next; > + > + if (mod_name) { > + idx = mod_start + sect_base[type]; > + entries[idx] = sprintf("%s %08x-%08x %s", type, mod_start, addr, mod_name); > + count[type]++; > + } > + > + mod_name = mod; > + mod_start = addr; > + > + next; > +} > + > +# If we do not need to parse the vmlinux.o.map file, we are done. > +FC == 3 && !need_o_map { > + exit; > +} > + > +# (4) Collect offset ranges (relative to the section base address) for built-in > +# modules. > +# > + > +# (LLD) Convert an object record from lld format to ld format. 
> +# > +FC == 3 && map_is_lld && NF == 5 && $5 ~ /:\(\./ { > + gsub(/\)/, ""); > + sub(/:\(/, " "); > + > + type = $6; > + if (!(type in sect_addend)) > + next; > + > + sub(/ vmlinux\.a\(/, " "); > + $0 = " "type " 0x"$1 " 0x"$3 " " $5; > +} > + > +FC == 3 && /^ \./ && NF == 4 && $3 != "0x0" { > + type = $1; > + if (!(type in sect_addend)) > + next; > + > + sub(addr_prefix, "0x", $2); > + addr = strtonum($2) + sect_addend[type]; > + > + mod = get_module_info($4); > + if (mod == mod_name) > + next; > + > + if (mod_name) { > + idx = mod_start + sect_base[type] + sect_addend[type]; > + entries[idx] = sprintf("%s %08x-%08x %s", type, mod_start, addr, mod_name); > + count[type]++; > + } > + > + mod_name = mod; > + mod_start = addr; > +} > + > +END { > + for (type in count) { > + if (type in sect_anchor) > + entries[sect_base[type]] = sect_anchor[type]; > + } > + > + n = asorti(entries, indices); > + for (i = 1; i <= n; i++) > + print entries[indices[i]]; > +} > -- > 2.45.1
On Fri, Jun 14, 2024 at 01:14:27PM -0400, Kris Van Hees wrote: > The offset range data for builtin modules is generated using: > - modules.builtin: associates object files with module names > - vmlinux.map: provides load order of sections and offset of first member > per section > - vmlinux.o.map: provides offset of object file content per section > - .*.cmd: build cmd file with KBUILD_MODFILE and KBUILD_MODNAME What tests do we have to ensure this is working correctly and not spewing out lies? What proactive mechanisms do we have to verify the semantics won't change, or to warn at build time that this awk script will break upon new changes? Is this just best effort? Is that good enough? Why? Luis
On Tue, Jun 18, 2024 at 11:57:38AM -0700, Luis Chamberlain wrote: > On Fri, Jun 14, 2024 at 01:14:27PM -0400, Kris Van Hees wrote: > > The offset range data for builtin modules is generated using: > > - modules.builtin: associates object files with module names > > - vmlinux.map: provides load order of sections and offset of first member > > per section > > - vmlinux.o.map: provides offset of object file content per section > > - .*.cmd: build cmd file with KBUILD_MODFILE and KBUILD_MODNAME > > What tests do we have to ensure this is working correctly and not > spewing out lies? What proactive mechanisms do we have to verify the > semantics won't change, or to warn at build time that this awk script > will break upon new changes? Is this just best effort? Is that good > enough? Why? I posted a new patch series [0] that hopefully addresses your questions. Most specifically, I included a patch with a verifier script that validates the generated data. It is available for use but is not automatically executed because the modules.builtin.ranges data is not required for proper kernel operation. After all, the generated data is there for tools to use and is not critical to the kernel itself. While there is always the possibility of something breaking in this generation due to future kernel changes, I'd say that this same issue applies to pretty much everything in the build process of the kernel. Some changes will always require other steps to be updated - I'll be happy to maintain this contribution to help ensure changes are addressed. The generated data depends on 2 main things for its correctness: the data that is found in the linker maps, and the logic of the script parsing that data. 
The logic (documented in the commit message and in more detail in the actual script [1]) is pretty straight-forward because it is all based on a linear walk of the content of vmlinux (using vmlinux.map), collecting the start and end offsets of each object (CU) and aggregating this information based on the built-in module(s) the object (CU) belongs to (if any). For the case where vmlinux was linked using vmlinux.o, the script uses vmlinux.o.map data to get the actual content of included sections. The documented limitation is (of course) that if no data is available to associate addresses (or offsets) in vmlinux with the source objects (CUs), it is not possible to generate modules.builtin.ranges data. That is reflected by making the config option to have this data generated conflict with using LTO_CLANG_FULL or LTO_CLANG_THIN. But again, given that the generated data does not directly impact the operation of the kernel, the impact of possible breakage is minimal. And like any other kernel feature, it will have to be maintained which I will happily do to ensure this works and keeps working. Kris [0] https://lore.kernel.org/lkml/20240716031045.1781332-1-kris.van.hees@oracle.com/ [1] https://lore.kernel.org/lkml/20240716031045.1781332-3-kris.van.hees@oracle.com/
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 291185f54ee4..03fddad67d59 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -571,6 +571,25 @@ config VMLINUX_MAP pieces of code get eliminated with CONFIG_LD_DEAD_CODE_DATA_ELIMINATION. +config BUILTIN_MODULE_RANGES + bool "Generate address range information for builtin modules" + depends on !LTO_CLANG_FULL + depends on !LTO_CLANG_THIN + select VMLINUX_MAP + help + When modules are built into the kernel, there will be no module name + associated with its symbols in /proc/kallsyms. Tracers may want to + identify symbols by module name and symbol name regardless of whether + the module is configured as loadable or not. + + This option generates modules.builtin.ranges in the build tree with + offset ranges (per ELF section) for the module(s) they belong to. + It also records an anchor symbol to determine the load address of the + section. + + It is fully compatible with CONFIG_RANDOMIZE_BASE and similar late- + address-modification options. 
+ config DEBUG_FORCE_WEAK_PER_CPU bool "Force weak per-cpu definitions" depends on DEBUG_KERNEL diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index c9f3e03124d7..5fd1f272ccde 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -36,6 +36,22 @@ targets += vmlinux vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) +# module.builtin.ranges +# --------------------------------------------------------------------------- +ifdef CONFIG_BUILTIN_MODULE_RANGES +__default: modules.builtin.ranges + +quiet_cmd_modules_builtin_ranges = GEN $@ + cmd_modules_builtin_ranges = \ + $(srctree)/scripts/generate_builtin_ranges.awk $(real-prereqs) > $@ + +vmlinux.map: vmlinux + +targets += modules.builtin.ranges +modules.builtin.ranges: modules.builtin vmlinux.map vmlinux.o.map FORCE + $(call if_changed,modules_builtin_ranges) +endif + # Add FORCE to the prequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6de297916ce6..252505505e0e 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -45,9 +45,12 @@ objtool-args = $(vmlinux-objtool-args-y) --link # Link of vmlinux.o used for section mismatch analysis # --------------------------------------------------------------------------- +vmlinux-o-ld-args-$(CONFIG_BUILTIN_MODULE_RANGES) += -Map=$@.map + quiet_cmd_ld_vmlinux.o = LD $@ cmd_ld_vmlinux.o = \ $(LD) ${KBUILD_LDFLAGS} -r -o $@ \ + $(vmlinux-o-ld-args-y) \ $(addprefix -T , $(initcalls-lds)) \ --whole-archive vmlinux.a --no-whole-archive \ --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ diff --git a/scripts/generate_builtin_ranges.awk b/scripts/generate_builtin_ranges.awk new file mode 100755 index 000000000000..ba7a5dcef284 --- /dev/null +++ b/scripts/generate_builtin_ranges.awk @@ -0,0 +1,284 @@ +#!/usr/bin/gawk -f +# 
SPDX-License-Identifier: GPL-2.0 +# generate_builtin_ranges.awk: Generate address range data for builtin modules +# Written by Kris Van Hees <kris.van.hees@oracle.com> +# +# Usage: generate_builtin_ranges.awk modules.builtin vmlinux.map \ +# vmlinux.o.map > modules.builtin.ranges +# + +# Return the module name(s) (if any) associated with the given object. +# +# If we have seen this object before, return information from the cache. +# Otherwise, retrieve it from the corresponding .cmd file. +# +function get_module_info(fn, mod, obj, mfn, s) { + if (fn in omod) + return omod[fn]; + + if (match(fn, /\/[^/]+$/) == 0) + return ""; + + obj = fn; + mod = ""; + mfn = ""; + fn = substr(fn, 1, RSTART) "." substr(fn, RSTART + 1) ".cmd"; + if (getline s <fn == 1) { + if (match(s, /DKBUILD_MODFILE=['"]+[^'"]+/) > 0) { + mfn = substr(s, RSTART + 16, RLENGTH - 16); + gsub(/['"]/, "", mfn); + + mod = mfn; + gsub(/([^/ ]*\/)+/, "", mod); + gsub(/-/, "_", mod); + } + } + close(fn); + + # A single module (common case) also reflects objects that are not part + # of a module. Some of those objects have names that are also a module + # name (e.g. core). We check the associated module file name, and if + # they do not match, the object is not part of a module. + if (mod !~ / /) { + if (!(mod in mods)) + return ""; + if (mods[mod] != mfn) + return ""; + } + + # At this point, mod is a single (valid) module name, or a list of + # module names (that do not need validation). + omod[obj] = mod; + close(fn); + + return mod; +} + +FNR == 1 { + FC++; +} + +# (1) Build a lookup map of built-in module names. +# +# The first file argument is used as input (modules.builtin). +# +# Lines will be like: +# kernel/crypto/lzo-rle.ko +# and we derive the built-in module name from this as "lzo_rle" and associate +# it with object name "crypto/lzo-rle". 
+# +FC == 1 { + sub(/kernel\//, ""); # strip off "kernel/" prefix + sub(/\.ko$/, ""); # strip off .ko suffix + + mod = $1; + sub(/([^/]*\/)+/, "", mod); # mod = basename($1) + gsub(/-/, "_", mod); # Convert - to _ + + mods[mod] = $1; + next; +} + +# (2) Determine the load address for each section. +# +# The second file argument is used as input (vmlinux.map). +# +# Since some AWK implementations cannot handle large integers, we strip of the +# first 4 hex digits from the address. This is safe because the kernel space +# is not large enough for addresses to extend into those digits. +# + +# First determine whether we are dealing with a GNU ld or LLVM lld linker map. +# +FC == 2 && FNR == 1 && NF == 7 && $1 == "VMA" && $7 == "Symbol" { + map_is_lld = 1; + next; +} + +# (LLD) Convert a section record fronm lld format to ld format. +# +FC == 2 && map_is_lld && NF == 5 && /[0-9] \./ { + $0 = $5 " 0x"$1 " dummy"; +} + +# (LLD) Convert an anchor record from lld format to ld format. +# +FC == 2 && map_is_lld && !anchor && NF == 7 && raw_addr == "0x"$1 && $6 == "=" && $7 == "." { + $0 = "0x"$1 " " $5 " = " $7; +} + +# (LLD) Convert an object record from lld format to ld format. +# +FC == 2 && map_is_lld && NF == 5 && $5 ~ /:\(\./ { + gsub(/\)/, ""); + sub(/:\(/, " "); + sub(/ vmlinux\.a\(/, " "); + $0 = " "$6 " 0x"$1 " 0x"$3 " " $5; +} + +FC == 2 && /^\./ && NF > 2 { + if (type) + delete sect_addend[type]; + + if ($1 ~ /\.percpu/) + next; + + raw_addr = $2; + addr_prefix = "^" substr($2, 1, 6); + sub(addr_prefix, "0x", $2); + base = strtonum($2); + type = $1; + tpat = "^ \\"type"[\\. ]"; + anchor = 0; + sect_base[type] = base; + + next; +} + +!type { + next; +} + +# (3) We need to determine the base address of the section so that ranges can +# be expressed based on offsets from the base address. This accommodates the +# kernel sections getting loaded at different addresses than what is recorded +# in vmlinux.map. 
+# +# At runtime, we will need to determine the base address of each section we are +# interested in. We do that by recording the offset of the first symbol in the +# section. Once we know the address of this symbol in the running kernel, we +# can calculate the base address of the section. +# +# If possible, we use an explicit anchor symbol (sym = .) listed at the base +# address (offset 0). +# +# If there is no such symbol, we record the first symbol in the section along +# with its offset. +# +# We also determine the offset of the first member in the section in case the +# final linking inserts some content between the start of the section and the +# first member. I.e. in that case, vmlinux.map will list the first member at +# a non-zero offset whereas vmlinux.o.map will list it at offset 0. We record +# the addend so we can apply it when processing vmlinux.o.map (next). +# +FC == 2 && !anchor && raw_addr == $1 && $3 == "=" && $4 == "." { + anchor = sprintf("%s %08x-%08x = %s", type, 0, 0, $2); + sect_anchor[type] = anchor; + + next; +} + +FC == 2 && !anchor && $1 ~ /^0x/ && $2 !~ /^0x/ && NF <= 4 { + sub(addr_prefix, "0x", $1); + addr = strtonum($1) - base; + anchor = sprintf("%s %08x-%08x = %s", type, addr, addr, $2); + sect_anchor[type] = anchor; + + next; +} + +FC == 2 && /^ \./ && NF == 1 { + # If the section name is long, the remainder of the entry is found on + # the next line. + s = $0; + getline; + $0 = s " " $0; +} + +FC == 2 && base && $0 ~ tpat && NF == 4 { + # If the first object is vmlinux.o then we need vmlinux.o.map to get + # the offsets of the actual objects. That is valid because in this + # case the vmlinux.o is linked into vmlinux verbatim (per section). 
+ if ($4 == "vmlinux.o") + need_o_map = 1; + + sub(addr_prefix, "0x", $2); + addr = strtonum($2); + sect_addend[type] = addr - base; + + if (anchor) + base = 0; + if (need_o_map) + type = 0; + + next; +} + +FC == 2 && !need_o_map && $0 ~ tpat && NF == 4 { + if ($1 ~ /\.percpu/ || !(type in sect_addend)) + next; + + sub(addr_prefix, "0x", $2); + addr = strtonum($2) - sect_base[type]; + + mod = get_module_info($4); + if (mod == mod_name) + next; + + if (mod_name) { + idx = mod_start + sect_base[type]; + entries[idx] = sprintf("%s %08x-%08x %s", type, mod_start, addr, mod_name); + count[type]++; + } + + mod_name = mod; + mod_start = addr; + + next; +} + +# If we do not need to parse the vmlinux.o.map file, we are done. +FC == 3 && !need_o_map { + exit; +} + +# (4) Collect offset ranges (relative to the section base address) for built-in +# modules. +# + +# (LLD) Convert an object record from lld format to ld format. +# +FC == 3 && map_is_lld && NF == 5 && $5 ~ /:\(\./ { + gsub(/\)/, ""); + sub(/:\(/, " "); + + type = $6; + if (!(type in sect_addend)) + next; + + sub(/ vmlinux\.a\(/, " "); + $0 = " "type " 0x"$1 " 0x"$3 " " $5; +} + +FC == 3 && /^ \./ && NF == 4 && $3 != "0x0" { + type = $1; + if (!(type in sect_addend)) + next; + + sub(addr_prefix, "0x", $2); + addr = strtonum($2) + sect_addend[type]; + + mod = get_module_info($4); + if (mod == mod_name) + next; + + if (mod_name) { + idx = mod_start + sect_base[type] + sect_addend[type]; + entries[idx] = sprintf("%s %08x-%08x %s", type, mod_start, addr, mod_name); + count[type]++; + } + + mod_name = mod; + mod_start = addr; +} + +END { + for (type in count) { + if (type in sect_anchor) + entries[sect_base[type]] = sect_anchor[type]; + } + + n = asorti(entries, indices); + for (i = 1; i <= n; i++) + print entries[indices[i]]; +}