Message ID | 20221020123704.91203-7-quentin@isovalent.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | bpftool: Add LLVM as default library for disassembling JIT-ed programs | expand |
On Thu, Oct 20, 2022 at 5:37 AM Quentin Monnet <quentin@isovalent.com> wrote: > > + > +/* This callback to set the ref_type is necessary to have the LLVM disassembler > + * print PC-relative addresses instead of byte offsets for branch instruction > + * targets. > + */ > +static const char * > +symbol_lookup_callback(__maybe_unused void *disasm_info, > + __maybe_unused uint64_t ref_value, > + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, > + __maybe_unused const char **ref_name) > +{ > + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; > + return NULL; > +} Could you give an example before/after for asm that contains 'call foo' instructions? I'm not sure that above InOut_None will not break symbolization.
2022-10-20 10:49 UTC-0700 ~ Alexei Starovoitov <alexei.starovoitov@gmail.com> > On Thu, Oct 20, 2022 at 5:37 AM Quentin Monnet <quentin@isovalent.com> wrote: >> >> + >> +/* This callback to set the ref_type is necessary to have the LLVM disassembler >> + * print PC-relative addresses instead of byte offsets for branch instruction >> + * targets. >> + */ >> +static const char * >> +symbol_lookup_callback(__maybe_unused void *disasm_info, >> + __maybe_unused uint64_t ref_value, >> + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, >> + __maybe_unused const char **ref_name) >> +{ >> + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; >> + return NULL; >> +} > > Could you give an example before/after for asm > that contains 'call foo' instructions? > I'm not sure that above InOut_None will not break > symbolization. Hi Alexei, I ran a quick test and it doesn't seem we lose any information. Building from: #include <linux/bpf.h> #include "bpf_helper_defs.h" #define SEC(name) __attribute__((section(name), used)) static __attribute__((noinline)) int bar(int b) { return bpf_get_prandom_u32() > b; } SEC("xdp") int foo(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; void *data_end = (void *)(long)ctx->data_end; return bar(data_end - data); } Here is the output from the existing version (using libbfd): # bpftool version bpftool v7.1.0 using libbpf v1.1 features: libbfd, libbpf_strict, skeletons # bpftool prog dump jited name foo int foo(struct xdp_md * ctx): bpf_prog_65e359e7b0251046_foo: ; void *data = (void *)(long)ctx->data; 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 7: push %rbp 8: mov %rsp,%rbp b: mov 0x0(%rdi),%rsi ; void *data_end = (void *)(long)ctx->data_end; f: mov 0x8(%rdi),%rdi ; return bar(data_end - data); 13: sub %esi,%edi ; return bar(data_end - data); 15: call 0x0000000000000038 ; return bar(data_end - data); 1a: leave 1b: ret int bar(int b): bpf_prog_9b001d67a67f01cc_bar: ; int bar(int b) { 0: nopl 0x0(%rax,%rax,1) 5: xchg %ax,%ax 7: push 
%rbp 8: mov %rsp,%rbp b: push %rbx c: mov %edi,%ebx ; return bpf_get_prandom_u32() > b; e: call 0xffffffffcab00454 13: mov %eax,%edi 15: mov $0x1,%eax ; return bpf_get_prandom_u32() > b; 1a: cmp %ebx,%edi 1c: ja 0x0000000000000020 1e: xor %eax,%eax ; return bpf_get_prandom_u32() > b; 20: pop %rbx 21: leave 22: ret Did you expect "bar" to appear on insn '15:'? I don't think we get this from bpftool at the moment? Or did I misunderstand your question? The output from LLVM's disassembler comes below: # ./bpftool version bpftool v7.1.0 using libbpf v1.1 features: llvm, libbpf_strict, skeletons # ./bpftool prog dump jited name foo int foo(struct xdp_md * ctx): bpf_prog_65e359e7b0251046_foo: ; void *data = (void *)(long)ctx->data; 0: nopl (%rax,%rax) 5: nop 7: pushq %rbp 8: movq %rsp, %rbp b: movq (%rdi), %rsi ; void *data_end = (void *)(long)ctx->data_end; f: movq 8(%rdi), %rdi ; return bar(data_end - data); 13: subl %esi, %edi ; return bar(data_end - data); 15: callq 0x38 ; return bar(data_end - data); 1a: leave 1b: retq int bar(int b): bpf_prog_9b001d67a67f01cc_bar: ; int bar(int b) { 0: nopl (%rax,%rax) 5: nop 7: pushq %rbp 8: movq %rsp, %rbp b: pushq %rbx c: movl %edi, %ebx ; return bpf_get_prandom_u32() > b; e: callq 0xffffffffcab00454 13: movl %eax, %edi 15: movl $1, %eax ; return bpf_get_prandom_u32() > b; 1a: cmpl %ebx, %edi 1c: ja 0x20 1e: xorl %eax, %eax ; return bpf_get_prandom_u32() > b; 20: popq %rbx 21: leave 22: retq LLVM, but _without_ the LLVMDisassembler_ReferenceType_InOut_None: int foo(struct xdp_md * ctx): bpf_prog_65e359e7b0251046_foo: ; void *data = (void *)(long)ctx->data; 0: nopl (%rax,%rax) 5: nop 7: pushq %rbp 8: movq %rsp, %rbp b: movq (%rdi), %rsi ; void *data_end = (void *)(long)ctx->data_end; f: movq 8(%rdi), %rdi ; return bar(data_end - data); 13: subl %esi, %edi ; return bar(data_end - data); 15: callq 30 ; return bar(data_end - data); 1a: leave 1b: retq int bar(int b): bpf_prog_9b001d67a67f01cc_bar: ; int bar(int b) { 0: nopl 
(%rax,%rax) 5: nop 7: pushq %rbp 8: movq %rsp, %rbp b: pushq %rbx c: movl %edi, %ebx ; return bpf_get_prandom_u32() > b; e: callq -894434239 13: movl %eax, %edi 15: movl $1, %eax ; return bpf_get_prandom_u32() > b; 1a: cmpl %ebx, %edi 1c: ja 2 1e: xorl %eax, %eax ; return bpf_get_prandom_u32() > b; 20: popq %rbx 21: leave 22: retq
On Mon, Oct 24, 2022 at 4:05 AM Quentin Monnet <quentin@isovalent.com> wrote: > > 2022-10-20 10:49 UTC-0700 ~ Alexei Starovoitov > <alexei.starovoitov@gmail.com> > > On Thu, Oct 20, 2022 at 5:37 AM Quentin Monnet <quentin@isovalent.com> wrote: > >> > >> + > >> +/* This callback to set the ref_type is necessary to have the LLVM disassembler > >> + * print PC-relative addresses instead of byte offsets for branch instruction > >> + * targets. > >> + */ > >> +static const char * > >> +symbol_lookup_callback(__maybe_unused void *disasm_info, > >> + __maybe_unused uint64_t ref_value, > >> + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, > >> + __maybe_unused const char **ref_name) > >> +{ > >> + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; > >> + return NULL; > >> +} > > > > Could you give an example before/after for asm > > that contains 'call foo' instructions? > > I'm not sure that above InOut_None will not break > > symbolization. > > Hi Alexei, I ran a quick test and it doesn't seem we lose any > information. Building from: Thanks for checking. The output looks good.
On Mon, Oct 24, 2022 at 11:40 AM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Mon, Oct 24, 2022 at 4:05 AM Quentin Monnet <quentin@isovalent.com> wrote: > > > > 2022-10-20 10:49 UTC-0700 ~ Alexei Starovoitov > > <alexei.starovoitov@gmail.com> > > > On Thu, Oct 20, 2022 at 5:37 AM Quentin Monnet <quentin@isovalent.com> wrote: > > >> > > >> + > > >> +/* This callback to set the ref_type is necessary to have the LLVM disassembler > > >> + * print PC-relative addresses instead of byte offsets for branch instruction > > >> + * targets. > > >> + */ > > >> +static const char * > > >> +symbol_lookup_callback(__maybe_unused void *disasm_info, > > >> + __maybe_unused uint64_t ref_value, > > >> + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, > > >> + __maybe_unused const char **ref_name) > > >> +{ > > >> + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; > > >> + return NULL; > > >> +} > > > > > > Could you give an example before/after for asm > > > that contains 'call foo' instructions? > > > I'm not sure that above InOut_None will not break > > > symbolization. > > > > Hi Alexei, I ran a quick test and it doesn't seem we lose any > > information. Building from: > > Thanks for checking. The output looks good. Please rebase. It doesn't apply anymore.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 1c81f4d514bb..787b857d3fb5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -95,6 +95,7 @@ RM ?= rm -f FEATURE_USER = .bpftool FEATURE_TESTS := clang-bpf-co-re +FEATURE_TESTS += llvm FEATURE_TESTS += libcap FEATURE_TESTS += libbfd FEATURE_TESTS += libbfd-liberty @@ -103,6 +104,7 @@ FEATURE_TESTS += disassembler-four-args FEATURE_TESTS += disassembler-init-styled FEATURE_DISPLAY := clang-bpf-co-re +FEATURE_DISPLAY += llvm FEATURE_DISPLAY += libcap FEATURE_DISPLAY += libbfd FEATURE_DISPLAY += libbfd-liberty @@ -137,27 +139,37 @@ all: $(OUTPUT)bpftool SRCS := $(wildcard *.c) -ifeq ($(feature-libbfd),1) - LIBS += -lbfd -ldl -lopcodes -else ifeq ($(feature-libbfd-liberty),1) - LIBS += -lbfd -ldl -lopcodes -liberty -else ifeq ($(feature-libbfd-liberty-z),1) - LIBS += -lbfd -ldl -lopcodes -liberty -lz -endif +ifeq ($(feature-llvm),1) + # If LLVM is available, use it for JIT disassembly + CFLAGS += -DHAVE_LLVM_SUPPORT + LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets + CFLAGS += $(shell $(LLVM_CONFIG) --cflags --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) +else + # Fall back on libbfd + ifeq ($(feature-libbfd),1) + LIBS += -lbfd -ldl -lopcodes + else ifeq ($(feature-libbfd-liberty),1) + LIBS += -lbfd -ldl -lopcodes -liberty + else ifeq ($(feature-libbfd-liberty-z),1) + LIBS += -lbfd -ldl -lopcodes -liberty -lz + endif -# If one of the above feature combinations is set, we support libbfd -ifneq ($(filter -lbfd,$(LIBS)),) - CFLAGS += -DHAVE_LIBBFD_SUPPORT + # If one of the above feature combinations is set, we support libbfd + ifneq ($(filter -lbfd,$(LIBS)),) + CFLAGS += -DHAVE_LIBBFD_SUPPORT - # Libbfd interface changed over time, figure out what we need - ifeq ($(feature-disassembler-four-args), 1) - CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE - endif - ifeq 
($(feature-disassembler-init-styled), 1) - CFLAGS += -DDISASM_INIT_STYLED + # Libbfd interface changed over time, figure out what we need + ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE + endif + ifeq ($(feature-disassembler-init-styled), 1) + CFLAGS += -DDISASM_INIT_STYLED + endif endif endif -ifeq ($(filter -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) +ifeq ($(filter -DHAVE_LLVM_SUPPORT -DHAVE_LIBBFD_SUPPORT,$(CFLAGS)),) # No support for JIT disassembly SRCS := $(filter-out jit_disasm.c,$(SRCS)) endif diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index e31ad3950fd6..c28b21f90cb9 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -20,18 +20,123 @@ #include <stdlib.h> #include <unistd.h> #include <string.h> -#include <bfd.h> -#include <dis-asm.h> #include <sys/stat.h> #include <limits.h> #include <bpf/libbpf.h> + +#ifdef HAVE_LLVM_SUPPORT +#include <llvm-c/Core.h> +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#include <llvm-c/TargetMachine.h> +#endif + +#ifdef HAVE_LIBBFD_SUPPORT +#include <bfd.h> +#include <dis-asm.h> #include <tools/dis-asm-compat.h> +#endif #include "json_writer.h" #include "main.h" static int oper_count; +#ifdef HAVE_LLVM_SUPPORT +#define DISASM_SPACER + +typedef LLVMDisasmContextRef disasm_ctx_t; + +static int printf_json(char *s) +{ + s = strtok(s, " \t"); + jsonw_string_field(json_wtr, "operation", s); + + jsonw_name(json_wtr, "operands"); + jsonw_start_array(json_wtr); + oper_count = 1; + + while ((s = strtok(NULL, " \t,()")) != 0) { + jsonw_string(json_wtr, s); + oper_count++; + } + return 0; +} + +/* This callback to set the ref_type is necessary to have the LLVM disassembler + * print PC-relative addresses instead of byte offsets for branch instruction + * targets. 
+ */ +static const char * +symbol_lookup_callback(__maybe_unused void *disasm_info, + __maybe_unused uint64_t ref_value, + uint64_t *ref_type, __maybe_unused uint64_t ref_PC, + __maybe_unused const char **ref_name) +{ + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} + +static int +init_context(disasm_ctx_t *ctx, const char *arch, + __maybe_unused const char *disassembler_options, + __maybe_unused unsigned char *image, __maybe_unused ssize_t len) +{ + char *triple; + + if (arch) { + p_err("Architecture %s not supported", arch); + return -1; + } + + triple = LLVMGetDefaultTargetTriple(); + if (!triple) { + p_err("Failed to retrieve triple"); + return -1; + } + *ctx = LLVMCreateDisasm(triple, NULL, 0, NULL, symbol_lookup_callback); + LLVMDisposeMessage(triple); + + if (!*ctx) { + p_err("Failed to create disassembler"); + return -1; + } + + return 0; +} + +static void destroy_context(disasm_ctx_t *ctx) +{ + LLVMDisposeMessage(*ctx); +} + +static int +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +{ + char buf[256]; + int count; + + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + buf, sizeof(buf)); + if (json_output) + printf_json(buf); + else + printf("%s", buf); + + return count; +} + +int disasm_init(void) +{ + LLVMInitializeNativeTarget(); + LLVMInitializeNativeDisassembler(); + return 0; +} +#endif /* HAVE_LLVM_SUPPORT */ + +#ifdef HAVE_LIBBFD_SUPPORT +#define DISASM_SPACER "\t" + typedef struct { struct disassemble_info *info; disassembler_ftype disassemble; @@ -210,6 +315,7 @@ int disasm_init(void) bfd_init(); return 0; } +#endif /* HAVE_LIBBFD_SUPPORT */ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options, @@ -252,7 +358,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (linfo) btf_dump_linfo_plain(btf, linfo, "; ", linum); - printf("%4x:\t", pc); + printf("%4x:" DISASM_SPACER, pc); } count = 
disassemble_insn(&ctx, image, len, pc); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c9e171082cf6..9a149c67aa5d 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -172,7 +172,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; -#ifdef HAVE_LIBBFD_SUPPORT +#if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, const char *arch, const char *disassembler_options, const struct btf *btf, @@ -193,7 +193,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, } static inline int disasm_init(void) { - p_err("No libbfd support"); + p_err("No JIT disassembly support"); return -1; } #endif