Message ID | 20241023210437.2266063-1-vadfed@meta.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | [bpf-next,1/2] bpf: add bpf_get_hw_counter kfunc | expand |
On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote: > > New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts > it into rdtsc ordered call. Other architectures will get JIT > implementation too if supported. The fallback is to > __arch_get_hw_counter(). arch_get_hw_counter is a great idea. > Signed-off-by: Vadim Fedorenko <vadfed@meta.com> > --- > arch/x86/net/bpf_jit_comp.c | 23 +++++++++++++++++++++++ > arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++ > kernel/bpf/helpers.c | 7 +++++++ > kernel/bpf/verifier.c | 11 +++++++++++ > 4 files changed, 52 insertions(+) > > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c > index 06b080b61aa5..55595a0fa55b 100644 > --- a/arch/x86/net/bpf_jit_comp.c > +++ b/arch/x86/net/bpf_jit_comp.c > @@ -2126,6 +2126,29 @@ st: if (is_imm8(insn->off)) > case BPF_JMP | BPF_CALL: { > u8 *ip = image + addrs[i - 1]; > > + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { > + if (insn->dst_reg == 1) { > + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); > + > + /* Save RDX because RDTSC will use EDX:EAX to return u64 */ > + emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3); > + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) > + EMIT_LFENCE(); > + EMIT2(0x0F, 0x31); > + > + /* shl RDX, 32 */ > + maybe_emit_1mod(&prog, BPF_REG_3, true); > + EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32); > + /* or RAX, RDX */ > + maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true); > + EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3)); > + /* restore RDX from R11 */ > + emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG); This doesn't match static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) return (u64)rdtsc_ordered() & S64_MAX; - & is missing - rdtsc vs rdtscp but the latter one is much slower (I was told). So maybe instead of arch_get_hw_counter() it should be modelled as JIT of sched_clock() ? 
> + > + break; > + } > + } > + > func = (u8 *) __bpf_call_base + imm32; > if (tail_call_reachable) { > LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth); > diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c > index de0f9e5f9f73..c36ff18a044b 100644 > --- a/arch/x86/net/bpf_jit_comp32.c > +++ b/arch/x86/net/bpf_jit_comp32.c > @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, > if (insn->src_reg == BPF_PSEUDO_CALL) > goto notyet; > > + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { > + if (insn->dst_reg == 1) { > + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); > + > + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) > + EMIT3(0x0F, 0xAE, 0xE8); > + EMIT2(0x0F, 0x31); > + break; > + } > + } > + > if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { > int err; > > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index 5c3fdb29c1b1..6624b2465484 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -23,6 +23,7 @@ > #include <linux/btf_ids.h> > #include <linux/bpf_mem_alloc.h> > #include <linux/kasan.h> > +#include <asm/vdso/gettimeofday.h> > > #include "../../lib/kstrtox.h" > > @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user > return ret + 1; > } > > +__bpf_kfunc int bpf_get_hw_counter(void) > +{ > + return __arch_get_hw_counter(1, NULL); > +} > + > __bpf_kfunc_end_defs(); > > BTF_KFUNCS_START(generic_btf_ids) > @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) > BTF_ID_FLAGS(func, bpf_get_kmem_cache) > +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL) > BTF_KFUNCS_END(common_btf_ids) > > static const struct btf_kfunc_id_set common_kfunc_set = { > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index f514247ba8ba..5f0e4f91ce48 100644 > --- 
a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -11260,6 +11260,7 @@ enum special_kfunc_type { > KF_bpf_iter_css_task_new, > KF_bpf_session_cookie, > KF_bpf_get_kmem_cache, > + KF_bpf_get_hw_counter, > }; > > BTF_SET_START(special_kfunc_set) > @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie) > BTF_ID_UNUSED > #endif > BTF_ID(func, bpf_get_kmem_cache) > +BTF_ID(func, bpf_get_hw_counter) > > static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) > { > @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { > insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); > *cnt = 1; > + } else if (IS_ENABLED(CONFIG_X86) && It's better to introduce bpf_jit_inlines_kfunc_call() similar to bpf_jit_inlines_helper_call(). > + desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) { > + insn->imm = 0; > + insn->code = BPF_JMP | BPF_CALL; > + insn->src_reg = BPF_PSEUDO_KFUNC_CALL; > + insn->dst_reg = 1; /* Implement enum for inlined fast calls */ Yes. Pls do it cleanly from the start. Why rewrite though? Can JIT match the addr of bpf_get_hw_counter ? And no need to rewrite call insn ?
On 24/10/2024 02:39, Alexei Starovoitov wrote: > On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote: >> >> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts >> it into rdtsc ordered call. Other architectures will get JIT >> implementation too if supported. The fallback is to >> __arch_get_hw_counter(). > > arch_get_hw_counter is a great idea. > >> Signed-off-by: Vadim Fedorenko <vadfed@meta.com> >> --- >> arch/x86/net/bpf_jit_comp.c | 23 +++++++++++++++++++++++ >> arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++ >> kernel/bpf/helpers.c | 7 +++++++ >> kernel/bpf/verifier.c | 11 +++++++++++ >> 4 files changed, 52 insertions(+) >> >> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c >> index 06b080b61aa5..55595a0fa55b 100644 >> --- a/arch/x86/net/bpf_jit_comp.c >> +++ b/arch/x86/net/bpf_jit_comp.c >> @@ -2126,6 +2126,29 @@ st: if (is_imm8(insn->off)) >> case BPF_JMP | BPF_CALL: { >> u8 *ip = image + addrs[i - 1]; >> >> + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { >> + if (insn->dst_reg == 1) { >> + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); >> + >> + /* Save RDX because RDTSC will use EDX:EAX to return u64 */ >> + emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3); >> + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) >> + EMIT_LFENCE(); >> + EMIT2(0x0F, 0x31); >> + >> + /* shl RDX, 32 */ >> + maybe_emit_1mod(&prog, BPF_REG_3, true); >> + EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32); >> + /* or RAX, RDX */ >> + maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true); >> + EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3)); >> + /* restore RDX from R11 */ >> + emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG); > > This doesn't match > static inline u64 __arch_get_hw_counter(s32 clock_mode, > const struct vdso_data *vd) > { > if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) > return (u64)rdtsc_ordered() & S64_MAX; > > - & is missing & S64_MAX is only needed to early detect possible wrap-around of timecounter in 
case of vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE which adds namespace time offset. TSC is reset during CPU reset and will not overflow within 10 years according to "Intel 64 and IA-32 Architecture Software Developer's Manual, Vol 3B", so it doesn't really matter if we mask the highest bit or not while accessing raw cycles counter. > - rdtsc vs rdtscp rdtscp provides an additional 32 bits of "signature value" atomically with TSC value in ECX. This value is not really usable outside of the domain which set it during initialization. The kernel stores encoded cpuid into IA32_TSC_AUX to provide it back to user-space application, but at the same time ignores its value during read. The combination of lfence and rdtsc will give us the same result (ordered read of TSC value independent of the core) without trashing the ECX value. > but the latter one is much slower (I was told). It is slower on AMD CPUs for now, easily visible in perf traces under load. > So maybe instead of arch_get_hw_counter() it should be modelled > as JIT of sched_clock() ? sched_clock() is much more complicated because it converts cycles counter into ns, we don't actually need this conversion, let's stick to arch_get_hw_counter(). 
> >> + >> + break; >> + } >> + } >> + >> func = (u8 *) __bpf_call_base + imm32; >> if (tail_call_reachable) { >> LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth); >> diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c >> index de0f9e5f9f73..c36ff18a044b 100644 >> --- a/arch/x86/net/bpf_jit_comp32.c >> +++ b/arch/x86/net/bpf_jit_comp32.c >> @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, >> if (insn->src_reg == BPF_PSEUDO_CALL) >> goto notyet; >> >> + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { >> + if (insn->dst_reg == 1) { >> + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); >> + >> + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) >> + EMIT3(0x0F, 0xAE, 0xE8); >> + EMIT2(0x0F, 0x31); >> + break; >> + } >> + } >> + >> if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { >> int err; >> >> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c >> index 5c3fdb29c1b1..6624b2465484 100644 >> --- a/kernel/bpf/helpers.c >> +++ b/kernel/bpf/helpers.c >> @@ -23,6 +23,7 @@ >> #include <linux/btf_ids.h> >> #include <linux/bpf_mem_alloc.h> >> #include <linux/kasan.h> >> +#include <asm/vdso/gettimeofday.h> >> >> #include "../../lib/kstrtox.h" >> >> @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user >> return ret + 1; >> } >> >> +__bpf_kfunc int bpf_get_hw_counter(void) >> +{ >> + return __arch_get_hw_counter(1, NULL); >> +} >> + >> __bpf_kfunc_end_defs(); >> >> BTF_KFUNCS_START(generic_btf_ids) >> @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) >> BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) >> BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) >> BTF_ID_FLAGS(func, bpf_get_kmem_cache) >> +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL) >> BTF_KFUNCS_END(common_btf_ids) >> >> static const struct btf_kfunc_id_set common_kfunc_set = { >> diff --git a/kernel/bpf/verifier.c 
b/kernel/bpf/verifier.c >> index f514247ba8ba..5f0e4f91ce48 100644 >> --- a/kernel/bpf/verifier.c >> +++ b/kernel/bpf/verifier.c >> @@ -11260,6 +11260,7 @@ enum special_kfunc_type { >> KF_bpf_iter_css_task_new, >> KF_bpf_session_cookie, >> KF_bpf_get_kmem_cache, >> + KF_bpf_get_hw_counter, >> }; >> >> BTF_SET_START(special_kfunc_set) >> @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie) >> BTF_ID_UNUSED >> #endif >> BTF_ID(func, bpf_get_kmem_cache) >> +BTF_ID(func, bpf_get_hw_counter) >> >> static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) >> { >> @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, >> desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { >> insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); >> *cnt = 1; >> + } else if (IS_ENABLED(CONFIG_X86) && > > It's better to introduce bpf_jit_inlines_kfunc_call() > similar to bpf_jit_inlines_helper_call(). Yep, I thought about introducing it while adding more architectures, but can do it from the beginning. > >> + desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) { >> + insn->imm = 0; >> + insn->code = BPF_JMP | BPF_CALL; >> + insn->src_reg = BPF_PSEUDO_KFUNC_CALL; >> + insn->dst_reg = 1; /* Implement enum for inlined fast calls */ > > Yes. Pls do it cleanly from the start. > > Why rewrite though? > Can JIT match the addr of bpf_get_hw_counter ? > And no need to rewrite call insn ? I was thinking about this way, just wasn't able to find easy examples of matching function addresses in jit. I'll try to make it but it may add some extra functions to the jit.
Hi Vadim,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242353.krjd8d6t-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from kernel/bpf/helpers.c:26:
>> arch/powerpc/include/asm/vdso/gettimeofday.h:97:63: warning: 'struct vdso_data' declared inside parameter list will not be visible outside of this definition or declaration
97 | const struct vdso_data *vd)
| ^~~~~~~~~
vim +97 arch/powerpc/include/asm/vdso/gettimeofday.h
ce7d8056e38b77 Christophe Leroy 2020-11-27 95
ce7d8056e38b77 Christophe Leroy 2020-11-27 96 static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
ce7d8056e38b77 Christophe Leroy 2020-11-27 @97 const struct vdso_data *vd)
ce7d8056e38b77 Christophe Leroy 2020-11-27 98 {
ce7d8056e38b77 Christophe Leroy 2020-11-27 99 return get_tb();
ce7d8056e38b77 Christophe Leroy 2020-11-27 100 }
ce7d8056e38b77 Christophe Leroy 2020-11-27 101
Hi Vadim, kernel test robot noticed the following build errors: [auto build test ERROR on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master patch link: https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc config: arm64-randconfig-001-20241024 (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410242310.od2UFxiK-lkp@intel.com/ All error/warnings (new ones prefixed by >>): In file included from kernel/bpf/helpers.c:4: In file included from include/linux/bpf.h:21: In file included from include/linux/kallsyms.h:13: In file included from include/linux/mm.h:2213: include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 504 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 505 | item]; | ~~~~ include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion] 511 | return vmstat_text[NR_VM_ZONE_STAT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~ ^ 512 | NR_VM_NUMA_EVENT_ITEMS + | ~~~~~~~~~~~~~~~~~~~~~~ include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum 
node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ In file included from kernel/bpf/helpers.c:26: >> arch/arm64/include/asm/vdso/gettimeofday.h:70:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility] 70 | const struct vdso_data *vd) | ^ >> arch/arm64/include/asm/vdso/gettimeofday.h:79:20: error: use of undeclared identifier 'VDSO_CLOCKMODE_NONE' 79 | if (clock_mode == VDSO_CLOCKMODE_NONE) | ^ >> arch/arm64/include/asm/vdso/gettimeofday.h:105:9: error: use of undeclared identifier '_vdso_data' 105 | return _vdso_data; | ^ kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 115 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 128 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 539 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 542 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 567 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:570:41: warning: bitwise 
operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 570 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 583 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 653 | .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 725 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 738 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1080 | .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1641 | .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~ kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1746 | .arg4_type = 
ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~ kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1789 | .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1837 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1839 | .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1879 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ 21 warnings and 2 errors generated. vim +/VDSO_CLOCKMODE_NONE +79 arch/arm64/include/asm/vdso/gettimeofday.h 28b1a824a4f44d Vincenzo Frascino 2019-06-21 68 4c5a116ada953b Thomas Gleixner 2020-08-04 69 static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, 4c5a116ada953b Thomas Gleixner 2020-08-04 @70 const struct vdso_data *vd) 28b1a824a4f44d Vincenzo Frascino 2019-06-21 71 { 28b1a824a4f44d Vincenzo Frascino 2019-06-21 72 u64 res; 28b1a824a4f44d Vincenzo Frascino 2019-06-21 73 27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 74 /* 5e3c6a312a0946 Thomas Gleixner 2020-02-07 75 * Core checks for mode already, so this raced against a concurrent 5e3c6a312a0946 Thomas Gleixner 2020-02-07 76 * update. Return something. Core will do another round and then 5e3c6a312a0946 Thomas Gleixner 2020-02-07 77 * see the mode change and fallback to the syscall. 
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 78 */ 5e3c6a312a0946 Thomas Gleixner 2020-02-07 @79 if (clock_mode == VDSO_CLOCKMODE_NONE) 5e3c6a312a0946 Thomas Gleixner 2020-02-07 80 return 0; 27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 81 27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 82 /* 9025cebf12d176 Joey Gouly 2022-08-30 83 * If FEAT_ECV is available, use the self-synchronizing counter. 9025cebf12d176 Joey Gouly 2022-08-30 84 * Otherwise the isb is required to prevent that the counter value 27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 85 * is speculated. 27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25 86 */ 9025cebf12d176 Joey Gouly 2022-08-30 87 asm volatile( 9025cebf12d176 Joey Gouly 2022-08-30 88 ALTERNATIVE("isb\n" 9025cebf12d176 Joey Gouly 2022-08-30 89 "mrs %0, cntvct_el0", 9025cebf12d176 Joey Gouly 2022-08-30 90 "nop\n" 9025cebf12d176 Joey Gouly 2022-08-30 91 __mrs_s("%0", SYS_CNTVCTSS_EL0), 9025cebf12d176 Joey Gouly 2022-08-30 92 ARM64_HAS_ECV) 9025cebf12d176 Joey Gouly 2022-08-30 93 : "=r" (res) 9025cebf12d176 Joey Gouly 2022-08-30 94 : 9025cebf12d176 Joey Gouly 2022-08-30 95 : "memory"); 9025cebf12d176 Joey Gouly 2022-08-30 96 77ec462536a13d Will Deacon 2021-03-18 97 arch_counter_enforce_ordering(res); 28b1a824a4f44d Vincenzo Frascino 2019-06-21 98 28b1a824a4f44d Vincenzo Frascino 2019-06-21 99 return res; 28b1a824a4f44d Vincenzo Frascino 2019-06-21 100 } 28b1a824a4f44d Vincenzo Frascino 2019-06-21 101 28b1a824a4f44d Vincenzo Frascino 2019-06-21 102 static __always_inline 28b1a824a4f44d Vincenzo Frascino 2019-06-21 103 const struct vdso_data *__arch_get_vdso_data(void) 28b1a824a4f44d Vincenzo Frascino 2019-06-21 104 { 28b1a824a4f44d Vincenzo Frascino 2019-06-21 @105 return _vdso_data; 28b1a824a4f44d Vincenzo Frascino 2019-06-21 106 } 28b1a824a4f44d Vincenzo Frascino 2019-06-21 107
Hi Vadim, kernel test robot noticed the following build errors: [auto build test ERROR on bpf-next/master] url: https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master patch link: https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc config: riscv-defconfig (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/config) compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410242317.RqcJ3H1k-lkp@intel.com/ All error/warnings (new ones prefixed by >>): In file included from kernel/bpf/helpers.c:4: In file included from include/linux/bpf.h:21: In file included from include/linux/kallsyms.h:13: In file included from include/linux/mm.h:2213: include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion] 518 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" | ~~~~~~~~~~~ ^ ~~~ In file included from kernel/bpf/helpers.c:26: >> arch/riscv/include/asm/vdso/gettimeofday.h:72:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility] 72 | const struct vdso_data *vd) | ^ >> arch/riscv/include/asm/vdso/gettimeofday.h:84:9: error: use of undeclared identifier '_vdso_data' 84 | return _vdso_data; | ^ >> arch/riscv/include/asm/vdso/gettimeofday.h:91:9: error: 
use of undeclared identifier '_timens_data' 91 | return _timens_data; | ^ kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 115 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 128 | .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 539 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 542 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 567 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:570:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 570 | .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 583 | .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum 
bpf_type_flag') [-Wenum-enum-conversion] 653 | .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 725 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 738 | .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1080 | .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~ kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1641 | .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE, | ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~ kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1746 | .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~ kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1789 | .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1837 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ 
kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1839 | .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, | ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion] 1879 | .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY, | ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~ 19 warnings and 2 errors generated. vim +/_vdso_data +84 arch/riscv/include/asm/vdso/gettimeofday.h aa5af0aa90bad3 Evan Green 2023-04-07 70 4c5a116ada953b Thomas Gleixner 2020-08-04 71 static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, 4c5a116ada953b Thomas Gleixner 2020-08-04 @72 const struct vdso_data *vd) ad5d1122b82fbd Vincent Chen 2020-06-09 73 { ad5d1122b82fbd Vincent Chen 2020-06-09 74 /* ad5d1122b82fbd Vincent Chen 2020-06-09 75 * The purpose of csr_read(CSR_TIME) is to trap the system into ad5d1122b82fbd Vincent Chen 2020-06-09 76 * M-mode to obtain the value of CSR_TIME. 
Hence, unlike other ad5d1122b82fbd Vincent Chen 2020-06-09 77 * architecture, no fence instructions surround the csr_read() ad5d1122b82fbd Vincent Chen 2020-06-09 78 */ ad5d1122b82fbd Vincent Chen 2020-06-09 79 return csr_read(CSR_TIME); ad5d1122b82fbd Vincent Chen 2020-06-09 80 } ad5d1122b82fbd Vincent Chen 2020-06-09 81 ad5d1122b82fbd Vincent Chen 2020-06-09 82 static __always_inline const struct vdso_data *__arch_get_vdso_data(void) ad5d1122b82fbd Vincent Chen 2020-06-09 83 { ad5d1122b82fbd Vincent Chen 2020-06-09 @84 return _vdso_data; ad5d1122b82fbd Vincent Chen 2020-06-09 85 } ad5d1122b82fbd Vincent Chen 2020-06-09 86 dffe11e280a42c Tong Tiangen 2021-09-01 87 #ifdef CONFIG_TIME_NS dffe11e280a42c Tong Tiangen 2021-09-01 88 static __always_inline dffe11e280a42c Tong Tiangen 2021-09-01 89 const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) dffe11e280a42c Tong Tiangen 2021-09-01 90 { dffe11e280a42c Tong Tiangen 2021-09-01 @91 return _timens_data; dffe11e280a42c Tong Tiangen 2021-09-01 92 } dffe11e280a42c Tong Tiangen 2021-09-01 93 #endif ad5d1122b82fbd Vincent Chen 2020-06-09 94 #endif /* !__ASSEMBLY__ */ ad5d1122b82fbd Vincent Chen 2020-06-09 95
On Thu, Oct 24, 2024 at 6:11 AM Vadim Fedorenko <vadim.fedorenko@linux.dev> wrote: > > > static inline u64 __arch_get_hw_counter(s32 clock_mode, > > const struct vdso_data *vd) > > { > > if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) > > return (u64)rdtsc_ordered() & S64_MAX; > > > > - & is missing > > & S64_MAX is only needed to early detect possible wrap-around of > timecounter in case of vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE > which adds namespace time offset. TSC is reset during CPU reset and will > not overflow within 10 years according to "Intel 64 and IA-32 > Architecture Software Developer's Manual, Vol 3B", so it doesn't really > matter if we mask the highest bit or not while accessing raw cycles > counter. > > > - rdtsc vs rdtscp > > rdtscp provides an additional 32 bits of "signature value" atomically with > TSC value in ECX. This value is not really usable outside of the domain > which set it previously during initialization. The kernel stores encoded > cpuid into IA32_TSC_AUX to provide it back to user-space application, > but at the same time ignores its value during read. The combination of > lfence and rdtsc will give us the same result (ordered read of TSC value > independent of the core) without trashing the ECX value. That makes sense. One more thing: > __bpf_kfunc int bpf_get_hw_counter(void) it should be returning u64 > >> + insn->imm = 0; > >> + insn->code = BPF_JMP | BPF_CALL; > >> + insn->src_reg = BPF_PSEUDO_KFUNC_CALL; > >> + insn->dst_reg = 1; /* Implement enum for inlined fast calls */ > > > > Yes. Pls do it cleanly from the start. > > > > Why rewrite though? > > Can JIT match the addr of bpf_get_hw_counter ? > > And no need to rewrite call insn ? > > I was thinking about this way, just wasn't able to find easy examples of > matching function addresses in jit. I'll try to make it but it may add > some extra functions to the jit. func = (u8 *) __bpf_call_base + imm32; if (func == &bpf_get_hw_counter)
On Thu, 2024-10-24 at 14:11 +0100, Vadim Fedorenko wrote: [...] > > > @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, > > > desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { > > > insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); > > > *cnt = 1; > > > + } else if (IS_ENABLED(CONFIG_X86) && > > > > It's better to introduce bpf_jit_inlines_kfunc_call() > > similar to bpf_jit_inlines_helper_call(). > > Yep, I thought about introducing it while adding more architectures, but > can do it from the beginning. After thinking a bit more, I think I messed up in a private discussion with Vadim. It is necessary to introduce bpf_jit_inlines_kfunc_call() and use it in the mark_fastcall_pattern_for_call(), otherwise the following situation is possible: - the program is executed on the arch where inlining for bpf_get_hw_counter() is not implemented; - there is a pattern in the code: r1 = *(u64 *)(r10 - 256); call bpf_get_hw_fastcall *(u64 *)(r10 - 256) = r1; ... r10 - 8 is not used ... ... r1 is read ... - mark_fastcall_pattern_for_call() would mark the spill and fill as members of the pattern; - fastcall contract is not violated, because reserved stack slots are used as expected; - remove_fastcall_spills_fills() would remove the spill and fill: call bpf_get_hw_fastcall ... r1 is read ... - since the call is not transformed into instructions by a specific JIT, the value of r1 would be clobbered, making the program invalid. Vadim, sorry I did not point this out earlier, I thought that the fastcall contract-ensuring logic would handle everything. [...]
On Thu, 2024-10-24 at 10:31 -0700, Eduard Zingerman wrote:
[...]
> ... r10 - 8 is not used ...
I mean r10 - 256.
[...]
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06b080b61aa5..55595a0fa55b 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2126,6 +2126,29 @@ st: if (is_imm8(insn->off)) case BPF_JMP | BPF_CALL: { u8 *ip = image + addrs[i - 1]; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { + if (insn->dst_reg == 1) { + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); + + /* Save RDX because RDTSC will use EDX:EAX to return u64 */ + emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3); + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) + EMIT_LFENCE(); + EMIT2(0x0F, 0x31); + + /* shl RDX, 32 */ + maybe_emit_1mod(&prog, BPF_REG_3, true); + EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32); + /* or RAX, RDX */ + maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true); + EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3)); + /* restore RDX from R11 */ + emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG); + + break; + } + } + func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth); diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index de0f9e5f9f73..c36ff18a044b 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (insn->src_reg == BPF_PSEUDO_CALL) goto notyet; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) { + if (insn->dst_reg == 1) { + struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id()); + + if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC)) + EMIT3(0x0F, 0xAE, 0xE8); + EMIT2(0x0F, 0x31); + break; + } + } + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { int err; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5c3fdb29c1b1..6624b2465484 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -23,6 +23,7 @@ #include <linux/btf_ids.h> #include <linux/bpf_mem_alloc.h> #include <linux/kasan.h> +#include 
<asm/vdso/gettimeofday.h> #include "../../lib/kstrtox.h" @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user return ret + 1; } +__bpf_kfunc int bpf_get_hw_counter(void) +{ + return __arch_get_hw_counter(1, NULL); +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_get_kmem_cache) +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f514247ba8ba..5f0e4f91ce48 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11260,6 +11260,7 @@ enum special_kfunc_type { KF_bpf_iter_css_task_new, KF_bpf_session_cookie, KF_bpf_get_kmem_cache, + KF_bpf_get_hw_counter, }; BTF_SET_START(special_kfunc_set) @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie) BTF_ID_UNUSED #endif BTF_ID(func, bpf_get_kmem_cache) +BTF_ID(func, bpf_get_hw_counter) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; + } else if (IS_ENABLED(CONFIG_X86) && + desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) { + insn->imm = 0; + insn->code = BPF_JMP | BPF_CALL; + insn->src_reg = BPF_PSEUDO_KFUNC_CALL; + insn->dst_reg = 1; /* Implement enum for inlined fast calls */ + + insn_buf[0] = *insn; + *cnt = 1; } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) { struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts it into rdtsc ordered call. Other architectures will get JIT implementation too if supported. The fallback is to __arch_get_hw_counter(). Signed-off-by: Vadim Fedorenko <vadfed@meta.com> --- arch/x86/net/bpf_jit_comp.c | 23 +++++++++++++++++++++++ arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++ kernel/bpf/helpers.c | 7 +++++++ kernel/bpf/verifier.c | 11 +++++++++++ 4 files changed, 52 insertions(+)