diff mbox series

[bpf-next,1/2] bpf: add bpf_get_hw_counter kfunc

Message ID 20241023210437.2266063-1-vadfed@meta.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [bpf-next,1/2] bpf: add bpf_get_hw_counter kfunc | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 15 maintainers not CCed: dave.hansen@linux.intel.com song@kernel.org udknight@gmail.com haoluo@google.com bp@alien8.de netdev@vger.kernel.org john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev hpa@zytor.com dsahern@kernel.org kpsingh@kernel.org yonghong.song@linux.dev mingo@redhat.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 70 this patch: 71
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 11 this patch: 11
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for s390x-gcc / test
bpf/vmtest-bpf-next-VM_Test-12 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc

Commit Message

Vadim Fedorenko Oct. 23, 2024, 9:04 p.m. UTC
New kfunc to return the ARCH-specific timecounter. For x86, the BPF JIT
converts it into an ordered rdtsc call. Other architectures will get a
JIT implementation too if supported. The fallback is to
__arch_get_hw_counter().

Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
 arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
 arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
 kernel/bpf/helpers.c          |  7 +++++++
 kernel/bpf/verifier.c         | 11 +++++++++++
 4 files changed, 52 insertions(+)

Comments

Alexei Starovoitov Oct. 24, 2024, 1:39 a.m. UTC | #1
On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote:
>
> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts
> it into rdtsc ordered call. Other architectures will get JIT
> implementation too if supported. The fallback is to
> __arch_get_hw_counter().

arch_get_hw_counter is a great idea.

> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
> ---
>  arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
>  arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
>  kernel/bpf/helpers.c          |  7 +++++++
>  kernel/bpf/verifier.c         | 11 +++++++++++
>  4 files changed, 52 insertions(+)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 06b080b61aa5..55595a0fa55b 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -2126,6 +2126,29 @@ st:                      if (is_imm8(insn->off))
>                 case BPF_JMP | BPF_CALL: {
>                         u8 *ip = image + addrs[i - 1];
>
> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
> +                               if (insn->dst_reg == 1) {
> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
> +
> +                                       /* Save RDX because RDTSC will use EDX:EAX to return u64 */
> +                                       emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
> +                                               EMIT_LFENCE();
> +                                       EMIT2(0x0F, 0x31);
> +
> +                                       /* shl RDX, 32 */
> +                                       maybe_emit_1mod(&prog, BPF_REG_3, true);
> +                                       EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
> +                                       /* or RAX, RDX */
> +                                       maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
> +                                       EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
> +                                       /* restore RDX from R11 */
> +                                       emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);

This doesn't match
static inline u64 __arch_get_hw_counter(s32 clock_mode,
                                        const struct vdso_data *vd)
{
        if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
                return (u64)rdtsc_ordered() & S64_MAX;

- & is missing
- rdtsc vs rdtscp

but the latter one is much slower (I was told).

So maybe instead of arch_get_hw_counter() it should be modelled
as JIT of sched_clock() ?

> +
> +                                       break;
> +                               }
> +                       }
> +
>                         func = (u8 *) __bpf_call_base + imm32;
>                         if (tail_call_reachable) {
>                                 LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
> diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
> index de0f9e5f9f73..c36ff18a044b 100644
> --- a/arch/x86/net/bpf_jit_comp32.c
> +++ b/arch/x86/net/bpf_jit_comp32.c
> @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>                         if (insn->src_reg == BPF_PSEUDO_CALL)
>                                 goto notyet;
>
> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
> +                               if (insn->dst_reg == 1) {
> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
> +
> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
> +                                               EMIT3(0x0F, 0xAE, 0xE8);
> +                                       EMIT2(0x0F, 0x31);
> +                                       break;
> +                               }
> +                       }
> +
>                         if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
>                                 int err;
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5c3fdb29c1b1..6624b2465484 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -23,6 +23,7 @@
>  #include <linux/btf_ids.h>
>  #include <linux/bpf_mem_alloc.h>
>  #include <linux/kasan.h>
> +#include <asm/vdso/gettimeofday.h>
>
>  #include "../../lib/kstrtox.h"
>
> @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
>         return ret + 1;
>  }
>
> +__bpf_kfunc int bpf_get_hw_counter(void)
> +{
> +       return __arch_get_hw_counter(1, NULL);
> +}
> +
>  __bpf_kfunc_end_defs();
>
>  BTF_KFUNCS_START(generic_btf_ids)
> @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>  BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>  BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
>  BTF_ID_FLAGS(func, bpf_get_kmem_cache)
> +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
>  BTF_KFUNCS_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index f514247ba8ba..5f0e4f91ce48 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -11260,6 +11260,7 @@ enum special_kfunc_type {
>         KF_bpf_iter_css_task_new,
>         KF_bpf_session_cookie,
>         KF_bpf_get_kmem_cache,
> +       KF_bpf_get_hw_counter,
>  };
>
>  BTF_SET_START(special_kfunc_set)
> @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie)
>  BTF_ID_UNUSED
>  #endif
>  BTF_ID(func, bpf_get_kmem_cache)
> +BTF_ID(func, bpf_get_hw_counter)
>
>  static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>  {
> @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                    desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>                 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>                 *cnt = 1;
> +       } else if (IS_ENABLED(CONFIG_X86) &&

It's better to introduce bpf_jit_inlines_kfunc_call()
similar to bpf_jit_inlines_helper_call().

> +                  desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
> +               insn->imm = 0;
> +               insn->code = BPF_JMP | BPF_CALL;
> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */

Yes. Pls do it cleanly from the start.

Why rewrite though?
Can JIT match the addr of bpf_get_hw_counter ?
And no need to rewrite call insn ?
Vadim Fedorenko Oct. 24, 2024, 1:11 p.m. UTC | #2
On 24/10/2024 02:39, Alexei Starovoitov wrote:
> On Wed, Oct 23, 2024 at 2:05 PM Vadim Fedorenko <vadfed@meta.com> wrote:
>>
>> New kfunc to return ARCH-specific timecounter. For x86 BPF JIT converts
>> it into rdtsc ordered call. Other architectures will get JIT
>> implementation too if supported. The fallback is to
>> __arch_get_hw_counter().
> 
> arch_get_hw_counter is a great idea.
> 
>> Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
>> ---
>>   arch/x86/net/bpf_jit_comp.c   | 23 +++++++++++++++++++++++
>>   arch/x86/net/bpf_jit_comp32.c | 11 +++++++++++
>>   kernel/bpf/helpers.c          |  7 +++++++
>>   kernel/bpf/verifier.c         | 11 +++++++++++
>>   4 files changed, 52 insertions(+)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 06b080b61aa5..55595a0fa55b 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -2126,6 +2126,29 @@ st:                      if (is_imm8(insn->off))
>>                  case BPF_JMP | BPF_CALL: {
>>                          u8 *ip = image + addrs[i - 1];
>>
>> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
>> +                               if (insn->dst_reg == 1) {
>> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
>> +
>> +                                       /* Save RDX because RDTSC will use EDX:EAX to return u64 */
>> +                                       emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
>> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
>> +                                               EMIT_LFENCE();
>> +                                       EMIT2(0x0F, 0x31);
>> +
>> +                                       /* shl RDX, 32 */
>> +                                       maybe_emit_1mod(&prog, BPF_REG_3, true);
>> +                                       EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
>> +                                       /* or RAX, RDX */
>> +                                       maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
>> +                                       EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
>> +                                       /* restore RDX from R11 */
>> +                                       emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);
> 
> This doesn't match
> static inline u64 __arch_get_hw_counter(s32 clock_mode,
>                                          const struct vdso_data *vd)
> {
>          if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
>                  return (u64)rdtsc_ordered() & S64_MAX;
> 
> - & is missing

& S64_MAX is only needed to detect early a possible wrap-around of the
timecounter in the case of a vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE,
which adds the namespace time offset. The TSC is reset during CPU reset and
will not overflow within 10 years according to the "Intel 64 and IA-32
Architectures Software Developer's Manual, Vol 3B", so it doesn't really
matter whether we mask the highest bit or not while accessing the raw
cycles counter.

> - rdtsc vs rdtscp

rdtscp provides an additional 32 bits of "signature value" in ECX,
atomically with the TSC value. This value is not really usable outside of
the domain which set it during initialization. The kernel stores an
encoded cpuid in IA32_TSC_AUX to provide it back to the user-space
application, but at the same time ignores its value during reads. The
combination of lfence and rdtsc will give us the same result (an ordered
read of the TSC value independent of the core) without trashing the ECX
value.

> but the latter one is much slower (I was told).

It is slower on AMD CPUs for now, easily visible in perf traces under load.

> So maybe instead of arch_get_hw_counter() it should be modelled
> as JIT of sched_clock() ?

sched_clock() is much more complicated because it converts the cycles
counter into ns. We don't actually need this conversion, so let's stick
to arch_get_hw_counter().

> 
>> +
>> +                                       break;
>> +                               }
>> +                       }
>> +
>>                          func = (u8 *) __bpf_call_base + imm32;
>>                          if (tail_call_reachable) {
>>                                  LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
>> diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
>> index de0f9e5f9f73..c36ff18a044b 100644
>> --- a/arch/x86/net/bpf_jit_comp32.c
>> +++ b/arch/x86/net/bpf_jit_comp32.c
>> @@ -2091,6 +2091,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
>>                          if (insn->src_reg == BPF_PSEUDO_CALL)
>>                                  goto notyet;
>>
>> +                       if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
>> +                               if (insn->dst_reg == 1) {
>> +                                       struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
>> +
>> +                                       if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
>> +                                               EMIT3(0x0F, 0xAE, 0xE8);
>> +                                       EMIT2(0x0F, 0x31);
>> +                                       break;
>> +                               }
>> +                       }
>> +
>>                          if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
>>                                  int err;
>>
>> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
>> index 5c3fdb29c1b1..6624b2465484 100644
>> --- a/kernel/bpf/helpers.c
>> +++ b/kernel/bpf/helpers.c
>> @@ -23,6 +23,7 @@
>>   #include <linux/btf_ids.h>
>>   #include <linux/bpf_mem_alloc.h>
>>   #include <linux/kasan.h>
>> +#include <asm/vdso/gettimeofday.h>
>>
>>   #include "../../lib/kstrtox.h"
>>
>> @@ -3023,6 +3024,11 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
>>          return ret + 1;
>>   }
>>
>> +__bpf_kfunc int bpf_get_hw_counter(void)
>> +{
>> +       return __arch_get_hw_counter(1, NULL);
>> +}
>> +
>>   __bpf_kfunc_end_defs();
>>
>>   BTF_KFUNCS_START(generic_btf_ids)
>> @@ -3112,6 +3118,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
>>   BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>>   BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
>>   BTF_ID_FLAGS(func, bpf_get_kmem_cache)
>> +BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
>>   BTF_KFUNCS_END(common_btf_ids)
>>
>>   static const struct btf_kfunc_id_set common_kfunc_set = {
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index f514247ba8ba..5f0e4f91ce48 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -11260,6 +11260,7 @@ enum special_kfunc_type {
>>          KF_bpf_iter_css_task_new,
>>          KF_bpf_session_cookie,
>>          KF_bpf_get_kmem_cache,
>> +       KF_bpf_get_hw_counter,
>>   };
>>
>>   BTF_SET_START(special_kfunc_set)
>> @@ -11326,6 +11327,7 @@ BTF_ID(func, bpf_session_cookie)
>>   BTF_ID_UNUSED
>>   #endif
>>   BTF_ID(func, bpf_get_kmem_cache)
>> +BTF_ID(func, bpf_get_hw_counter)
>>
>>   static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
>>   {
>> @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>>                     desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
>>                  insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
>>                  *cnt = 1;
>> +       } else if (IS_ENABLED(CONFIG_X86) &&
> 
> It's better to introduce bpf_jit_inlines_kfunc_call()
> similar to bpf_jit_inlines_helper_call().

Yep, I thought about introducing it while adding more architectures, but
can do it from the beginning.

> 
>> +                  desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
>> +               insn->imm = 0;
>> +               insn->code = BPF_JMP | BPF_CALL;
>> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
>> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */
> 
> Yes. Pls do it cleanly from the start.
> 
> Why rewrite though?
> Can JIT match the addr of bpf_get_hw_counter ?
> And no need to rewrite call insn ?

I was thinking about doing it this way, but wasn't able to find easy
examples of matching function addresses in the JIT. I'll try to make it
work, but it may add some extra functions to the JIT.
kernel test robot Oct. 24, 2024, 4:03 p.m. UTC | #3
Hi Vadim,

kernel test robot noticed the following build warnings:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242353.krjd8d6t-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242353.krjd8d6t-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:26:
>> arch/powerpc/include/asm/vdso/gettimeofday.h:97:63: warning: 'struct vdso_data' declared inside parameter list will not be visible outside of this definition or declaration
      97 |                                                  const struct vdso_data *vd)
         |                                                               ^~~~~~~~~


vim +97 arch/powerpc/include/asm/vdso/gettimeofday.h

ce7d8056e38b77 Christophe Leroy 2020-11-27   95  
ce7d8056e38b77 Christophe Leroy 2020-11-27   96  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
ce7d8056e38b77 Christophe Leroy 2020-11-27  @97  						 const struct vdso_data *vd)
ce7d8056e38b77 Christophe Leroy 2020-11-27   98  {
ce7d8056e38b77 Christophe Leroy 2020-11-27   99  	return get_tb();
ce7d8056e38b77 Christophe Leroy 2020-11-27  100  }
ce7d8056e38b77 Christophe Leroy 2020-11-27  101
kernel test robot Oct. 24, 2024, 4:13 p.m. UTC | #4
Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: arm64-randconfig-001-20241024 (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242310.od2UFxiK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242310.od2UFxiK-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:4:
   In file included from include/linux/bpf.h:21:
   In file included from include/linux/kallsyms.h:13:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:504:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     504 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     505 |                            item];
         |                            ~~~~
   include/linux/vmstat.h:511:43: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum numa_stat_item') [-Wenum-enum-conversion]
     511 |         return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~ ^
     512 |                            NR_VM_NUMA_EVENT_ITEMS +
         |                            ~~~~~~~~~~~~~~~~~~~~~~
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/bpf/helpers.c:26:
>> arch/arm64/include/asm/vdso/gettimeofday.h:70:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility]
      70 |                                                  const struct vdso_data *vd)
         |                                                               ^
>> arch/arm64/include/asm/vdso/gettimeofday.h:79:20: error: use of undeclared identifier 'VDSO_CLOCKMODE_NONE'
      79 |         if (clock_mode == VDSO_CLOCKMODE_NONE)
         |                           ^
>> arch/arm64/include/asm/vdso/gettimeofday.h:105:9: error: use of undeclared identifier '_vdso_data'
     105 |         return _vdso_data;
         |                ^
   kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     115 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     128 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     539 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     542 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     567 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:570:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     570 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     583 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     653 |         .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     725 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     738 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1080 |         .arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1641 |         .arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
         |                         ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~
   kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1746 |         .arg4_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1789 |         .arg3_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1837 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1839 |         .arg3_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1879 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   21 warnings and 2 errors generated.


vim +/VDSO_CLOCKMODE_NONE +79 arch/arm64/include/asm/vdso/gettimeofday.h

28b1a824a4f44d Vincenzo Frascino 2019-06-21   68  
4c5a116ada953b Thomas Gleixner   2020-08-04   69  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
4c5a116ada953b Thomas Gleixner   2020-08-04  @70  						 const struct vdso_data *vd)
28b1a824a4f44d Vincenzo Frascino 2019-06-21   71  {
28b1a824a4f44d Vincenzo Frascino 2019-06-21   72  	u64 res;
28b1a824a4f44d Vincenzo Frascino 2019-06-21   73  
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   74  	/*
5e3c6a312a0946 Thomas Gleixner   2020-02-07   75  	 * Core checks for mode already, so this raced against a concurrent
5e3c6a312a0946 Thomas Gleixner   2020-02-07   76  	 * update. Return something. Core will do another round and then
5e3c6a312a0946 Thomas Gleixner   2020-02-07   77  	 * see the mode change and fallback to the syscall.
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   78  	 */
5e3c6a312a0946 Thomas Gleixner   2020-02-07  @79  	if (clock_mode == VDSO_CLOCKMODE_NONE)
5e3c6a312a0946 Thomas Gleixner   2020-02-07   80  		return 0;
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   81  
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   82  	/*
9025cebf12d176 Joey Gouly        2022-08-30   83  	 * If FEAT_ECV is available, use the self-synchronizing counter.
9025cebf12d176 Joey Gouly        2022-08-30   84  	 * Otherwise the isb is required to prevent that the counter value
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   85  	 * is speculated.
27e11a9fe2e2e7 Vincenzo Frascino 2019-06-25   86  	*/
9025cebf12d176 Joey Gouly        2022-08-30   87  	asm volatile(
9025cebf12d176 Joey Gouly        2022-08-30   88  	ALTERNATIVE("isb\n"
9025cebf12d176 Joey Gouly        2022-08-30   89  		    "mrs %0, cntvct_el0",
9025cebf12d176 Joey Gouly        2022-08-30   90  		    "nop\n"
9025cebf12d176 Joey Gouly        2022-08-30   91  		    __mrs_s("%0", SYS_CNTVCTSS_EL0),
9025cebf12d176 Joey Gouly        2022-08-30   92  		    ARM64_HAS_ECV)
9025cebf12d176 Joey Gouly        2022-08-30   93  	: "=r" (res)
9025cebf12d176 Joey Gouly        2022-08-30   94  	:
9025cebf12d176 Joey Gouly        2022-08-30   95  	: "memory");
9025cebf12d176 Joey Gouly        2022-08-30   96  
77ec462536a13d Will Deacon       2021-03-18   97  	arch_counter_enforce_ordering(res);
28b1a824a4f44d Vincenzo Frascino 2019-06-21   98  
28b1a824a4f44d Vincenzo Frascino 2019-06-21   99  	return res;
28b1a824a4f44d Vincenzo Frascino 2019-06-21  100  }
28b1a824a4f44d Vincenzo Frascino 2019-06-21  101  
28b1a824a4f44d Vincenzo Frascino 2019-06-21  102  static __always_inline
28b1a824a4f44d Vincenzo Frascino 2019-06-21  103  const struct vdso_data *__arch_get_vdso_data(void)
28b1a824a4f44d Vincenzo Frascino 2019-06-21  104  {
28b1a824a4f44d Vincenzo Frascino 2019-06-21 @105  	return _vdso_data;
28b1a824a4f44d Vincenzo Frascino 2019-06-21  106  }
28b1a824a4f44d Vincenzo Frascino 2019-06-21  107
kernel test robot Oct. 24, 2024, 4:13 p.m. UTC | #5
Hi Vadim,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Vadim-Fedorenko/selftests-bpf-add-selftest-to-check-rdtsc-jit/20241024-050747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/20241023210437.2266063-1-vadfed%40meta.com
patch subject: [PATCH bpf-next 1/2] bpf: add bpf_get_hw_counter kfunc
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 5886454669c3c9026f7f27eab13509dd0241f2d6)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241024/202410242317.RqcJ3H1k-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410242317.RqcJ3H1k-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from kernel/bpf/helpers.c:4:
   In file included from include/linux/bpf.h:21:
   In file included from include/linux/kallsyms.h:13:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/bpf/helpers.c:26:
>> arch/riscv/include/asm/vdso/gettimeofday.h:72:21: warning: declaration of 'struct vdso_data' will not be visible outside of this function [-Wvisibility]
      72 |                                                  const struct vdso_data *vd)
         |                                                               ^
>> arch/riscv/include/asm/vdso/gettimeofday.h:84:9: error: use of undeclared identifier '_vdso_data'
      84 |         return _vdso_data;
         |                ^
>> arch/riscv/include/asm/vdso/gettimeofday.h:91:9: error: use of undeclared identifier '_timens_data'
      91 |         return _timens_data;
         |                ^
   kernel/bpf/helpers.c:115:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     115 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:128:36: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     128 |         .arg2_type      = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:539:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     539 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:542:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     542 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:567:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     567 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:570:41: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     570 |         .arg4_type      = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:583:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     583 |         .arg1_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:653:35: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     653 |         .arg4_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:725:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     725 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:738:39: warning: bitwise operation between different enumeration types ('enum bpf_return_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
     738 |         .ret_type       = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1080:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1080 |         .arg4_type      = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1641:44: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1641 |         .arg2_type    = ARG_PTR_TO_BTF_ID_OR_NULL | OBJ_RELEASE,
         |                         ~~~~~~~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~
   kernel/bpf/helpers.c:1746:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1746 |         .arg4_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~
   kernel/bpf/helpers.c:1789:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1789 |         .arg3_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1837:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1837 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1839:30: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1839 |         .arg3_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   kernel/bpf/helpers.c:1879:33: warning: bitwise operation between different enumeration types ('enum bpf_arg_type' and 'enum bpf_type_flag') [-Wenum-enum-conversion]
    1879 |         .arg1_type      = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
         |                           ~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~
   19 warnings and 2 errors generated.


vim +/_vdso_data +84 arch/riscv/include/asm/vdso/gettimeofday.h

aa5af0aa90bad3 Evan Green      2023-04-07  70  
4c5a116ada953b Thomas Gleixner 2020-08-04  71  static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
4c5a116ada953b Thomas Gleixner 2020-08-04 @72  						 const struct vdso_data *vd)
ad5d1122b82fbd Vincent Chen    2020-06-09  73  {
ad5d1122b82fbd Vincent Chen    2020-06-09  74  	/*
ad5d1122b82fbd Vincent Chen    2020-06-09  75  	 * The purpose of csr_read(CSR_TIME) is to trap the system into
ad5d1122b82fbd Vincent Chen    2020-06-09  76  	 * M-mode to obtain the value of CSR_TIME. Hence, unlike other
ad5d1122b82fbd Vincent Chen    2020-06-09  77  	 * architecture, no fence instructions surround the csr_read()
ad5d1122b82fbd Vincent Chen    2020-06-09  78  	 */
ad5d1122b82fbd Vincent Chen    2020-06-09  79  	return csr_read(CSR_TIME);
ad5d1122b82fbd Vincent Chen    2020-06-09  80  }
ad5d1122b82fbd Vincent Chen    2020-06-09  81  
ad5d1122b82fbd Vincent Chen    2020-06-09  82  static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
ad5d1122b82fbd Vincent Chen    2020-06-09  83  {
ad5d1122b82fbd Vincent Chen    2020-06-09 @84  	return _vdso_data;
ad5d1122b82fbd Vincent Chen    2020-06-09  85  }
ad5d1122b82fbd Vincent Chen    2020-06-09  86  
dffe11e280a42c Tong Tiangen    2021-09-01  87  #ifdef CONFIG_TIME_NS
dffe11e280a42c Tong Tiangen    2021-09-01  88  static __always_inline
dffe11e280a42c Tong Tiangen    2021-09-01  89  const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
dffe11e280a42c Tong Tiangen    2021-09-01  90  {
dffe11e280a42c Tong Tiangen    2021-09-01 @91  	return _timens_data;
dffe11e280a42c Tong Tiangen    2021-09-01  92  }
dffe11e280a42c Tong Tiangen    2021-09-01  93  #endif
ad5d1122b82fbd Vincent Chen    2020-06-09  94  #endif /* !__ASSEMBLY__ */
ad5d1122b82fbd Vincent Chen    2020-06-09  95
Alexei Starovoitov Oct. 24, 2024, 5:03 p.m. UTC | #6
On Thu, Oct 24, 2024 at 6:11 AM Vadim Fedorenko
<vadim.fedorenko@linux.dev> wrote:
>
> > static inline u64 __arch_get_hw_counter(s32 clock_mode,
> >                                          const struct vdso_data *vd)
> > {
> >          if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
> >                  return (u64)rdtsc_ordered() & S64_MAX;
> >
> > - & is missing
>
> & S64_MAX is only needed to detect early a possible wrap-around of the
> timecounter in case of a vDSO call for CLOCK_MONOTONIC_RAW/CLOCK_COARSE,
> which adds the namespace time offset. The TSC is reset during CPU reset
> and will not overflow within 10 years according to the "Intel 64 and
> IA-32 Architecture Software Developer's Manual, Vol 3B", so it doesn't
> really matter whether we mask the highest bit or not while accessing the
> raw cycles counter.
>
> > - rdtsc vs rdtscp
>
> rdtscp provides an additional 32 bits of "signature value" atomically
> with the TSC value in ECX. This value is not really usable outside of
> the domain which set it previously during initialization. The kernel
> stores an encoded cpuid into IA32_TSC_AUX to provide it back to the
> user-space application, but at the same time ignores its value during
> read. The combination of lfence and rdtsc will give us the same result
> (an ordered read of the TSC value independent of the core) without
> trashing the ECX value.

That makes sense.

One more thing:

> __bpf_kfunc int bpf_get_hw_counter(void)

it should be returning u64

> >> +               insn->imm = 0;
> >> +               insn->code = BPF_JMP | BPF_CALL;
> >> +               insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
> >> +               insn->dst_reg = 1; /* Implement enum for inlined fast calls */
> >
> > Yes. Pls do it cleanly from the start.
> >
> > Why rewrite though?
> > Can JIT match the addr of bpf_get_hw_counter ?
> > And no need to rewrite call insn ?
>
> I was thinking about doing it this way, I just wasn't able to find easy
> examples of matching function addresses in the JIT. I'll try to make it
> work, but it may add some extra functions to the JIT.

func = (u8 *) __bpf_call_base + imm32;
if (func == &bpf_get_hw_counter)
Eduard Zingerman Oct. 24, 2024, 5:31 p.m. UTC | #7
On Thu, 2024-10-24 at 14:11 +0100, Vadim Fedorenko wrote:

[...]

> > > @@ -20396,6 +20398,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >                     desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
> > >                  insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
> > >                  *cnt = 1;
> > > +       } else if (IS_ENABLED(CONFIG_X86) &&
> > 
> > It's better to introduce bpf_jit_inlines_kfunc_call()
> > similar to bpf_jit_inlines_helper_call().
> 
> Yep, I thought about introducing it while adding more architectures, but
> can do it from the beginning.

After thinking a bit more, I think I messed up in a private discussion
with Vadim. It is necessary to introduce bpf_jit_inlines_kfunc_call()
and use it in the mark_fastcall_pattern_for_call(),
otherwise the following situation is possible:

- the program is executed on the arch where inlining for
  bpf_get_hw_counter() is not implemented;
- there is a pattern in the code:

    r1 = *(u64 *)(r10 - 256);
    call bpf_get_hw_fastcall
    *(u64 *)(r10 - 256) = r1;
    ... r10 - 8 is not used ...
    ... r1 is read ...

- mark_fastcall_pattern_for_call() would mark spill and fill as
  members of the pattern;
- fastcall contract is not violated, because reserved stack slots are
  used as expected;
- remove_fastcall_spills_fills() would remove spill and fill:

    call bpf_get_hw_fastcall
    ... r1 is read ...

- since the call is not transformed to instructions by the arch-specific
  JIT, the value of r1 would be clobbered, making the program invalid.
  
Vadim, sorry I did not point this out earlier, I thought that fastcall
contract ensuring logic would handle everything.

[...]
Eduard Zingerman Oct. 24, 2024, 5:47 p.m. UTC | #8
On Thu, 2024-10-24 at 10:31 -0700, Eduard Zingerman wrote:

[...]

>     ... r10 - 8 is not used ...

I mean r10 - 256.

[...]
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..55595a0fa55b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2126,6 +2126,29 @@  st:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_CALL: {
 			u8 *ip = image + addrs[i - 1];
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
+				if (insn->dst_reg == 1) {
+					struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
+
+					/* Save RDX because RDTSC will use EDX:EAX to return u64 */
+					emit_mov_reg(&prog, true, AUX_REG, BPF_REG_3);
+					if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
+						EMIT_LFENCE();
+					EMIT2(0x0F, 0x31);
+
+					/* shl RDX, 32 */
+					maybe_emit_1mod(&prog, BPF_REG_3, true);
+					EMIT3(0xC1, add_1reg(0xE0, BPF_REG_3), 32);
+					/* or RAX, RDX */
+					maybe_emit_mod(&prog, BPF_REG_0, BPF_REG_3, true);
+					EMIT2(0x09, add_2reg(0xC0, BPF_REG_0, BPF_REG_3));
+					/* restore RDX from R11 */
+					emit_mov_reg(&prog, true, BPF_REG_3, AUX_REG);
+
+					break;
+				}
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
 				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index de0f9e5f9f73..c36ff18a044b 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2091,6 +2091,17 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			if (insn->src_reg == BPF_PSEUDO_CALL)
 				goto notyet;
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && !imm32) {
+				if (insn->dst_reg == 1) {
+					struct cpuinfo_x86 *c = &cpu_data(get_boot_cpu_id());
+
+					if (cpu_has(c, X86_FEATURE_LFENCE_RDTSC))
+						EMIT3(0x0F, 0xAE, 0xE8);
+					EMIT2(0x0F, 0x31);
+					break;
+				}
+			}
+
 			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 				int err;
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5c3fdb29c1b1..6624b2465484 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -23,6 +23,7 @@ 
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
 #include <linux/kasan.h>
+#include <asm/vdso/gettimeofday.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -3023,6 +3024,11 @@  __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
 	return ret + 1;
 }
 
+__bpf_kfunc int bpf_get_hw_counter(void)
+{
+	return __arch_get_hw_counter(1, NULL);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -3112,6 +3118,7 @@  BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
 BTF_ID_FLAGS(func, bpf_get_kmem_cache)
+BTF_ID_FLAGS(func, bpf_get_hw_counter, KF_FASTCALL)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f514247ba8ba..5f0e4f91ce48 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11260,6 +11260,7 @@  enum special_kfunc_type {
 	KF_bpf_iter_css_task_new,
 	KF_bpf_session_cookie,
 	KF_bpf_get_kmem_cache,
+	KF_bpf_get_hw_counter,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11326,6 +11327,7 @@  BTF_ID(func, bpf_session_cookie)
 BTF_ID_UNUSED
 #endif
 BTF_ID(func, bpf_get_kmem_cache)
+BTF_ID(func, bpf_get_hw_counter)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -20396,6 +20398,15 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
+	} else if (IS_ENABLED(CONFIG_X86) &&
+		   desc->func_id == special_kfunc_list[KF_bpf_get_hw_counter]) {
+		insn->imm = 0;
+		insn->code = BPF_JMP | BPF_CALL;
+		insn->src_reg = BPF_PSEUDO_KFUNC_CALL;
+		insn->dst_reg = 1; /* Implement enum for inlined fast calls */
+
+		insn_buf[0] = *insn;
+		*cnt = 1;
 	} else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
 		struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };