
[bpf-next,v7,2/4] bpf: add bpf_cpu_cycles_to_ns helper

Message ID 20241118185245.1065000-3-vadfed@meta.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: add cpu cycles kfuncs

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 202 this patch: 202
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 12 maintainers not CCed: kpsingh@kernel.org dave.hansen@linux.intel.com hpa@zytor.com udknight@gmail.com jolsa@kernel.org song@kernel.org dsahern@kernel.org haoluo@google.com john.fastabend@gmail.com mingo@redhat.com netdev@vger.kernel.org sdf@fomichev.me
netdev/build_clang success Errors and warnings before: 252 this patch: 252
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6969 this patch: 6969
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 17 this patch: 17
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Vadim Fedorenko Nov. 18, 2024, 6:52 p.m. UTC
The new helper should be used to convert cycles returned by
bpf_get_cpu_cycles() into nanoseconds.
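
For illustration, a minimal sketch of how a program could pair the two
kfuncs to time a section of BPF code. The program type, section name and
extern declarations are assumptions made for the example, not taken from
this series:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* kfuncs added by this series, declared as ksym externs */
extern u64 bpf_get_cpu_cycles(void) __ksym;
extern u64 bpf_cpu_cycles_to_ns(u64 cycles) __ksym;

SEC("tc")
int measure(struct __sk_buff *skb)
{
	u64 start = bpf_get_cpu_cycles();
	u64 delta_ns;

	/* ... work being timed ... */

	delta_ns = bpf_cpu_cycles_to_ns(bpf_get_cpu_cycles() - start);
	bpf_printk("section took %llu ns", delta_ns);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";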

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
---
v6 -> v7:
* change boot_cpu_has() -> cpu_feature_enabled() (Borislav)
v4 -> v6:
* add comment about simplified implementation (Eduard)
v4:
* change helper name to bpf_cpu_cycles_to_ns.
* hide it behind CONFIG_GENERIC_GETTIMEOFDAY to avoid exposing on
  unsupported architectures.
---
 arch/x86/net/bpf_jit_comp.c   | 22 ++++++++++++++++++++++
 arch/x86/net/bpf_jit_comp32.c | 19 +++++++++++++++++++
 include/linux/bpf.h           |  1 +
 kernel/bpf/helpers.c          | 14 +++++++++++++-
 4 files changed, 55 insertions(+), 1 deletion(-)

Comments

Peter Zijlstra Nov. 19, 2024, 11:28 a.m. UTC | #1
On Mon, Nov 18, 2024 at 10:52:43AM -0800, Vadim Fedorenko wrote:

> +			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
> +			    imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
> +			    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
> +				u32 mult, shift;
> +
> +				clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);
> +				/* imul RAX, RDI, mult */
> +				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
> +				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
> +					    mult);
> +
> +				/* shr RAX, shift (which is less than 64) */
> +				maybe_emit_1mod(&prog, BPF_REG_0, true);
> +				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
> +
> +				break;
> +			}

This is ludicrously horrible. Why are you using your own mult/shift and
not offset here instead of using the one from either sched_clock or
clocksource_tsc ?

And being totally inconsistent with your own alternative implementation
which uses the VDSO, which in turn uses clocksource_tsc:

> +__bpf_kfunc u64 bpf_cpu_cycles_to_ns(u64 cycles)
> +{
> +	const struct vdso_data *vd = __arch_get_k_vdso_data();
> +
> +	vd = &vd[CS_RAW];
> +	/* kfunc implementation does less manipulations than vDSO
> +	 * implementation. BPF use-case assumes two measurements are close
> +	 * in time and can simplify the logic.
> +	 */
> +	return mul_u64_u32_shr(cycles, vd->mult, vd->shift);
> +}

Also, if I'm not mistaken, the above is broken, you really should add
the offset, without it I don't think we guarantee the result is
monotonic.
Vadim Fedorenko Nov. 19, 2024, 2:38 p.m. UTC | #2
On 19/11/2024 03:28, Peter Zijlstra wrote:
> On Mon, Nov 18, 2024 at 10:52:43AM -0800, Vadim Fedorenko wrote:
> 
>> +			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
>> +			    imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
>> +			    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
>> +				u32 mult, shift;
>> +
>> +				clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);
>> +				/* imul RAX, RDI, mult */
>> +				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
>> +				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
>> +					    mult);
>> +
>> +				/* shr RAX, shift (which is less than 64) */
>> +				maybe_emit_1mod(&prog, BPF_REG_0, true);
>> +				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
>> +
>> +				break;
>> +			}
> 
> This is ludicrously horrible. Why are you using your own mult/shift and
> not offset here instead of using the one from either sched_clock or
> clocksource_tsc ?

With X86_FEATURE_CONSTANT_TSC, tsc_khz is actually constant after
switching from tsc_early. And the very same call to
clocks_calc_mult_shift() is used to create clocksource_tsc mult and
shift constants. Unfortunately, clocksources don't have a proper API to
get the underlying info, which is why I have to calculate the shift and
mult values on my own.
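
To make that concrete, here is a sketch of the conversion the emitted
imul/shr pair performs (the helper name is made up for the example; the
patch does the clocks_calc_mult_shift() call once, at JIT time, and only
emits the multiply and shift):

#include <linux/clocksource.h>	/* clocks_calc_mult_shift() */
#include <linux/time64.h>	/* USEC_PER_SEC */
#include <asm/tsc.h>		/* tsc_khz */

/* Sketch only; assumes X86_FEATURE_CONSTANT_TSC so tsc_khz is stable. */
static u64 cycles_to_ns_sketch(u64 cycles)
{
	u32 mult, shift;

	/*
	 * USEC_PER_SEC / tsc_khz == NSEC_PER_SEC / tsc_hz, so this pair
	 * converts TSC cycles to nanoseconds.
	 */
	clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);

	/* what "imul rax, rdi, mult; shr rax, shift" computes at run time,
	 * with the product truncated to 64 bits
	 */
	return (cycles * mult) >> shift;
}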

> And being totally inconsistent with your own alternative implementation
> which uses the VDSO, which in turn uses clocksource_tsc:

With what I said above it is consistent with clocksource_tsc.

> 
>> +__bpf_kfunc u64 bpf_cpu_cycles_to_ns(u64 cycles)
>> +{
>> +	const struct vdso_data *vd = __arch_get_k_vdso_data();
>> +
>> +	vd = &vd[CS_RAW];
>> +	/* kfunc implementation does less manipulations than vDSO
>> +	 * implementation. BPF use-case assumes two measurements are close
>> +	 * in time and can simplify the logic.
>> +	 */
>> +	return mul_u64_u32_shr(cycles, vd->mult, vd->shift);
>> +}
> 
> Also, if I'm not mistaken, the above is broken, you really should add
> the offset, without it I don't think we guarantee the result is
> monotonic.

Not quite sure how a constant offset can affect the monotonic guarantee
of cycles, given that the main use case will be to calculate ns out of
small deltas? AFAIU, the offset is needed to get the ns of CLOCK_MONOTONIC,
which can be affected by NTP manipulation. But in this helper we don't
follow any clock_id; we just want to calculate a nanoseconds value out of
the stable and monotonically increasing counter provided by the architecture.
Peter Zijlstra Nov. 20, 2024, 8:49 a.m. UTC | #3
On Tue, Nov 19, 2024 at 06:38:51AM -0800, Vadim Fedorenko wrote:
> On 19/11/2024 03:28, Peter Zijlstra wrote:
> > On Mon, Nov 18, 2024 at 10:52:43AM -0800, Vadim Fedorenko wrote:
> > 
> > > +			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
> > > +			    imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
> > > +			    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
> > > +				u32 mult, shift;
> > > +
> > > +				clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);
> > > +				/* imul RAX, RDI, mult */
> > > +				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
> > > +				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
> > > +					    mult);
> > > +
> > > +				/* shr RAX, shift (which is less than 64) */
> > > +				maybe_emit_1mod(&prog, BPF_REG_0, true);
> > > +				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
> > > +
> > > +				break;
> > > +			}
> > 
> > This is ludicrously horrible. Why are you using your own mult/shift and
> > not offset here instead of using the one from either sched_clock or
> > clocksource_tsc ?
> 
> With X86_FEATURE_CONSTANT_TSC, tsc_khz is actually constant after
> switching from tsc_early. And the very same call to
> clocks_calc_mult_shift() is used to create clocksource_tsc mult and
> shift constants. Unfortunately, clocksources don't have a proper API to
> get the underlying info, which is why I have to calculate the shift and
> mult values on my own.

There is cyc2ns_read_begin() / cyc2ns_read_end(), and you can use the
VDSO thing you do below.
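
For reference, a sketch of what using that interface could look like (the
wrapper name is made up; offset included, this is essentially what the x86
sched_clock() path computes from an absolute TSC value, and for deltas the
offset cancels out in the subtraction):

#include <linux/math64.h>	/* mul_u64_u32_shr() */
#include <asm/timer.h>		/* cyc2ns_read_begin/end(), struct cyc2ns_data */

static u64 cycles_to_ns_via_cyc2ns(u64 cycles)
{
	struct cyc2ns_data data;
	u64 ns;

	/* snapshot the current per-CPU mult/shift/offset */
	cyc2ns_read_begin(&data);
	ns = mul_u64_u32_shr(cycles, data.cyc2ns_mul, data.cyc2ns_shift) +
	     data.cyc2ns_offset;
	cyc2ns_read_end();

	return ns;
}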

> > And being totally inconsistent with your own alternative implementation
> > which uses the VDSO, which in turn uses clocksource_tsc:
> 
> With what I said above it is consistent with clocksource_tsc.
> 
> > 
> > > +__bpf_kfunc u64 bpf_cpu_cycles_to_ns(u64 cycles)
> > > +{
> > > +	const struct vdso_data *vd = __arch_get_k_vdso_data();
> > > +
> > > +	vd = &vd[CS_RAW];
> > > +	/* kfunc implementation does less manipulations than vDSO
> > > +	 * implementation. BPF use-case assumes two measurements are close
> > > +	 * in time and can simplify the logic.
> > > +	 */
> > > +	return mul_u64_u32_shr(cycles, vd->mult, vd->shift);
> > > +}
> > 
> > Also, if I'm not mistaken, the above is broken, you really should add
> > the offset, without it I don't think we guarantee the result is
> > monotonic.
> 
> Not quite sure how a constant offset can affect the monotonic guarantee
> of cycles, given that the main use case will be to calculate ns out of
> small deltas?

Well, when I read this patch I didn't know, because your changelogs
don't mention anything at all.
Vadim Fedorenko Nov. 20, 2024, 1:39 p.m. UTC | #4
On 20/11/2024 00:49, Peter Zijlstra wrote:
> On Tue, Nov 19, 2024 at 06:38:51AM -0800, Vadim Fedorenko wrote:
>> On 19/11/2024 03:28, Peter Zijlstra wrote:
>>> On Mon, Nov 18, 2024 at 10:52:43AM -0800, Vadim Fedorenko wrote:
>>>
>>>> +			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
>>>> +			    imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
>>>> +			    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
>>>> +				u32 mult, shift;
>>>> +
>>>> +				clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);
>>>> +				/* imul RAX, RDI, mult */
>>>> +				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
>>>> +				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
>>>> +					    mult);
>>>> +
>>>> +				/* shr RAX, shift (which is less than 64) */
>>>> +				maybe_emit_1mod(&prog, BPF_REG_0, true);
>>>> +				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
>>>> +
>>>> +				break;
>>>> +			}
>>>
>>> This is ludicrously horrible. Why are you using your own mult/shift and
>>> not offset here instead of using the one from either sched_clock or
>>> clocksource_tsc ?
>>
>> With X86_FEATURE_CONSTANT_TSC, tsc_khz is actually constant after
>> switching from tsc_early. And the very same call to
>> clocks_calc_mult_shift() is used to create clocksource_tsc mult and
>> shift constants. Unfortunately, clocksources don't have a proper API to
>> get the underlying info, which is why I have to calculate the shift and
>> mult values on my own.
> 
> There is cyc2ns_read_begin() / cyc2ns_read_end(), and you can use the
> VDSO thing you do below.

Looks like I missed the arch-specific implementation. Thanks, I'll use it in
the next version.

>>> And being totally inconsistent with your own alternative implementation
>>> which uses the VDSO, which in turn uses clocksource_tsc:
>>
>> With what I said above it is consistent with clocksource_tsc.
>>
>>>
>>>> +__bpf_kfunc u64 bpf_cpu_cycles_to_ns(u64 cycles)
>>>> +{
>>>> +	const struct vdso_data *vd = __arch_get_k_vdso_data();
>>>> +
>>>> +	vd = &vd[CS_RAW];
>>>> +	/* kfunc implementation does less manipulations than vDSO
>>>> +	 * implementation. BPF use-case assumes two measurements are close
>>>> +	 * in time and can simplify the logic.
>>>> +	 */
>>>> +	return mul_u64_u32_shr(cycles, vd->mult, vd->shift);
>>>> +}
>>>
>>> Also, if I'm not mistaken, the above is broken, you really should add
>>> the offset, without it I don't think we guarantee the result is
>>> monotonic.
>>
>> Not quite sure how a constant offset can affect the monotonic guarantee
>> of cycles, given that the main use case will be to calculate ns out of
>> small deltas?
> 
> Well, when I read this patch I didn't know, because your changelogs
> don't mention anything at all.

Fair, I'll improve the commit message in v8, thanks.

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5e0c16d8bba3..2a3f7d5fdf26 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,6 +11,7 @@ 
 #include <linux/bpf.h>
 #include <linux/memory.h>
 #include <linux/sort.h>
+#include <linux/clocksource.h>
 #include <asm/extable.h>
 #include <asm/ftrace.h>
 #include <asm/set_memory.h>
@@ -2216,6 +2217,24 @@  st:			if (is_imm8(insn->off))
 				break;
 			}
 
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+			    imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
+			    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
+				u32 mult, shift;
+
+				clocks_calc_mult_shift(&mult, &shift, tsc_khz, USEC_PER_SEC, 0);
+				/* imul RAX, RDI, mult */
+				maybe_emit_mod(&prog, BPF_REG_1, BPF_REG_0, true);
+				EMIT2_off32(0x69, add_2reg(0xC0, BPF_REG_1, BPF_REG_0),
+					    mult);
+
+				/* shr RAX, shift (which is less than 64) */
+				maybe_emit_1mod(&prog, BPF_REG_0, true);
+				EMIT3(0xC1, add_1reg(0xE8, BPF_REG_0), shift);
+
+				break;
+			}
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
 				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -3828,5 +3847,8 @@  bool bpf_jit_inlines_kfunc_call(s32 imm)
 {
 	if (imm == BPF_CALL_IMM(bpf_get_cpu_cycles))
 		return true;
+	if (imm == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
+	    boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return true;
 	return false;
 }
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 11a5c41302a3..2bc560c47c00 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -12,6 +12,7 @@ 
 #include <linux/netdevice.h>
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
+#include <linux/clocksource.h>
 #include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
@@ -2100,6 +2101,24 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 					EMIT2(0x0F, 0x31);
 					break;
 				}
+				if (imm32 == BPF_CALL_IMM(bpf_cpu_cycles_to_ns) &&
+				    cpu_feature_enabled(X86_FEATURE_CONSTANT_TSC)) {
+					u32 mult, shift;
+
+					clocks_calc_mult_shift(&mult, &shift, tsc_khz,
+							       USEC_PER_SEC, 0);
+
+					/* move parameter to BPF_REG_0 */
+					emit_ia32_mov_r64(true, bpf2ia32[BPF_REG_0],
+							  bpf2ia32[BPF_REG_1], true, true,
+							  &prog, bpf_prog->aux);
+					/* multiply parameter by mult */
+					emit_ia32_mul_i64(bpf2ia32[BPF_REG_0],
+							  mult, true, &prog);
+					/* shift parameter by shift which is less than 64 */
+					emit_ia32_rsh_i64(bpf2ia32[BPF_REG_0],
+							  shift, true, &prog);
+				}
 
 				err = emit_kfunc_call(bpf_prog,
 						      image + addrs[i],
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 43a5207a1591..af47704afeaa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -3336,6 +3336,7 @@  u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 /* Inlined kfuncs */
 #if IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)
 u64 bpf_get_cpu_cycles(void);
+u64 bpf_cpu_cycles_to_ns(u64 cycles);
 #endif
 
 #if defined(CONFIG_NET)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9f1a51bdb365..ed3876aa30ad 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -3079,8 +3079,19 @@  __bpf_kfunc u64 bpf_get_cpu_cycles(void)
 	 */
 	return __arch_get_hw_counter(1, vd);
 }
-#endif
 
+__bpf_kfunc u64 bpf_cpu_cycles_to_ns(u64 cycles)
+{
+	const struct vdso_data *vd = __arch_get_k_vdso_data();
+
+	vd = &vd[CS_RAW];
+	/* kfunc implementation does less manipulations than vDSO
+	 * implementation. BPF use-case assumes two measurements are close
+	 * in time and can simplify the logic.
+	 */
+	return mul_u64_u32_shr(cycles, vd->mult, vd->shift);
+}
+#endif
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -3175,6 +3186,7 @@  BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
 BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
 #if IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)
 BTF_ID_FLAGS(func, bpf_get_cpu_cycles, KF_FASTCALL)
+BTF_ID_FLAGS(func, bpf_cpu_cycles_to_ns, KF_FASTCALL)
 #endif
 BTF_KFUNCS_END(common_btf_ids)