diff mbox series

[v2,bpf-next] bpf: introduce helper bpf_raw_read_cpu_clock

Message ID 20211007175037.GA439973@fuller.cnet (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [v2,bpf-next] bpf: introduce helper bpf_raw_read_cpu_clock | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/cover_letter success Single patches do not need cover letters
netdev/fixes_present success Fixes tag not required for -next series
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 20 maintainers not CCed: sean@mess.org revest@chromium.org netdev@vger.kernel.org joe@cilium.io brouer@redhat.com mingo@redhat.com jackmanb@google.com daniel@iogearbox.net kafai@fb.com linux-media@vger.kernel.org yhs@fb.com x86@kernel.org liuhangbin@gmail.com john.fastabend@gmail.com songliubraving@fb.com mchehab@kernel.org kpsingh@kernel.org hpa@zytor.com ast@kernel.org bp@alien8.de
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11790 this patch: 11790
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success No Fixes tag
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: please, no space before tabs
netdev/build_allmodconfig_warn success Errors and warnings before: 11421 this patch: 11421
netdev/header_inline success No static functions without inline keyword in header files
bpf/vmtest-bpf-next fail VM_Test

Commit Message

Marcelo Tosatti Oct. 7, 2021, 5:50 p.m. UTC
Add bpf_raw_read_cpu_clock helper, to read architecture specific
CPU clock. In x86's case, this is the TSC.  

This is necessary to synchronize bpf traces from host and guest bpf-programs
(after subtracting guest tsc-offset from guest timestamps).

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---

v2:
- drop redundant addition to bpf_tracing_func_proto (Song Liu)

Comments

Song Liu Oct. 7, 2021, 6:58 p.m. UTC | #1
On Thu, Oct 7, 2021 at 11:04 AM Marcelo Tosatti <mtosatti@redhat.com> wrote:
>
>
> Add bpf_raw_read_cpu_clock helper, to read architecture specific
> CPU clock. In x86's case, this is the TSC.
>
> This is necessary to synchronize bpf traces from host and guest bpf-programs
> (after subtracting guest tsc-offset from guest timestamps).
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Could you please
1) give more context on the target use case;
2) add some selftests for the feature?

Thanks,
Song

>
> ---
>
> v2:
> - drop redundant addition to bpf_tracing_func_proto (Song Liu)
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index ab83c22d274e..832bb1f65f28 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -95,6 +95,7 @@ config X86
>         select ARCH_HAS_UBSAN_SANITIZE_ALL
>         select ARCH_HAS_DEBUG_WX
>         select ARCH_HAS_ZONE_DMA_SET if EXPERT
> +       select ARCH_HAS_BPF_RAW_CPU_CLOCK
>         select ARCH_HAVE_NMI_SAFE_CMPXCHG
>         select ARCH_MIGHT_HAVE_ACPI_PDC         if ACPI
>         select ARCH_MIGHT_HAVE_PC_PARPORT
> diff --git a/arch/x86/include/asm/bpf_raw_cpu_clock.h b/arch/x86/include/asm/bpf_raw_cpu_clock.h
> new file mode 100644
> index 000000000000..6951c399819e
> --- /dev/null
> +++ b/arch/x86/include/asm/bpf_raw_cpu_clock.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_BPF_RAW_CPU_CLOCK_H_
> +#define _ASM_X86_BPF_RAW_CPU_CLOCK_H_
> +
> +static inline unsigned long long read_raw_cpu_clock(void)
> +{
> +       return rdtsc_ordered();
> +}
> +
> +#endif /* _ASM_X86_BPF_RAW_CPU_CLOCK_H_ */
> diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
> index 3eff08d7b8e5..844a44ff508d 100644
> --- a/drivers/media/rc/bpf-lirc.c
> +++ b/drivers/media/rc/bpf-lirc.c
> @@ -105,6 +105,8 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_ktime_get_ns_proto;
>         case BPF_FUNC_ktime_get_boot_ns:
>                 return &bpf_ktime_get_boot_ns_proto;
> +       case BPF_FUNC_read_raw_cpu_clock:
> +               return &bpf_read_raw_cpu_clock_proto;
>         case BPF_FUNC_tail_call:
>                 return &bpf_tail_call_proto;
>         case BPF_FUNC_get_prandom_u32:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index d604c8251d88..b6cb426085fb 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2058,6 +2058,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
>  extern const struct bpf_func_proto bpf_tail_call_proto;
>  extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
>  extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
> +extern const struct bpf_func_proto bpf_read_raw_cpu_clock_proto;
>  extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
>  extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
>  extern const struct bpf_func_proto bpf_get_current_comm_proto;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6fc59d61937a..52191791b089 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4037,6 +4037,13 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * u64 bpf_read_raw_cpu_clock(void)
> + *     Description
> + *             Return the architecture specific CPU clock value.
> + *             For x86, this is the TSC clock.
> + *     Return
> + *             *CPU clock value*
> + *
>   * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
>   *     Description
>   *             **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
> @@ -5089,6 +5096,7 @@ union bpf_attr {
>         FN(task_pt_regs),               \
>         FN(get_branch_snapshot),        \
>         FN(trace_vprintk),              \
> +       FN(read_raw_cpu_clock),         \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
> index a82d6de86522..5815db157220 100644
> --- a/kernel/bpf/Kconfig
> +++ b/kernel/bpf/Kconfig
> @@ -21,6 +21,10 @@ config HAVE_EBPF_JIT
>  config ARCH_WANT_DEFAULT_BPF_JIT
>         bool
>
> +# Used by archs to tell they support reading raw CPU clock
> +config ARCH_HAS_BPF_RAW_CPU_CLOCK
> +       bool
> +
>  menu "BPF subsystem"
>
>  config BPF_SYSCALL
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index b6c72af64d5d..8e2359dfd582 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2345,6 +2345,8 @@ const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
>  const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
> +const struct bpf_func_proto bpf_read_raw_cpu_clock_proto __weak;
> +
>
>  const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
>  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1ffd469c217f..90b9e5efaf65 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,10 @@
>
>  #include "../../lib/kstrtox.h"
>
> +#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
> +#include <asm/bpf_raw_cpu_clock.h>
> +#endif
> +
>  /* If kernel subsystem is allowing eBPF programs to call this function,
>   * inside its own verifier_ops->get_func_proto() callback it should return
>   * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
> @@ -168,6 +172,21 @@ const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
>         .ret_type       = RET_INTEGER,
>  };
>
> +BPF_CALL_0(bpf_read_raw_cpu_clock)
> +{
> +#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
> +       return read_raw_cpu_clock();
> +#else
> +       return sched_clock();
> +#endif
> +}
> +
> +const struct bpf_func_proto bpf_read_raw_cpu_clock_proto = {
> +       .func           = bpf_read_raw_cpu_clock,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +};
> +
>  BPF_CALL_0(bpf_ktime_get_coarse_ns)
>  {
>         return ktime_get_coarse_ns();
> @@ -1366,6 +1385,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_ktime_get_boot_ns_proto;
>         case BPF_FUNC_ktime_get_coarse_ns:
>                 return &bpf_ktime_get_coarse_ns_proto;
> +       case BPF_FUNC_read_raw_cpu_clock:
> +               return &bpf_read_raw_cpu_clock_proto;
>         case BPF_FUNC_ringbuf_output:
>                 return &bpf_ringbuf_output_proto;
>         case BPF_FUNC_ringbuf_reserve:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 6fc59d61937a..52191791b089 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4037,6 +4037,13 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * u64 bpf_read_raw_cpu_clock(void)
> + *     Description
> + *             Return the architecture specific CPU clock value.
> + *             For x86, this is the TSC clock.
> + *     Return
> + *             *CPU clock value*
> + *
>   * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
>   *     Description
>   *             **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
> @@ -5089,6 +5096,7 @@ union bpf_attr {
>         FN(task_pt_regs),               \
>         FN(get_branch_snapshot),        \
>         FN(trace_vprintk),              \
> +       FN(read_raw_cpu_clock),         \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ab83c22d274e..832bb1f65f28 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -95,6 +95,7 @@  config X86
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_ZONE_DMA_SET if EXPERT
+	select ARCH_HAS_BPF_RAW_CPU_CLOCK
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
 	select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/include/asm/bpf_raw_cpu_clock.h b/arch/x86/include/asm/bpf_raw_cpu_clock.h
new file mode 100644
index 000000000000..6951c399819e
--- /dev/null
+++ b/arch/x86/include/asm/bpf_raw_cpu_clock.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_BPF_RAW_CPU_CLOCK_H_
+#define _ASM_X86_BPF_RAW_CPU_CLOCK_H_
+
+static inline unsigned long long read_raw_cpu_clock(void)
+{
+	return rdtsc_ordered();
+}
+
+#endif /* _ASM_X86_BPF_RAW_CPU_CLOCK_H_ */
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 3eff08d7b8e5..844a44ff508d 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -105,6 +105,8 @@  lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_read_raw_cpu_clock:
+		return &bpf_read_raw_cpu_clock_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_prandom_u32:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d604c8251d88..b6cb426085fb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2058,6 +2058,7 @@  extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
+extern const struct bpf_func_proto bpf_read_raw_cpu_clock_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..52191791b089 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4037,6 +4037,13 @@  union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
+ * u64 bpf_read_raw_cpu_clock(void)
+ * 	Description
+ * 		Return the architecture specific CPU clock value.
+ * 		For x86, this is the TSC clock.
+ * 	Return
+ * 		*CPU clock value*
+ *
  * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
@@ -5089,6 +5096,7 @@  union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(read_raw_cpu_clock),         \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index a82d6de86522..5815db157220 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -21,6 +21,10 @@  config HAVE_EBPF_JIT
 config ARCH_WANT_DEFAULT_BPF_JIT
 	bool
 
+# Used by archs to tell they support reading raw CPU clock
+config ARCH_HAS_BPF_RAW_CPU_CLOCK
+	bool
+
 menu "BPF subsystem"
 
 config BPF_SYSCALL
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b6c72af64d5d..8e2359dfd582 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2345,6 +2345,8 @@  const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
+const struct bpf_func_proto bpf_read_raw_cpu_clock_proto __weak;
+
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469c217f..90b9e5efaf65 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,10 @@ 
 
 #include "../../lib/kstrtox.h"
 
+#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
+#include <asm/bpf_raw_cpu_clock.h>
+#endif
+
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
  * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments
@@ -168,6 +172,21 @@  const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_read_raw_cpu_clock)
+{
+#ifdef CONFIG_ARCH_HAS_BPF_RAW_CPU_CLOCK
+	return read_raw_cpu_clock();
+#else
+	return sched_clock();
+#endif
+}
+
+const struct bpf_func_proto bpf_read_raw_cpu_clock_proto = {
+	.func		= bpf_read_raw_cpu_clock,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_ktime_get_coarse_ns)
 {
 	return ktime_get_coarse_ns();
@@ -1366,6 +1385,8 @@  bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_boot_ns_proto;
 	case BPF_FUNC_ktime_get_coarse_ns:
 		return &bpf_ktime_get_coarse_ns_proto;
+	case BPF_FUNC_read_raw_cpu_clock:
+		return &bpf_read_raw_cpu_clock_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fc59d61937a..52191791b089 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4037,6 +4037,13 @@  union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
+ * u64 bpf_read_raw_cpu_clock(void)
+ * 	Description
+ * 		Return the architecture specific CPU clock value.
+ * 		For x86, this is the TSC clock.
+ * 	Return
+ * 		*CPU clock value*
+ *
  * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
@@ -5089,6 +5096,7 @@  union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(read_raw_cpu_clock),         \
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper