diff mbox series

[v5,1/2] riscv: perf: add guest vs host distinction

Message ID a67d527dc1b11493fe11f7f53584772fdd983744.1728980031.git.zhouquan@iscas.ac.cn (mailing list archive)
State New
Headers show
Series riscv: Add perf support to collect KVM guest statistics from host side | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh took 140.89s
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh took 1341.13s
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh took 1550.09s
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh took 20.88s
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh took 22.60s
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh took 0.72s
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh took 42.92s
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh took 0.01s
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh took 0.60s
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh took 0.01s
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh took 0.00s
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh took 0.03s

Commit Message

Quan Zhou Oct. 15, 2024, 8:42 a.m. UTC
From: Quan Zhou <zhouquan@iscas.ac.cn>

Introduce basic guest support in perf, enabling it to distinguish
between PMU interrupts in the host or guest, and collect
fundamental information.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
---
 arch/riscv/include/asm/perf_event.h |  6 +++++
 arch/riscv/kernel/perf_callchain.c  | 38 +++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

Comments

Palmer Dabbelt Oct. 18, 2024, 7:55 p.m. UTC | #1
On Tue, 15 Oct 2024 01:42:50 PDT (-0700), zhouquan@iscas.ac.cn wrote:
> From: Quan Zhou <zhouquan@iscas.ac.cn>
>
> Introduce basic guest support in perf, enabling it to distinguish
> between PMU interrupts in the host or guest, and collect
> fundamental information.
>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
> ---
>  arch/riscv/include/asm/perf_event.h |  6 +++++
>  arch/riscv/kernel/perf_callchain.c  | 38 +++++++++++++++++++++++++++++
>  2 files changed, 44 insertions(+)
>
> diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
> index 665bbc9b2f84..38926b4a902d 100644
> --- a/arch/riscv/include/asm/perf_event.h
> +++ b/arch/riscv/include/asm/perf_event.h
> @@ -8,7 +8,11 @@
>  #ifndef _ASM_RISCV_PERF_EVENT_H
>  #define _ASM_RISCV_PERF_EVENT_H
>
> +#ifdef CONFIG_PERF_EVENTS
>  #include <linux/perf_event.h>
> +extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
> +extern unsigned long perf_misc_flags(struct pt_regs *regs);
> +#define perf_misc_flags(regs) perf_misc_flags(regs)
>  #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
>
>  #define perf_arch_fetch_caller_regs(regs, __ip) { \
> @@ -17,4 +21,6 @@
>  	(regs)->sp = current_stack_pointer; \
>  	(regs)->status = SR_PP; \
>  }
> +#endif
> +
>  #endif /* _ASM_RISCV_PERF_EVENT_H */
> diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
> index c7468af77c66..c2c81a80f816 100644
> --- a/arch/riscv/kernel/perf_callchain.c
> +++ b/arch/riscv/kernel/perf_callchain.c
> @@ -28,11 +28,49 @@ static bool fill_callchain(void *entry, unsigned long pc)
>  void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
>  			 struct pt_regs *regs)
>  {
> +	if (perf_guest_state()) {
> +		/* TODO: We don't support guest os callchain now */
> +		return;

That seems kind of weird, but it looks like almost exactly the same 
thing Marc did in 75e424620a4f ("arm64: perf: add guest vs host 
discrimination").  I think it's safe, as we'll basically just silently 
display no backtrace and we can always just fail to backtrace.

That said: I don't understand why we can't backtrace inside a guest?  If 
we can get the registers and memory it seems like we should be able to.  
Maybe I'm missing something?

> +	}
> +
>  	arch_stack_walk_user(fill_callchain, entry, regs);
>  }
>
>  void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
>  			   struct pt_regs *regs)
>  {
> +	if (perf_guest_state()) {
> +		/* TODO: We don't support guest os callchain now */
> +		return;
> +	}
> +
>  	walk_stackframe(NULL, regs, fill_callchain, entry);
>  }
> +
> +unsigned long perf_instruction_pointer(struct pt_regs *regs)
> +{
> +	if (perf_guest_state())
> +		return perf_guest_get_ip();
> +
> +	return instruction_pointer(regs);
> +}
> +
> +unsigned long perf_misc_flags(struct pt_regs *regs)
> +{
> +	unsigned int guest_state = perf_guest_state();
> +	unsigned long misc = 0;
> +
> +	if (guest_state) {
> +		if (guest_state & PERF_GUEST_USER)
> +			misc |= PERF_RECORD_MISC_GUEST_USER;
> +		else
> +			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
> +	} else {
> +		if (user_mode(regs))
> +			misc |= PERF_RECORD_MISC_USER;
> +		else
> +			misc |= PERF_RECORD_MISC_KERNEL;
> +	}
> +
> +	return misc;
> +}
Quan Zhou Oct. 22, 2024, 9:11 a.m. UTC | #2
On 2024/10/19 03:55, Palmer Dabbelt wrote:
> On Tue, 15 Oct 2024 01:42:50 PDT (-0700), zhouquan@iscas.ac.cn wrote:
>> From: Quan Zhou <zhouquan@iscas.ac.cn>
>>
>> Introduce basic guest support in perf, enabling it to distinguish
>> between PMU interrupts in the host or guest, and collect
>> fundamental information.
>>
>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>> Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
>> ---
>>  arch/riscv/include/asm/perf_event.h |  6 +++++
>>  arch/riscv/kernel/perf_callchain.c  | 38 +++++++++++++++++++++++++++++
>>  2 files changed, 44 insertions(+)
>>
>> diff --git a/arch/riscv/include/asm/perf_event.h 
>> b/arch/riscv/include/asm/perf_event.h
>> index 665bbc9b2f84..38926b4a902d 100644
>> --- a/arch/riscv/include/asm/perf_event.h
>> +++ b/arch/riscv/include/asm/perf_event.h
>> @@ -8,7 +8,11 @@
>>  #ifndef _ASM_RISCV_PERF_EVENT_H
>>  #define _ASM_RISCV_PERF_EVENT_H
>>
>> +#ifdef CONFIG_PERF_EVENTS
>>  #include <linux/perf_event.h>
>> +extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
>> +extern unsigned long perf_misc_flags(struct pt_regs *regs);
>> +#define perf_misc_flags(regs) perf_misc_flags(regs)
>>  #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
>>
>>  #define perf_arch_fetch_caller_regs(regs, __ip) { \
>> @@ -17,4 +21,6 @@
>>      (regs)->sp = current_stack_pointer; \
>>      (regs)->status = SR_PP; \
>>  }
>> +#endif
>> +
>>  #endif /* _ASM_RISCV_PERF_EVENT_H */
>> diff --git a/arch/riscv/kernel/perf_callchain.c 
>> b/arch/riscv/kernel/perf_callchain.c
>> index c7468af77c66..c2c81a80f816 100644
>> --- a/arch/riscv/kernel/perf_callchain.c
>> +++ b/arch/riscv/kernel/perf_callchain.c
>> @@ -28,11 +28,49 @@ static bool fill_callchain(void *entry, unsigned 
>> long pc)
>>  void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
>>               struct pt_regs *regs)
>>  {
>> +    if (perf_guest_state()) {
>> +        /* TODO: We don't support guest os callchain now */
>> +        return;
> 
> That seems kind of weird, but it looks like almost exactly the same 
> thing Marc did in 75e424620a4f ("arm64: perf: add guest vs host 
> discrimination").  I think it's safe, as we'll basically just silently 
> display no backtrace and we can always just fail to backtrace.
> 
> That said: I don't understand why we can't backtrace inside a guest?  If 
> we can get the registers and memory it seems like we should be able to. 
> Maybe I'm missing something?
> 

 From the community's discussion history, there are two reasons:

1) For backtracing, we must traverse the structures within the guest's 
virtual address space. These structures can change at any time while the 
guest is running. The page table data we obtain might be corrupted, 
making it impossible to complete the traversal of the page tables or 
leading to incorrect data [1].

2) For security reasons, a significant number of cloud vendors wish to 
make accessing customer data almost impossible [2].

Currently, community prefer to implement this functionality in user 
space and access the guest through the KVM API, interrupting the guest 
to perform the unwind operation.

As with the x86/ARM architectures, stubs are still retained here for 
RISC-V. If `guest os callchain` is introduced in the future, they can 
indicate that additional work needs to be done.

[1] 
https://lore.kernel.org/all/ZSlNsn-f1j2bB8pW@FVFF77S0Q05N.cambridge.arm.com/
[2] https://lore.kernel.org/all/ZXeTvCLURmwzpDkP@google.com/

Regards,
Quan

>> +    }
>> +
>>      arch_stack_walk_user(fill_callchain, entry, regs);
>>  }
>>
>>  void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
>>                 struct pt_regs *regs)
>>  {
>> +    if (perf_guest_state()) {
>> +        /* TODO: We don't support guest os callchain now */
>> +        return;
>> +    }
>> +
>>      walk_stackframe(NULL, regs, fill_callchain, entry);
>>  }
>> +
>> +unsigned long perf_instruction_pointer(struct pt_regs *regs)
>> +{
>> +    if (perf_guest_state())
>> +        return perf_guest_get_ip();
>> +
>> +    return instruction_pointer(regs);
>> +}
>> +
>> +unsigned long perf_misc_flags(struct pt_regs *regs)
>> +{
>> +    unsigned int guest_state = perf_guest_state();
>> +    unsigned long misc = 0;
>> +
>> +    if (guest_state) {
>> +        if (guest_state & PERF_GUEST_USER)
>> +            misc |= PERF_RECORD_MISC_GUEST_USER;
>> +        else
>> +            misc |= PERF_RECORD_MISC_GUEST_KERNEL;
>> +    } else {
>> +        if (user_mode(regs))
>> +            misc |= PERF_RECORD_MISC_USER;
>> +        else
>> +            misc |= PERF_RECORD_MISC_KERNEL;
>> +    }
>> +
>> +    return misc;
>> +}
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index 665bbc9b2f84..38926b4a902d 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -8,7 +8,11 @@ 
 #ifndef _ASM_RISCV_PERF_EVENT_H
 #define _ASM_RISCV_PERF_EVENT_H
 
+#ifdef CONFIG_PERF_EVENTS
 #include <linux/perf_event.h>
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
 #define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
 
 #define perf_arch_fetch_caller_regs(regs, __ip) { \
@@ -17,4 +21,6 @@ 
 	(regs)->sp = current_stack_pointer; \
 	(regs)->status = SR_PP; \
 }
+#endif
+
 #endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index c7468af77c66..c2c81a80f816 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -28,11 +28,49 @@  static bool fill_callchain(void *entry, unsigned long pc)
 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 			 struct pt_regs *regs)
 {
+	if (perf_guest_state()) {
+		/* TODO: We don't support guest os callchain now */
+		return;
+	}
+
 	arch_stack_walk_user(fill_callchain, entry, regs);
 }
 
 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 			   struct pt_regs *regs)
 {
+	if (perf_guest_state()) {
+		/* TODO: We don't support guest os callchain now */
+		return;
+	}
+
 	walk_stackframe(NULL, regs, fill_callchain, entry);
 }
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_state())
+		return perf_guest_get_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	unsigned int guest_state = perf_guest_state();
+	unsigned long misc = 0;
+
+	if (guest_state) {
+		if (guest_state & PERF_GUEST_USER)
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}