diff mbox series

[v6,4/5] x86: perf: Refactor misc flag assignments

Message ID 20241105195603.2317483-5-coltonlewis@google.com (mailing list archive)
State New
Headers show
Series Correct perf sampling with Guest VMs | expand

Commit Message

Colton Lewis Nov. 5, 2024, 7:56 p.m. UTC
Break the assignment logic for misc flags into their own respective
functions to reduce the complexity of the nested logic.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/x86/events/core.c            | 31 +++++++++++++++++++++++--------
 arch/x86/include/asm/perf_event.h |  2 ++
 2 files changed, 25 insertions(+), 8 deletions(-)

Comments

Liang, Kan Nov. 6, 2024, 4:03 p.m. UTC | #1
On 2024-11-05 2:56 p.m., Colton Lewis wrote:
> Break the assignment logic for misc flags into their own respective
> functions to reduce the complexity of the nested logic.
> 
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
> ---
>  arch/x86/events/core.c            | 31 +++++++++++++++++++++++--------
>  arch/x86/include/asm/perf_event.h |  2 ++
>  2 files changed, 25 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index d19e939f3998..24910c625e3d 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -3011,16 +3011,34 @@ unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
>  	return regs->ip + code_segment_base(regs);
>  }
>  
> +static unsigned long common_misc_flags(struct pt_regs *regs)
> +{
> +	if (regs->flags & PERF_EFLAGS_EXACT)
> +		return PERF_RECORD_MISC_EXACT_IP;
> +
> +	return 0;
> +}
> +
> +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
> +{
> +	unsigned long guest_state = perf_guest_state();
> +	unsigned long flags = common_misc_flags(regs);
> +
> +	if (guest_state & PERF_GUEST_USER)
> +		flags |= PERF_RECORD_MISC_GUEST_USER;
> +	else if (guest_state & PERF_GUEST_ACTIVE)
> +		flags |= PERF_RECORD_MISC_GUEST_KERNEL;
> +

The logic of setting the GUEST_KERNEL flag is implicitly changed here.

In the current code, the GUEST_KERNEL flag is set whenever PERF_GUEST_USER
is not set, which includes both guest_in_kernel and guest_in_NMI.

With the above change, the GUEST_KERNEL flag is set only for the
guest_in_kernel case.
IIUC, this is the series's target, right?

If so, could you please move the explanation into this patch?
For x86, the behavior already changes as of this patch.

Thanks,
Kan

> +	return flags;
> +}
> +
>  unsigned long perf_arch_misc_flags(struct pt_regs *regs)
>  {
>  	unsigned int guest_state = perf_guest_state();
> -	int misc = 0;
> +	unsigned long misc = common_misc_flags(regs);
>  
>  	if (guest_state) {
> -		if (guest_state & PERF_GUEST_USER)
> -			misc |= PERF_RECORD_MISC_GUEST_USER;
> -		else
> -			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
> +		misc |= perf_arch_guest_misc_flags(regs);
>  	} else {
>  		if (user_mode(regs))
>  			misc |= PERF_RECORD_MISC_USER;
> @@ -3028,9 +3046,6 @@ unsigned long perf_arch_misc_flags(struct pt_regs *regs)
>  			misc |= PERF_RECORD_MISC_KERNEL;
>  	}
>  
> -	if (regs->flags & PERF_EFLAGS_EXACT)
> -		misc |= PERF_RECORD_MISC_EXACT_IP;
> -
>  	return misc;
>  }
>  
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index feb87bf3d2e9..d95f902acc52 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -538,7 +538,9 @@ struct x86_perf_regs {
>  
>  extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
>  extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
> +extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
>  #define perf_arch_misc_flags(regs)	perf_arch_misc_flags(regs)
> +#define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
>  
>  #include <asm/stacktrace.h>
>
Oliver Upton Nov. 6, 2024, 8:02 p.m. UTC | #2
On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote:
> > +static unsigned long common_misc_flags(struct pt_regs *regs)
> > +{
> > +	if (regs->flags & PERF_EFLAGS_EXACT)
> > +		return PERF_RECORD_MISC_EXACT_IP;
> > +
> > +	return 0;
> > +}
> > +
> > +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
> > +{
> > +	unsigned long guest_state = perf_guest_state();
> > +	unsigned long flags = common_misc_flags(regs);
> > +
> > +	if (guest_state & PERF_GUEST_USER)
> > +		flags |= PERF_RECORD_MISC_GUEST_USER;
> > +	else if (guest_state & PERF_GUEST_ACTIVE)
> > +		flags |= PERF_RECORD_MISC_GUEST_KERNEL;
> > +
> 
> The logic of setting the GUEST_KERNEL flag is implicitly changed here.
> 
> For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER,
> which include both guest_in_kernel and guest_in_NMI.

Where is the "guest_in_NMI" state coming from? KVM only reports user v.
kernel mode.
Liang, Kan Nov. 6, 2024, 8:33 p.m. UTC | #3
On 2024-11-06 3:02 p.m., Oliver Upton wrote:
> On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote:
>>> +static unsigned long common_misc_flags(struct pt_regs *regs)
>>> +{
>>> +	if (regs->flags & PERF_EFLAGS_EXACT)
>>> +		return PERF_RECORD_MISC_EXACT_IP;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
>>> +{
>>> +	unsigned long guest_state = perf_guest_state();
>>> +	unsigned long flags = common_misc_flags(regs);
>>> +
>>> +	if (guest_state & PERF_GUEST_USER)
>>> +		flags |= PERF_RECORD_MISC_GUEST_USER;
>>> +	else if (guest_state & PERF_GUEST_ACTIVE)
>>> +		flags |= PERF_RECORD_MISC_GUEST_KERNEL;
>>> +
>>
>> The logic of setting the GUEST_KERNEL flag is implicitly changed here.
>>
>> For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER,
>> which include both guest_in_kernel and guest_in_NMI.
> 
> Where is the "guest_in_NMI" state coming from? KVM only reports user v.
> kernel mode.

I may have misunderstood kvm_arch_pmi_in_guest().
However, kvm_guest_state() can return at least 3 states:
0
PERF_GUEST_ACTIVE
PERF_GUEST_ACTIVE | PERF_GUEST_USER

The existing code indeed assumes two modes: if it's not user mode, it
must be kernel mode.
However, the proposed code behaves differently, or at least implies there
are more modes: it reports kernel mode only if it's not user mode and
PERF_GUEST_ACTIVE is set.

Thanks,
Kan
Oliver Upton Nov. 6, 2024, 8:51 p.m. UTC | #4
On Wed, Nov 06, 2024 at 03:33:30PM -0500, Liang, Kan wrote:
> On 2024-11-06 3:02 p.m., Oliver Upton wrote:
> > On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote:
> >>> +static unsigned long common_misc_flags(struct pt_regs *regs)
> >>> +{
> >>> +	if (regs->flags & PERF_EFLAGS_EXACT)
> >>> +		return PERF_RECORD_MISC_EXACT_IP;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
> >>> +{
> >>> +	unsigned long guest_state = perf_guest_state();
> >>> +	unsigned long flags = common_misc_flags(regs);
> >>> +
> >>> +	if (guest_state & PERF_GUEST_USER)
> >>> +		flags |= PERF_RECORD_MISC_GUEST_USER;
> >>> +	else if (guest_state & PERF_GUEST_ACTIVE)
> >>> +		flags |= PERF_RECORD_MISC_GUEST_KERNEL;
> >>> +
> >>
> >> The logic of setting the GUEST_KERNEL flag is implicitly changed here.
> >>
> >> For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER,
> >> which include both guest_in_kernel and guest_in_NMI.
> > 
> > Where is the "guest_in_NMI" state coming from? KVM only reports user v.
> > kernel mode.
> 
> I may understand the kvm_arch_pmi_in_guest() wrong.

kvm_arch_pmi_in_guest() is trying to *guess* whether or not an overflow
interrupt caused the most recent VM-exit, implying a counter overflowed
while in the VM. It has no idea what events are loaded on the PMU and
which contexts they're intended to sample in.

It only makes sense to check kvm_arch_pmi_in_guest() if you're dealing with
an event that counts in both host and guest modes and you need to decide who
to sample.

> However, the kvm_guest_state() at least return 3 states.
> 0
> PERF_GUEST_ACTIVE
> PERF_GUEST_ACTIVE | PERF_GUEST_USER
> 
> The existing code indeed assumes two modes. If it's not user mode, it
> must be kernel mode.
> However, the proposed code behave differently, or at least implies there
> are more modes.
> If it's not user mode and sets PERF_GUEST_ACTIVE, it's kernel mode.

A precondition of the call to perf_arch_guest_misc_flags() is that guest
state is nonzero, meaning a vCPU is loaded presently on this CPU.
diff mbox series

Patch

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d19e939f3998..24910c625e3d 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -3011,16 +3011,34 @@  unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
 	return regs->ip + code_segment_base(regs);
 }
 
+static unsigned long common_misc_flags(struct pt_regs *regs)
+{
+	if (regs->flags & PERF_EFLAGS_EXACT)
+		return PERF_RECORD_MISC_EXACT_IP;
+
+	return 0;
+}
+
+unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
+{
+	unsigned long guest_state = perf_guest_state();
+	unsigned long flags = common_misc_flags(regs);
+
+	if (guest_state & PERF_GUEST_USER)
+		flags |= PERF_RECORD_MISC_GUEST_USER;
+	else if (guest_state & PERF_GUEST_ACTIVE)
+		flags |= PERF_RECORD_MISC_GUEST_KERNEL;
+
+	return flags;
+}
+
 unsigned long perf_arch_misc_flags(struct pt_regs *regs)
 {
 	unsigned int guest_state = perf_guest_state();
-	int misc = 0;
+	unsigned long misc = common_misc_flags(regs);
 
 	if (guest_state) {
-		if (guest_state & PERF_GUEST_USER)
-			misc |= PERF_RECORD_MISC_GUEST_USER;
-		else
-			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+		misc |= perf_arch_guest_misc_flags(regs);
 	} else {
 		if (user_mode(regs))
 			misc |= PERF_RECORD_MISC_USER;
@@ -3028,9 +3046,6 @@  unsigned long perf_arch_misc_flags(struct pt_regs *regs)
 			misc |= PERF_RECORD_MISC_KERNEL;
 	}
 
-	if (regs->flags & PERF_EFLAGS_EXACT)
-		misc |= PERF_RECORD_MISC_EXACT_IP;
-
 	return misc;
 }
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index feb87bf3d2e9..d95f902acc52 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -538,7 +538,9 @@  struct x86_perf_regs {
 
 extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
 #define perf_arch_misc_flags(regs)	perf_arch_misc_flags(regs)
+#define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
 
 #include <asm/stacktrace.h>