Message ID | 20241105195603.2317483-5-coltonlewis@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Correct perf sampling with Guest VMs | expand |
On 2024-11-05 2:56 p.m., Colton Lewis wrote: > Break the assignment logic for misc flags into their own respective > functions to reduce the complexity of the nested logic. > > Signed-off-by: Colton Lewis <coltonlewis@google.com> > Reviewed-by: Oliver Upton <oliver.upton@linux.dev> > --- > arch/x86/events/core.c | 31 +++++++++++++++++++++++-------- > arch/x86/include/asm/perf_event.h | 2 ++ > 2 files changed, 25 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c > index d19e939f3998..24910c625e3d 100644 > --- a/arch/x86/events/core.c > +++ b/arch/x86/events/core.c > @@ -3011,16 +3011,34 @@ unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) > return regs->ip + code_segment_base(regs); > } > > +static unsigned long common_misc_flags(struct pt_regs *regs) > +{ > + if (regs->flags & PERF_EFLAGS_EXACT) > + return PERF_RECORD_MISC_EXACT_IP; > + > + return 0; > +} > + > +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) > +{ > + unsigned long guest_state = perf_guest_state(); > + unsigned long flags = common_misc_flags(regs); > + > + if (guest_state & PERF_GUEST_USER) > + flags |= PERF_RECORD_MISC_GUEST_USER; > + else if (guest_state & PERF_GUEST_ACTIVE) > + flags |= PERF_RECORD_MISC_GUEST_KERNEL; > + The logic of setting the GUEST_KERNEL flag is implicitly changed here. For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER, which include both guest_in_kernel and guest_in_NMI. With the above change, the GUEST_KERNEL flag should be only set for the guest_in_kernel case. IIUC, this is the series's target, right? If so, could you please move the explanation into this patch? For x86, the behavior has already been changed since this patch. 
Thanks, Kan > + return flags; > +} > + > unsigned long perf_arch_misc_flags(struct pt_regs *regs) > { > unsigned int guest_state = perf_guest_state(); > - int misc = 0; > + unsigned long misc = common_misc_flags(regs); > > if (guest_state) { > - if (guest_state & PERF_GUEST_USER) > - misc |= PERF_RECORD_MISC_GUEST_USER; > - else > - misc |= PERF_RECORD_MISC_GUEST_KERNEL; > + misc |= perf_arch_guest_misc_flags(regs); > } else { > if (user_mode(regs)) > misc |= PERF_RECORD_MISC_USER; > @@ -3028,9 +3046,6 @@ unsigned long perf_arch_misc_flags(struct pt_regs *regs) > misc |= PERF_RECORD_MISC_KERNEL; > } > > - if (regs->flags & PERF_EFLAGS_EXACT) > - misc |= PERF_RECORD_MISC_EXACT_IP; > - > return misc; > } > > diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h > index feb87bf3d2e9..d95f902acc52 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -538,7 +538,9 @@ struct x86_perf_regs { > > extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); > extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); > +extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs); > #define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) > +#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) > > #include <asm/stacktrace.h> >
On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote: > > +static unsigned long common_misc_flags(struct pt_regs *regs) > > +{ > > + if (regs->flags & PERF_EFLAGS_EXACT) > > + return PERF_RECORD_MISC_EXACT_IP; > > + > > + return 0; > > +} > > + > > +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) > > +{ > > + unsigned long guest_state = perf_guest_state(); > > + unsigned long flags = common_misc_flags(regs); > > + > > + if (guest_state & PERF_GUEST_USER) > > + flags |= PERF_RECORD_MISC_GUEST_USER; > > + else if (guest_state & PERF_GUEST_ACTIVE) > > + flags |= PERF_RECORD_MISC_GUEST_KERNEL; > > + > > The logic of setting the GUEST_KERNEL flag is implicitly changed here. > > For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER, > which include both guest_in_kernel and guest_in_NMI. Where is the "guest_in_NMI" state coming from? KVM only reports user v. kernel mode.
On 2024-11-06 3:02 p.m., Oliver Upton wrote: > On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote: >>> +static unsigned long common_misc_flags(struct pt_regs *regs) >>> +{ >>> + if (regs->flags & PERF_EFLAGS_EXACT) >>> + return PERF_RECORD_MISC_EXACT_IP; >>> + >>> + return 0; >>> +} >>> + >>> +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) >>> +{ >>> + unsigned long guest_state = perf_guest_state(); >>> + unsigned long flags = common_misc_flags(regs); >>> + >>> + if (guest_state & PERF_GUEST_USER) >>> + flags |= PERF_RECORD_MISC_GUEST_USER; >>> + else if (guest_state & PERF_GUEST_ACTIVE) >>> + flags |= PERF_RECORD_MISC_GUEST_KERNEL; >>> + >> >> The logic of setting the GUEST_KERNEL flag is implicitly changed here. >> >> For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER, >> which include both guest_in_kernel and guest_in_NMI. > > Where is the "guest_in_NMI" state coming from? KVM only reports user v. > kernel mode. I may have misunderstood kvm_arch_pmi_in_guest(). However, kvm_guest_state() returns at least 3 states: 0, PERF_GUEST_ACTIVE, and PERF_GUEST_ACTIVE | PERF_GUEST_USER. The existing code indeed assumes two modes: if it's not user mode, it must be kernel mode. However, the proposed code behaves differently, or at least implies there are more modes: it reports kernel mode only if it's not user mode and PERF_GUEST_ACTIVE is set. Thanks, Kan
On Wed, Nov 06, 2024 at 03:33:30PM -0500, Liang, Kan wrote: > On 2024-11-06 3:02 p.m., Oliver Upton wrote: > > On Wed, Nov 06, 2024 at 11:03:10AM -0500, Liang, Kan wrote: > >>> +static unsigned long common_misc_flags(struct pt_regs *regs) > >>> +{ > >>> + if (regs->flags & PERF_EFLAGS_EXACT) > >>> + return PERF_RECORD_MISC_EXACT_IP; > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) > >>> +{ > >>> + unsigned long guest_state = perf_guest_state(); > >>> + unsigned long flags = common_misc_flags(regs); > >>> + > >>> + if (guest_state & PERF_GUEST_USER) > >>> + flags |= PERF_RECORD_MISC_GUEST_USER; > >>> + else if (guest_state & PERF_GUEST_ACTIVE) > >>> + flags |= PERF_RECORD_MISC_GUEST_KERNEL; > >>> + > >> > >> The logic of setting the GUEST_KERNEL flag is implicitly changed here. > >> > >> For the current code, the GUEST_KERNEL flag is set for !PERF_GUEST_USER, > >> which include both guest_in_kernel and guest_in_NMI. > > > > Where is the "guest_in_NMI" state coming from? KVM only reports user v. > > kernel mode. > > I may understand the kvm_arch_pmi_in_guest() wrong. kvm_arch_pmi_in_guest() is trying to *guess* whether or not an overflow interrupt caused the most recent VM-exit, implying a counter overflowed while in the VM. It has no idea what events are loaded on the PMU and which contexts they're intended to sample in. It only makes sense to check kvm_arch_pmi_in_guest() if you're dealing with an event that counts in both host and guest modes and you need to decide who to sample. > However, the kvm_guest_state() at least return 3 states. > 0 > PERF_GUEST_ACTIVE > PERF_GUEST_ACTIVE | PERF_GUEST_USER > > The existing code indeed assumes two modes. If it's not user mode, it > must be kernel mode. > However, the proposed code behave differently, or at least implies there > are more modes. > If it's not user mode and sets PERF_GUEST_ACTIVE, it's kernel mode. 
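A precondition of the call to perf_arch_guest_misc_flags() is that guest state is nonzero, meaning a vCPU is currently loaded on this CPU. Every nonzero state listed above includes PERF_GUEST_ACTIVE, so when PERF_GUEST_USER is clear the new `else if (guest_state & PERF_GUEST_ACTIVE)` branch is taken exactly where the old `else` was.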
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d19e939f3998..24910c625e3d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -3011,16 +3011,34 @@ unsigned long perf_arch_instruction_pointer(struct pt_regs *regs) return regs->ip + code_segment_base(regs); } +static unsigned long common_misc_flags(struct pt_regs *regs) +{ + if (regs->flags & PERF_EFLAGS_EXACT) + return PERF_RECORD_MISC_EXACT_IP; + + return 0; +} + +unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) +{ + unsigned long guest_state = perf_guest_state(); + unsigned long flags = common_misc_flags(regs); + + if (guest_state & PERF_GUEST_USER) + flags |= PERF_RECORD_MISC_GUEST_USER; + else if (guest_state & PERF_GUEST_ACTIVE) + flags |= PERF_RECORD_MISC_GUEST_KERNEL; + + return flags; +} + unsigned long perf_arch_misc_flags(struct pt_regs *regs) { unsigned int guest_state = perf_guest_state(); - int misc = 0; + unsigned long misc = common_misc_flags(regs); if (guest_state) { - if (guest_state & PERF_GUEST_USER) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; + misc |= perf_arch_guest_misc_flags(regs); } else { if (user_mode(regs)) misc |= PERF_RECORD_MISC_USER; @@ -3028,9 +3046,6 @@ unsigned long perf_arch_misc_flags(struct pt_regs *regs) misc |= PERF_RECORD_MISC_KERNEL; } - if (regs->flags & PERF_EFLAGS_EXACT) - misc |= PERF_RECORD_MISC_EXACT_IP; - return misc; } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index feb87bf3d2e9..d95f902acc52 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -538,7 +538,9 @@ struct x86_perf_regs { extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_arch_misc_flags(struct pt_regs *regs); +extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs); #define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs) +#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #include <asm/stacktrace.h>