diff mbox series

[13/13] x86: retpolines: eliminate retpoline from msr event handlers

Message ID 20191104230001.27774-14-aarcange@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM monolithic v3 | expand

Commit Message

Andrea Arcangeli Nov. 4, 2019, 11 p.m. UTC
It's enough to check the value of the guest_get_msrs function pointer and
issue the direct call.

After this commit is applied, here are the most common retpolines executed
under a high resolution timer workload in the guest on a VMX host:

[..]
@[
    trace_retpoline+1
    __trace_retpoline+30
    __x86_indirect_thunk_rax+33
    do_syscall_64+89
    entry_SYSCALL_64_after_hwframe+68
]: 267
@[]: 2256
@[
    trace_retpoline+1
    __trace_retpoline+30
    __x86_indirect_thunk_rax+33
    __kvm_wait_lapic_expire+284
    vmx_vcpu_run.part.97+1091
    vcpu_enter_guest+377
    kvm_arch_vcpu_ioctl_run+261
    kvm_vcpu_ioctl+559
    do_vfs_ioctl+164
    ksys_ioctl+96
    __x64_sys_ioctl+22
    do_syscall_64+89
    entry_SYSCALL_64_after_hwframe+68
]: 2390
@[]: 33410

@total: 315707

Note that the highest hit above is __delay, so it is probably not worth
optimizing even if it were more frequent than 2k hits per sec.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 arch/x86/events/intel/core.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Paolo Bonzini Nov. 5, 2019, 10:21 a.m. UTC | #1
On 05/11/19 00:00, Andrea Arcangeli wrote:
> It's enough to check the value and issue the direct call.
> 
> After this commit is applied, here the most common retpolines executed
> under a high resolution timer workload in the guest on a VMX host:
> 
> [..]
> @[
>     trace_retpoline+1
>     __trace_retpoline+30
>     __x86_indirect_thunk_rax+33
>     do_syscall_64+89
>     entry_SYSCALL_64_after_hwframe+68
> ]: 267
> @[]: 2256
> @[
>     trace_retpoline+1
>     __trace_retpoline+30
>     __x86_indirect_thunk_rax+33
>     __kvm_wait_lapic_expire+284
>     vmx_vcpu_run.part.97+1091
>     vcpu_enter_guest+377
>     kvm_arch_vcpu_ioctl_run+261
>     kvm_vcpu_ioctl+559
>     do_vfs_ioctl+164
>     ksys_ioctl+96
>     __x64_sys_ioctl+22
>     do_syscall_64+89
>     entry_SYSCALL_64_after_hwframe+68
> ]: 2390
> @[]: 33410
> 
> @total: 315707
> 
> Note the highest hit above is __delay so probably not worth optimizing
> even if it would be more frequent than 2k hits per sec.
> 
> Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
> ---
>  arch/x86/events/intel/core.c | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
> index fcef678c3423..937363b803c1 100644
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3323,8 +3323,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_RETPOLINE
> +static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
> +static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
> +#endif
> +
>  struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
>  {
> +#ifdef CONFIG_RETPOLINE
> +	if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
> +		return intel_guest_get_msrs(nr);
> +	else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
> +		return core_guest_get_msrs(nr);
> +#endif
>  	if (x86_pmu.guest_get_msrs)
>  		return x86_pmu.guest_get_msrs(nr);
>  	*nr = 0;
> 

Queued, thanks.

Paolo
diff mbox series

Patch

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index fcef678c3423..937363b803c1 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3323,8 +3323,19 @@  static int intel_pmu_hw_config(struct perf_event *event)
 	return 0;
 }
 
+#ifdef CONFIG_RETPOLINE
+static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr);
+static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr);
+#endif
+
 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
+#ifdef CONFIG_RETPOLINE
+	if (x86_pmu.guest_get_msrs == intel_guest_get_msrs)
+		return intel_guest_get_msrs(nr);
+	else if (x86_pmu.guest_get_msrs == core_guest_get_msrs)
+		return core_guest_get_msrs(nr);
+#endif
 	if (x86_pmu.guest_get_msrs)
 		return x86_pmu.guest_get_msrs(nr);
 	*nr = 0;