From patchwork Tue Mar 2 07:09:06 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yanmin Zhang X-Patchwork-Id: 83129 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o2276wfe023004 for ; Tue, 2 Mar 2010 07:06:58 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753213Ab0CBHGs (ORCPT ); Tue, 2 Mar 2010 02:06:48 -0500 Received: from mga07.intel.com ([143.182.124.22]:39823 "EHLO azsmga101.ch.intel.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1753173Ab0CBHGr (ORCPT ); Tue, 2 Mar 2010 02:06:47 -0500 Received: from azsmga001.ch.intel.com ([10.2.17.19]) by azsmga101.ch.intel.com with ESMTP; 01 Mar 2010 23:06:46 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.49,564,1262592000"; d="scan'208";a="249701337" Received: from ymzhang.sh.intel.com (HELO [10.239.13.147]) ([10.239.13.147]) by azsmga001.ch.intel.com with ESMTP; 01 Mar 2010 23:06:44 -0800 Subject: Re: KVM PMU virtualization From: "Zhang, Yanmin" To: Ingo Molnar Cc: Joerg Roedel , Jes Sorensen , KVM General , Peter Zijlstra , Avi Kivity , Zachary Amsden , Gleb Natapov , ming.m.lin@intel.com In-Reply-To: <20100226091732.GI15885@elte.hu> References: <4B86917C.4070102@redhat.com> <20100225173423.GB4246@8bytes.org> <1267152917.1726.82.camel@localhost> <20100226085105.GC4246@8bytes.org> <20100226091732.GI15885@elte.hu> Date: Tue, 02 Mar 2010 15:09:06 +0800 Message-Id: <1267513746.1726.104.camel@localhost> Mime-Version: 1.0 X-Mailer: Evolution 2.28.0 (2.28.0-2.fc12) Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Tue, 02 Mar 2010 07:06:59 +0000 (UTC) --- linux-2.6.33/arch/x86/kernel/cpu/perf_event.c 2010-02-25 02:52:17.000000000 +0800 +++ 
linux-2.6.33_perfkvm/arch/x86/kernel/cpu/perf_event.c 2010-03-01 15:57:51.672990615 +0800 @@ -1621,6 +1621,7 @@ static void intel_pmu_drain_bts_buffer(s struct perf_event_header header; struct perf_sample_data data; struct pt_regs regs; + int ret; if (!event) return; @@ -1647,7 +1648,9 @@ static void intel_pmu_drain_bts_buffer(s * We will overwrite the from and to address before we output * the sample. */ - perf_prepare_sample(&header, &data, event, &regs); + ret = perf_prepare_sample(&header, &data, event, &regs); + if (ret) /* non-zero: perf_prepare_sample() found no instruction pointer to record, so emit nothing */ + return; if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) --- linux-2.6.33/arch/x86/kvm/vmx.c 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/arch/x86/kvm/vmx.c 2010-03-02 10:21:57.588586179 +0800 @@ -26,6 +26,7 @@ #include #include #include +#include <linux/perf_event.h> /* NOTE(review): header name was stripped from this copy; <linux/perf_event.h> is presumed because it declares perf_save_virt_ip() - confirm */ #include "kvm_cache_regs.h" #include "x86.h" @@ -3553,8 +3554,19 @@ static void vmx_complete_interrupts(stru /* We need to handle NMIs before interrupts are enabled */ if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) + (exit_intr_info & INTR_INFO_VALID_MASK)) { + u64 rip = vmcs_readl(GUEST_RIP); + int user_mode = vmcs_read16(GUEST_CS_SELECTOR); /* raw guest CS selector; reduced to a 0/1 flag below */ + +#ifdef CONFIG_X86_32 + user_mode = (user_mode & SEGMENT_RPL_MASK) == USER_RPL; +#else + user_mode = !!(user_mode & 3); +#endif + perf_save_virt_ip(user_mode, rip); /* stash guest mode flag and RIP in the per-cpu perf_virt_ip before int $2 runs */ asm("int $2"); + perf_reset_virt_ip(); /* clear the stash once int $2 has returned */ + } idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; --- linux-2.6.33/include/linux/perf_event.h 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/include/linux/perf_event.h 2010-03-02 12:26:15.050947780 +0800 @@ -125,8 +125,9 @@ enum perf_event_sample_format { PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_KVM = 1U << 11, - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ }; /* @@ -798,7 +799,7 @@ extern void perf_output_sample(struct pe 
struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event); -extern void perf_prepare_sample(struct perf_event_header *header, +extern int perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs); @@ -858,7 +859,6 @@ extern void perf_bp_event(struct perf_ev #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ PERF_RECORD_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) #endif extern int perf_output_begin(struct perf_output_handle *handle, @@ -905,6 +905,34 @@ static inline void perf_event_enable(str static inline void perf_event_disable(struct perf_event *event) { } #endif +//#if defined(CONFIG_PERF_EVENTS && CONFIG_PERF_HAS_VIRT_IP) +#if defined(CONFIG_PERF_EVENTS) +struct virt_ip_info { /* per-cpu record written by perf_save_virt_ip() */ + int user_mode; /* mode flag; a non-zero value is reported as PERF_RECORD_MISC_USER */ + u64 ip; /* saved instruction pointer; 0 means nothing is saved */ +}; + +DECLARE_PER_CPU(struct virt_ip_info, perf_virt_ip); +extern void perf_save_virt_ip(int user_mode, u64 ip); +extern void perf_reset_virt_ip(void); +extern int perf_get_virt_user_mode(void); +static inline u64 perf_instruction_pointer(struct perf_event *event, struct pt_regs *regs) /* IP to record: the saved per-cpu IP for PERF_SAMPLE_KVM events, otherwise the one in regs */ +{ + u64 ip; + if (event->attr.sample_type & PERF_SAMPLE_KVM) + ip = percpu_read(perf_virt_ip.ip); + else + ip = instruction_pointer(regs); + return ip; +} +#else +static inline void perf_save_virt_ip(int user_mode, u64 ip) { } +static inline void perf_reset_virt_ip(void) { } +static inline int perf_get_virt_user_mode(void) { return -1; } +#define perf_instruction_pointer(event, regs) instruction_pointer(regs) +#endif + + #define perf_output_put(handle, x) \ perf_output_copy((handle), &(x), sizeof(x)) --- linux-2.6.33/kernel/perf_event.c 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/kernel/perf_event.c 2010-03-02 12:30:41.236003180 +0800 @@ -3077,7 +3077,38 @@ void perf_output_sample(struct perf_outp } } -void perf_prepare_sample(struct perf_event_header *header, +//#ifdef 
CONFIG_PERF_VIRT_IP +DEFINE_PER_CPU(struct virt_ip_info, perf_virt_ip) = {0,0}; +EXPORT_PER_CPU_SYMBOL(perf_virt_ip); + +void perf_save_virt_ip(int user_mode, u64 ip) /* record the mode flag and IP in this CPU's perf_virt_ip; does nothing while nr_events is 0 */ +{ + if (!atomic_read(&nr_events)) + return; + percpu_write(perf_virt_ip.user_mode, user_mode); /* must be the mode flag, not the IP: perf_prepare_sample() turns it into MISC_USER/MISC_KERNEL */ + percpu_write(perf_virt_ip.ip, ip); +} +EXPORT_SYMBOL_GPL(perf_save_virt_ip); + +void perf_reset_virt_ip(void) /* zero this CPU's perf_virt_ip if an IP is currently saved */ +{ + if (!percpu_read(perf_virt_ip.ip)) + return; + percpu_write(perf_virt_ip.user_mode, 0); + percpu_write(perf_virt_ip.ip, 0); +} +EXPORT_SYMBOL_GPL(perf_reset_virt_ip); + +int perf_get_virt_user_mode(void) /* returns the saved mode flag, or -1 when no IP is saved */ +{ + if (!percpu_read(perf_virt_ip.ip)) + return -1; + return percpu_read(perf_virt_ip.user_mode); +} + +//#endif + +int perf_prepare_sample(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event, struct pt_regs *regs) @@ -3090,10 +3121,15 @@ void perf_prepare_sample(struct perf_eve header->size = sizeof(*header); header->misc = 0; - header->misc |= perf_misc_flags(regs); + if (event->attr.sample_type & PERF_SAMPLE_KVM) + header->misc |= percpu_read(perf_virt_ip.user_mode)?PERF_RECORD_MISC_USER:PERF_RECORD_MISC_KERNEL; + else + header->misc |= perf_misc_flags(regs); if (sample_type & PERF_SAMPLE_IP) { - data->ip = perf_instruction_pointer(regs); + data->ip = perf_instruction_pointer(event, regs); + if (!data->ip) /* no IP to record (e.g. PERF_SAMPLE_KVM with nothing saved): return non-zero so callers drop the sample */ + return -1; header->size += sizeof(data->ip); } @@ -3162,6 +3198,8 @@ void perf_prepare_sample(struct perf_eve WARN_ON_ONCE(size & (sizeof(u64)-1)); header->size += size; } + + return 0; } static void perf_event_output(struct perf_event *event, int nmi, @@ -3170,8 +3208,11 @@ static void perf_event_output(struct per { struct perf_output_handle handle; struct perf_event_header header; + int ret; - perf_prepare_sample(&header, data, event, regs); + ret = perf_prepare_sample(&header, data, event, regs); + if (ret) /* non-zero from perf_prepare_sample(): skip output for this sample */ + return; if (perf_output_begin(&handle, event, header.size, nmi, 1)) return; --- linux-2.6.33/tools/perf/builtin-record.c 2010-02-25 
02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/tools/perf/builtin-record.c 2010-03-02 13:19:53.564376291 +0800 @@ -251,6 +251,8 @@ static void create_counter(int counter, PERF_FORMAT_ID; attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (sample_kvm) /* global flag set in perf.c when the first command word is "kvm" */ + attr->sample_type |= PERF_SAMPLE_KVM; if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; --- linux-2.6.33/tools/perf/builtin-top.c 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/tools/perf/builtin-top.c 2010-03-01 16:35:41.972067501 +0800 @@ -1091,6 +1091,8 @@ static void start_counter(int i, int cou attr = attrs + counter; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; + if (sample_kvm) /* same opt-in as in builtin-record.c: request PERF_SAMPLE_KVM samples */ + attr->sample_type |= PERF_SAMPLE_KVM; if (freq) { attr->sample_type |= PERF_SAMPLE_PERIOD; --- linux-2.6.33/tools/perf/perf.c 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/tools/perf/perf.c 2010-03-02 09:57:03.164001069 +0800 @@ -28,6 +28,8 @@ struct pager_config { int val; }; +int sample_kvm = 0; /* set to 1 by handle_internal_command() when argv[0] is "kvm"; read by the record and top counter setup */ + static char debugfs_mntpt[MAXPATHLEN]; static int pager_command_config(const char *var, const char *value, void *data) @@ -320,6 +322,13 @@ static void handle_internal_command(int argv[0] = cmd = "help"; } + if (argc > 1 && !strcmp(argv[0], "kvm")) { /* "kvm <cmd> ...": needs at least one word after "kvm" */ + sample_kvm = 1; + argv++; /* drop the "kvm" word so the command lookup below sees <cmd> */ + argc--; + cmd = argv[0]; + } + for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands+i; if (strcmp(p->cmd, cmd)) --- linux-2.6.33/tools/perf/perf.h 2010-02-25 02:52:17.000000000 +0800 +++ linux-2.6.33_perfkvm/tools/perf/perf.h 2010-03-01 16:12:42.470082418 +0800 @@ -131,4 +131,6 @@ struct ip_callchain { u64 ips[0]; }; +extern int sample_kvm; /* defined in perf.c */ + #endif