Message ID | 1453866709-20324-8-git-send-email-zhaoshenglong@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: > From: Shannon Zhao <shannon.zhao@linaro.org> > > When we use tools like perf on host, perf passes the event type and the > id of this event type category to kernel, then kernel will map them to > hardware event number and write this number to PMU PMEVTYPER<n>_EL0 > register. When getting the event number in KVM, directly use raw event > type to create a perf_event for it. > > Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> > Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > --- > arch/arm64/include/asm/pmu.h | 3 ++ > arch/arm64/kvm/Makefile | 1 + > include/kvm/arm_pmu.h | 10 ++++ > virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 136 insertions(+) > create mode 100644 virt/kvm/arm/pmu.c > > diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h > index 4406184..2588f9c 100644 > --- a/arch/arm64/include/asm/pmu.h > +++ b/arch/arm64/include/asm/pmu.h > @@ -21,6 +21,7 @@ > > #define ARMV8_MAX_COUNTERS 32 > #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) > +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) I'm not sure we want to add this. It's name is wrong, as it's really PMCNTENSET_EL0.C, and just a few lines above we have the idx defined already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because arch/arm64/kernel/perf_event.c maps it that way. I think we should do the same with the pmc array, i.e. map the cycle counter to idx zero. > > /* > * Per-CPU PMCR: config reg > @@ -31,6 +32,8 @@ > #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ > #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ > #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ > +/* Determines which PMCCNTR_EL0 bit generates an overflow */ > +#define ARMV8_PMCR_LC (1 << 6) > #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ > #define ARMV8_PMCR_N_MASK 0x1f > #define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile > index caee9ee..122cff4 100644 > --- a/arch/arm64/kvm/Makefile > +++ b/arch/arm64/kvm/Makefile > @@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o > kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o > kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o > kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o > +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o > diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h > index 32fee2d..ee4b15c 100644 > --- a/include/kvm/arm_pmu.h > +++ b/include/kvm/arm_pmu.h > @@ -36,11 +36,21 @@ struct kvm_pmu { > }; > > #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) > +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); > +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, > + u64 select_idx); > #else > struct kvm_pmu { > }; > > #define kvm_arm_pmu_v3_ready(v) (false) > +static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, > + u64 select_idx) > +{ > + return 0; > +} > +static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, > + u64 data, u64 select_idx) {} > #endif > > #endif > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c > new file mode 100644 > index 0000000..673ec55 > --- /dev/null > +++ b/virt/kvm/arm/pmu.c > @@ -0,0 +1,122 @@ > +/* > + * Copyright (C) 2015 Linaro Ltd. > + * Author: Shannon Zhao <shannon.zhao@linaro.org> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include <linux/cpu.h> > +#include <linux/kvm.h> > +#include <linux/kvm_host.h> > +#include <linux/perf_event.h> > +#include <asm/kvm_emulate.h> > +#include <kvm/arm_pmu.h> > + > +/** > + * kvm_pmu_get_counter_value - get PMU counter value > + * @vcpu: The vcpu pointer > + * @select_idx: The counter index > + */ > +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) > +{ > + u64 counter, reg, enabled, running; > + struct kvm_pmu *pmu = &vcpu->arch.pmu; > + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; > + > + reg = (select_idx == ARMV8_CYCLE_IDX) > + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; reg = select_idx == 0 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx - 1; > + counter = vcpu_sys_reg(vcpu, reg); > + > + /* The real counter value is equal to the value of counter register plus > + * the value perf event counts. > + */ > + if (pmc->perf_event) > + counter += perf_event_read_value(pmc->perf_event, &enabled, > + &running); > + > + return counter & pmc->bitmask; > +} > + > +/** > + * kvm_pmu_stop_counter - stop PMU counter > + * @pmc: The PMU counter pointer > + * > + * If this counter has been configured to monitor some event, release it here. > + */ > +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) > +{ > + u64 counter, reg; > + > + if (pmc->perf_event) { > + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); > + reg = (pmc->idx == ARMV8_CYCLE_IDX) > + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; Second need for this reg selection. We should probably create an idx to reg function. > + vcpu_sys_reg(vcpu, reg) = counter; > + perf_event_disable(pmc->perf_event); > + perf_event_release_kernel(pmc->perf_event); > + pmc->perf_event = NULL; > + } > +} > + > +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) > +{ > + return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMCR_E) && > + (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); > +} > + > +/** > + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event > + * @vcpu: The vcpu pointer > + * @data: The data guest writes to PMXEVTYPER_EL0 > + * @select_idx: The number of selected counter > + * > + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an > + * event with given hardware event number. Here we call perf_event API to > + * emulate this action and create a kernel perf event for it. > + */ > +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, > + u64 select_idx) > +{ > + struct kvm_pmu *pmu = &vcpu->arch.pmu; > + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; > + struct perf_event *event; > + struct perf_event_attr attr; > + u64 eventsel, counter; > + > + kvm_pmu_stop_counter(vcpu, pmc); > + eventsel = data & ARMV8_EVTYPE_EVENT; > + > + memset(&attr, 0, sizeof(struct perf_event_attr)); > + attr.type = PERF_TYPE_RAW; > + attr.size = sizeof(attr); nit: the memset sizeof could also just use attr to save characters. Or why not avoid the memset using struct perf_event_attr attr = {...}, like arch/x86/kvm/pmu.c does? > + attr.pinned = 1; > + attr.disabled = kvm_pmu_counter_is_enabled(vcpu, select_idx); hmm.. disabled = enabled ? If we always want it off at set time, then it should just be '= 1', right? > + attr.exclude_user = data & ARMV8_EXCLUDE_EL0 ? 1 : 0; > + attr.exclude_kernel = data & ARMV8_EXCLUDE_EL1 ? 1 : 0; > + attr.exclude_hv = 1; /* Don't count EL2 events */ > + attr.exclude_host = 1; /* Don't count host events */ > + attr.config = eventsel; > + > + counter = kvm_pmu_get_counter_value(vcpu, select_idx); > + /* The initial sample period (overflow count) of an event. */ > + attr.sample_period = (-counter) & pmc->bitmask; > + > + event = perf_event_create_kernel_counter(&attr, -1, current, NULL, pmc); > + if (IS_ERR(event)) { > + pr_err_once("kvm: pmu event creation failed %ld\n", > + PTR_ERR(event)); > + return; > + } > + > + pmc->perf_event = event; > +} > -- > 2.0.4 > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 28/01/16 16:31, Andrew Jones wrote: > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: >> From: Shannon Zhao <shannon.zhao@linaro.org> >> >> When we use tools like perf on host, perf passes the event type and the >> id of this event type category to kernel, then kernel will map them to >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0 >> register. When getting the event number in KVM, directly use raw event >> type to create a perf_event for it. >> >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> >> --- >> arch/arm64/include/asm/pmu.h | 3 ++ >> arch/arm64/kvm/Makefile | 1 + >> include/kvm/arm_pmu.h | 10 ++++ >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ >> 4 files changed, 136 insertions(+) >> create mode 100644 virt/kvm/arm/pmu.c >> >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h >> index 4406184..2588f9c 100644 >> --- a/arch/arm64/include/asm/pmu.h >> +++ b/arch/arm64/include/asm/pmu.h >> @@ -21,6 +21,7 @@ >> >> #define ARMV8_MAX_COUNTERS 32 >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) >> +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) > > I'm not sure we want to add this. It's name is wrong, as it's really > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because > arch/arm64/kernel/perf_event.c maps it that way. > > I think we should do the same with the pmc array, i.e. map the cycle > counter to idx zero. I tend to have the opposite view. Not for the sake of it, but because I find it helpful to directly map the code to the architecture documentation without having to bend another handful of neurons. Will probably had some good reasons to structure it that way, but I don't know the rational. Will? Thanks, M.
On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote: > On 28/01/16 16:31, Andrew Jones wrote: > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: > >> From: Shannon Zhao <shannon.zhao@linaro.org> > >> > >> When we use tools like perf on host, perf passes the event type and the > >> id of this event type category to kernel, then kernel will map them to > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0 > >> register. When getting the event number in KVM, directly use raw event > >> type to create a perf_event for it. > >> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > >> --- > >> arch/arm64/include/asm/pmu.h | 3 ++ > >> arch/arm64/kvm/Makefile | 1 + > >> include/kvm/arm_pmu.h | 10 ++++ > >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ > >> 4 files changed, 136 insertions(+) > >> create mode 100644 virt/kvm/arm/pmu.c > >> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h > >> index 4406184..2588f9c 100644 > >> --- a/arch/arm64/include/asm/pmu.h > >> +++ b/arch/arm64/include/asm/pmu.h > >> @@ -21,6 +21,7 @@ > >> > >> #define ARMV8_MAX_COUNTERS 32 > >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) > >> +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) > > > > I'm not sure we want to add this. It's name is wrong, as it's really > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because > > arch/arm64/kernel/perf_event.c maps it that way. > > > > I think we should do the same with the pmc array, i.e. map the cycle > > counter to idx zero. > > I tend to have the opposite view. Not for the sake of it, but because I > find it helpful to directly map the code to the architecture > documentation without having to bend another handful of neurons. > > Will probably had some good reasons to structure it that way, but I > don't know the rational. Will? It was years ago, but I suspect that the cycle counter is index zero because its mandated, whilst the number of event counters is IMPDEF. Will -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2016/1/29 2:06, Will Deacon wrote: > On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote: >> > On 28/01/16 16:31, Andrew Jones wrote: >>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: >>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org> >>>> > >> >>>> > >> When we use tools like perf on host, perf passes the event type and the >>>> > >> id of this event type category to kernel, then kernel will map them to >>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0 >>>> > >> register. When getting the event number in KVM, directly use raw event >>>> > >> type to create a perf_event for it. >>>> > >> >>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> >>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> >>>> > >> --- >>>> > >> arch/arm64/include/asm/pmu.h | 3 ++ >>>> > >> arch/arm64/kvm/Makefile | 1 + >>>> > >> include/kvm/arm_pmu.h | 10 ++++ >>>> > >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ >>>> > >> 4 files changed, 136 insertions(+) >>>> > >> create mode 100644 virt/kvm/arm/pmu.c >>>> > >> >>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h >>>> > >> index 4406184..2588f9c 100644 >>>> > >> --- a/arch/arm64/include/asm/pmu.h >>>> > >> +++ b/arch/arm64/include/asm/pmu.h >>>> > >> @@ -21,6 +21,7 @@ >>>> > >> >>>> > >> #define ARMV8_MAX_COUNTERS 32 >>>> > >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) >>>> > >> +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) >>> > > >>> > > I'm not sure we want to add this. It's name is wrong, as it's really >>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined >>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because >>> > > arch/arm64/kernel/perf_event.c maps it that way. >>> > > >>> > > I think we should do the same with the pmc array, i.e. map the cycle >>> > > counter to idx zero. >> > >> > I tend to have the opposite view. Not for the sake of it, but because I >> > find it helpful to directly map the code to the architecture >> > documentation without having to bend another handful of neurons. >> > >> > Will probably had some good reasons to structure it that way, but I >> > don't know the rational. Will? > It was years ago, but I suspect that the cycle counter is index zero > because its mandated, whilst the number of event counters is IMPDEF. So does it need to change the cycle counter index to zero in this patch set? Thanks,
On 2016/1/29 2:06, Will Deacon wrote: > On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote: >> > On 28/01/16 16:31, Andrew Jones wrote: >>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: >>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org> >>>> > >> >>>> > >> When we use tools like perf on host, perf passes the event type and the >>>> > >> id of this event type category to kernel, then kernel will map them to >>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0 >>>> > >> register. When getting the event number in KVM, directly use raw event >>>> > >> type to create a perf_event for it. >>>> > >> >>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> >>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> >>>> > >> --- >>>> > >> arch/arm64/include/asm/pmu.h | 3 ++ >>>> > >> arch/arm64/kvm/Makefile | 1 + >>>> > >> include/kvm/arm_pmu.h | 10 ++++ >>>> > >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ >>>> > >> 4 files changed, 136 insertions(+) >>>> > >> create mode 100644 virt/kvm/arm/pmu.c >>>> > >> >>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h >>>> > >> index 4406184..2588f9c 100644 >>>> > >> --- a/arch/arm64/include/asm/pmu.h >>>> > >> +++ b/arch/arm64/include/asm/pmu.h >>>> > >> @@ -21,6 +21,7 @@ >>>> > >> >>>> > >> #define ARMV8_MAX_COUNTERS 32 >>>> > >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) >>>> > >> +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) >>> > > >>> > > I'm not sure we want to add this. It's name is wrong, as it's really >>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined >>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because >>> > > arch/arm64/kernel/perf_event.c maps it that way. >>> > > >>> > > I think we should do the same with the pmc array, i.e. map the cycle >>> > > counter to idx zero. >> > >> > I tend to have the opposite view. Not for the sake of it, but because I >> > find it helpful to directly map the code to the architecture >> > documentation without having to bend another handful of neurons. >> > >> > Will probably had some good reasons to structure it that way, but I >> > don't know the rational. Will? > It was years ago, but I suspect that the cycle counter is index zero > because its mandated, whilst the number of event counters is IMPDEF. Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using bit 31 to stands the state of cycle counter, if we make cycle counter index to zero, we always need to do translation between the idx and bit 31 when we access these registers. Thanks,
On Fri, Jan 29, 2016 at 02:26:31PM +0800, Shannon Zhao wrote: > > > On 2016/1/29 2:06, Will Deacon wrote: > > On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote: > >> > On 28/01/16 16:31, Andrew Jones wrote: > >>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: > >>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org> > >>>> > >> > >>>> > >> When we use tools like perf on host, perf passes the event type and the > >>>> > >> id of this event type category to kernel, then kernel will map them to > >>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0 > >>>> > >> register. When getting the event number in KVM, directly use raw event > >>>> > >> type to create a perf_event for it. > >>>> > >> > >>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org> > >>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com> > >>>> > >> --- > >>>> > >> arch/arm64/include/asm/pmu.h | 3 ++ > >>>> > >> arch/arm64/kvm/Makefile | 1 + > >>>> > >> include/kvm/arm_pmu.h | 10 ++++ > >>>> > >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ > >>>> > >> 4 files changed, 136 insertions(+) > >>>> > >> create mode 100644 virt/kvm/arm/pmu.c > >>>> > >> > >>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h > >>>> > >> index 4406184..2588f9c 100644 > >>>> > >> --- a/arch/arm64/include/asm/pmu.h > >>>> > >> +++ b/arch/arm64/include/asm/pmu.h > >>>> > >> @@ -21,6 +21,7 @@ > >>>> > >> > >>>> > >> #define ARMV8_MAX_COUNTERS 32 > >>>> > >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) > >>>> > >> +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) > >>> > > > >>> > > I'm not sure we want to add this. It's name is wrong, as it's really > >>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined > >>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because > >>> > > arch/arm64/kernel/perf_event.c maps it that way. > >>> > > > >>> > > I think we should do the same with the pmc array, i.e. map the cycle > >>> > > counter to idx zero. > >> > > >> > I tend to have the opposite view. Not for the sake of it, but because I > >> > find it helpful to directly map the code to the architecture > >> > documentation without having to bend another handful of neurons. > >> > > >> > Will probably had some good reasons to structure it that way, but I > >> > don't know the rational. Will? > > It was years ago, but I suspect that the cycle counter is index zero > > because its mandated, whilst the number of event counters is IMPDEF. > > Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using > bit 31 to stands the state of cycle counter, if we make cycle counter > index to zero, we always need to do translation between the idx and bit > 31 when we access these registers. Conversely, if you stick the cycle counter right at the top, then you'll need to rework a bunch of the perf code that iterates from ARMV7_IDX_COUNTER0 to pmu->num_events. Will -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2016/1/29 18:18, Will Deacon wrote: > On Fri, Jan 29, 2016 at 02:26:31PM +0800, Shannon Zhao wrote: >> > >> > >> >On 2016/1/29 2:06, Will Deacon wrote: >>> > >On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote: >>>>> > >> >On 28/01/16 16:31, Andrew Jones wrote: >>>>>>> > >>> > >On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote: >>>>>>>>> > >>>> > >>From: Shannon Zhao<shannon.zhao@linaro.org> >>>>>>>>> > >>>> > >> >>>>>>>>> > >>>> > >>When we use tools like perf on host, perf passes the event type and the >>>>>>>>> > >>>> > >>id of this event type category to kernel, then kernel will map them to >>>>>>>>> > >>>> > >>hardware event number and write this number to PMU PMEVTYPER<n>_EL0 >>>>>>>>> > >>>> > >>register. When getting the event number in KVM, directly use raw event >>>>>>>>> > >>>> > >>type to create a perf_event for it. >>>>>>>>> > >>>> > >> >>>>>>>>> > >>>> > >>Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org> >>>>>>>>> > >>>> > >>Reviewed-by: Marc Zyngier<marc.zyngier@arm.com> >>>>>>>>> > >>>> > >>--- >>>>>>>>> > >>>> > >> arch/arm64/include/asm/pmu.h | 3 ++ >>>>>>>>> > >>>> > >> arch/arm64/kvm/Makefile | 1 + >>>>>>>>> > >>>> > >> include/kvm/arm_pmu.h | 10 ++++ >>>>>>>>> > >>>> > >> virt/kvm/arm/pmu.c | 122 +++++++++++++++++++++++++++++++++++++++++++ >>>>>>>>> > >>>> > >> 4 files changed, 136 insertions(+) >>>>>>>>> > >>>> > >> create mode 100644 virt/kvm/arm/pmu.c >>>>>>>>> > >>>> > >> >>>>>>>>> > >>>> > >>diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h >>>>>>>>> > >>>> > >>index 4406184..2588f9c 100644 >>>>>>>>> > >>>> > >>--- a/arch/arm64/include/asm/pmu.h >>>>>>>>> > >>>> > >>+++ b/arch/arm64/include/asm/pmu.h >>>>>>>>> > >>>> > >>@@ -21,6 +21,7 @@ >>>>>>>>> > >>>> > >> >>>>>>>>> > >>>> > >> #define ARMV8_MAX_COUNTERS 32 >>>>>>>>> > >>>> > >> #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) >>>>>>>>> > >>>> > >>+#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) >>>>>>> > >>> > > >>>>>>> > >>> > >I'm not sure we want to add this. It's name is wrong, as it's really >>>>>>> > >>> > >PMCNTENSET_EL0.C, and just a few lines above we have the idx defined >>>>>>> > >>> > >already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because >>>>>>> > >>> > >arch/arm64/kernel/perf_event.c maps it that way. >>>>>>> > >>> > > >>>>>>> > >>> > >I think we should do the same with the pmc array, i.e. map the cycle >>>>>>> > >>> > >counter to idx zero. >>>>> > >> > >>>>> > >> >I tend to have the opposite view. Not for the sake of it, but because I >>>>> > >> >find it helpful to directly map the code to the architecture >>>>> > >> >documentation without having to bend another handful of neurons. >>>>> > >> > >>>>> > >> >Will probably had some good reasons to structure it that way, but I >>>>> > >> >don't know the rational. Will? >>> > >It was years ago, but I suspect that the cycle counter is index zero >>> > >because its mandated, whilst the number of event counters is IMPDEF. >> > >> >Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using >> >bit 31 to stands the state of cycle counter, if we make cycle counter >> >index to zero, we always need to do translation between the idx and bit >> >31 when we access these registers. > Conversely, if you stick the cycle counter right at the top, then you'll > need to rework a bunch of the perf code that iterates from > ARMV7_IDX_COUNTER0 to pmu->num_events. But actually this doesn't affect the perf code now because it's just a internal PMC array index of kvm arm guest pmu codes. And having a look at the ARMv8 SPEC, it says " PMCCFILTR_EL0 can also be accessed by using PMXEVTYPER_EL0 with PMSELR_EL0.SEL set to 0b11111. " Apparently it treats the index of cycle counter as 31 not zero. Also regarding the PMCR.N field my understanding is that it just stands the number of counters not the counter's order or index. Looking at the description of PMSEL.SEL field, it says "This field can take any value from 0 (0b00000) to (PMCR.N)-1, or 31 (0b11111)." while it's not "This field can take any value from 1 (0b00001) to (PMCR.N), or 0." Thanks,
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h index 4406184..2588f9c 100644 --- a/arch/arm64/include/asm/pmu.h +++ b/arch/arm64/include/asm/pmu.h @@ -21,6 +21,7 @@ #define ARMV8_MAX_COUNTERS 32 #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) +#define ARMV8_CYCLE_IDX (ARMV8_MAX_COUNTERS - 1) /* * Per-CPU PMCR: config reg @@ -31,6 +32,8 @@ #define ARMV8_PMCR_D (1 << 3) /* CCNT counts every 64th cpu cycle */ #define ARMV8_PMCR_X (1 << 4) /* Export to ETM */ #define ARMV8_PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +/* Determines which PMCCNTR_EL0 bit generates an overflow */ +#define ARMV8_PMCR_LC (1 << 6) #define ARMV8_PMCR_N_SHIFT 11 /* Number of counters supported */ #define ARMV8_PMCR_N_MASK 0x1f #define ARMV8_PMCR_MASK 0x3f /* Mask for writable bits */ diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index caee9ee..122cff4 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 32fee2d..ee4b15c 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -36,11 +36,21 @@ struct kvm_pmu { }; #define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, + u64 select_idx); #else struct kvm_pmu { }; #define kvm_arm_pmu_v3_ready(v) (false) +static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, + u64 select_idx) +{ + return 0; +} +static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, + u64 data, u64 select_idx) {} #endif #endif diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c new file mode 100644 index 0000000..673ec55 --- /dev/null +++ b/virt/kvm/arm/pmu.c @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2015 Linaro Ltd. + * Author: Shannon Zhao <shannon.zhao@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/perf_event.h> +#include <asm/kvm_emulate.h> +#include <kvm/arm_pmu.h> + +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 counter, reg, enabled, running; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + + reg = (select_idx == ARMV8_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + counter = vcpu_sys_reg(vcpu, reg); + + /* The real counter value is equal to the value of counter register plus + * the value perf event counts. + */ + if (pmc->perf_event) + counter += perf_event_read_value(pmc->perf_event, &enabled, + &running); + + return counter & pmc->bitmask; +} + +/** + * kvm_pmu_stop_counter - stop PMU counter + * @pmc: The PMU counter pointer + * + * If this counter has been configured to monitor some event, release it here. + */ +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +{ + u64 counter, reg; + + if (pmc->perf_event) { + counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); + reg = (pmc->idx == ARMV8_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + vcpu_sys_reg(vcpu, reg) = counter; + perf_event_disable(pmc->perf_event); + perf_event_release_kernel(pmc->perf_event); + pmc->perf_event = NULL; + } +} + +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMCR_E) && + (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); +} + +/** + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event + * @vcpu: The vcpu pointer + * @data: The data guest writes to PMXEVTYPER_EL0 + * @select_idx: The number of selected counter + * + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an + * event with given hardware event number. Here we call perf_event API to + * emulate this action and create a kernel perf event for it. + */ +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, + u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + struct perf_event *event; + struct perf_event_attr attr; + u64 eventsel, counter; + + kvm_pmu_stop_counter(vcpu, pmc); + eventsel = data & ARMV8_EVTYPE_EVENT; + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.type = PERF_TYPE_RAW; + attr.size = sizeof(attr); + attr.pinned = 1; + attr.disabled = kvm_pmu_counter_is_enabled(vcpu, select_idx); + attr.exclude_user = data & ARMV8_EXCLUDE_EL0 ? 1 : 0; + attr.exclude_kernel = data & ARMV8_EXCLUDE_EL1 ? 1 : 0; + attr.exclude_hv = 1; /* Don't count EL2 events */ + attr.exclude_host = 1; /* Don't count host events */ + attr.config = eventsel; + + counter = kvm_pmu_get_counter_value(vcpu, select_idx); + /* The initial sample period (overflow count) of an event. */ + attr.sample_period = (-counter) & pmc->bitmask; + + event = perf_event_create_kernel_counter(&attr, -1, current, NULL, pmc); + if (IS_ERR(event)) { + pr_err_once("kvm: pmu event creation failed %ld\n", + PTR_ERR(event)); + return; + } + + pmc->perf_event = event; +}