diff mbox

[v10,07/21] KVM: ARM64: PMU: Add perf event map and introduce perf event creating function

Message ID 1453866709-20324-8-git-send-email-zhaoshenglong@huawei.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shannon Zhao Jan. 27, 2016, 3:51 a.m. UTC
From: Shannon Zhao <shannon.zhao@linaro.org>

When we use tools like perf on host, perf passes the event type and the
id of this event type category to kernel, then kernel will map them to
hardware event number and write this number to PMU PMEVTYPER<n>_EL0
register. When getting the event number in KVM, directly use raw event
type to create a perf_event for it.

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/pmu.h |   3 ++
 arch/arm64/kvm/Makefile      |   1 +
 include/kvm/arm_pmu.h        |  10 ++++
 virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 virt/kvm/arm/pmu.c

Comments

Andrew Jones Jan. 28, 2016, 4:31 p.m. UTC | #1
On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
> From: Shannon Zhao <shannon.zhao@linaro.org>
> 
> When we use tools like perf on host, perf passes the event type and the
> id of this event type category to kernel, then kernel will map them to
> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
> register. When getting the event number in KVM, directly use raw event
> type to create a perf_event for it.
> 
> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  arch/arm64/include/asm/pmu.h |   3 ++
>  arch/arm64/kvm/Makefile      |   1 +
>  include/kvm/arm_pmu.h        |  10 ++++
>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 136 insertions(+)
>  create mode 100644 virt/kvm/arm/pmu.c
> 
> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> index 4406184..2588f9c 100644
> --- a/arch/arm64/include/asm/pmu.h
> +++ b/arch/arm64/include/asm/pmu.h
> @@ -21,6 +21,7 @@
>  
>  #define ARMV8_MAX_COUNTERS      32
>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)

I'm not sure we want to add this. It's name is wrong, as it's really
PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
arch/arm64/kernel/perf_event.c maps it that way.

I think we should do the same with the pmc array, i.e. map the cycle
counter to idx zero.

>  
>  /*
>   * Per-CPU PMCR: config reg
> @@ -31,6 +32,8 @@
>  #define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
>  #define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
>  #define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
> +/* Determines which PMCCNTR_EL0 bit generates an overflow */
> +#define ARMV8_PMCR_LC		(1 << 6)
>  #define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
>  #define	ARMV8_PMCR_N_MASK	0x1f
>  #define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index caee9ee..122cff4 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -26,3 +26,4 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
> +kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 32fee2d..ee4b15c 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -36,11 +36,21 @@ struct kvm_pmu {
>  };
>  
>  #define kvm_arm_pmu_v3_ready(v)		((v)->arch.pmu.ready)
> +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
> +				    u64 select_idx);
>  #else
>  struct kvm_pmu {
>  };
>  
>  #define kvm_arm_pmu_v3_ready(v)		(false)
> +static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
> +					    u64 select_idx)
> +{
> +	return 0;
> +}
> +static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
> +						  u64 data, u64 select_idx) {}
>  #endif
>  
>  #endif
> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> new file mode 100644
> index 0000000..673ec55
> --- /dev/null
> +++ b/virt/kvm/arm/pmu.c
> @@ -0,0 +1,122 @@
> +/*
> + * Copyright (C) 2015 Linaro Ltd.
> + * Author: Shannon Zhao <shannon.zhao@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <linux/perf_event.h>
> +#include <asm/kvm_emulate.h>
> +#include <kvm/arm_pmu.h>
> +
> +/**
> + * kvm_pmu_get_counter_value - get PMU counter value
> + * @vcpu: The vcpu pointer
> + * @select_idx: The counter index
> + */
> +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> +{
> +	u64 counter, reg, enabled, running;
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> +
> +	reg = (select_idx == ARMV8_CYCLE_IDX)
> +	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;

reg = select_idx == 0 ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx - 1;

> +	counter = vcpu_sys_reg(vcpu, reg);
> +
> +	/* The real counter value is equal to the value of counter register plus
> +	 * the value perf event counts.
> +	 */
> +	if (pmc->perf_event)
> +		counter += perf_event_read_value(pmc->perf_event, &enabled,
> +						 &running);
> +
> +	return counter & pmc->bitmask;
> +}
> +
> +/**
> + * kvm_pmu_stop_counter - stop PMU counter
> + * @pmc: The PMU counter pointer
> + *
> + * If this counter has been configured to monitor some event, release it here.
> + */
> +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
> +{
> +	u64 counter, reg;
> +
> +	if (pmc->perf_event) {
> +		counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
> +		reg = (pmc->idx == ARMV8_CYCLE_IDX)
> +		       ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;

Second need for this reg selection. We should probably create an idx to
reg function.

> +		vcpu_sys_reg(vcpu, reg) = counter;
> +		perf_event_disable(pmc->perf_event);
> +		perf_event_release_kernel(pmc->perf_event);
> +		pmc->perf_event = NULL;
> +	}
> +}
> +
> +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
> +{
> +	return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMCR_E) &&
> +	       (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
> +}
> +
> +/**
> + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
> + * @vcpu: The vcpu pointer
> + * @data: The data guest writes to PMXEVTYPER_EL0
> + * @select_idx: The number of selected counter
> + *
> + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
> + * event with given hardware event number. Here we call perf_event API to
> + * emulate this action and create a kernel perf event for it.
> + */
> +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
> +				    u64 select_idx)
> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> +	struct perf_event *event;
> +	struct perf_event_attr attr;
> +	u64 eventsel, counter;
> +
> +	kvm_pmu_stop_counter(vcpu, pmc);
> +	eventsel = data & ARMV8_EVTYPE_EVENT;
> +
> +	memset(&attr, 0, sizeof(struct perf_event_attr));
> +	attr.type = PERF_TYPE_RAW;
> +	attr.size = sizeof(attr);

nit: the memset sizeof could also just use attr to save characters.
Or why not avoid the memset using struct perf_event_attr attr = {...},
like arch/x86/kvm/pmu.c does?


> +	attr.pinned = 1;
> +	attr.disabled = kvm_pmu_counter_is_enabled(vcpu, select_idx);

hmm.. disabled = enabled ? If we always want it off at set time, then it
should just be '= 1', right?

> +	attr.exclude_user = data & ARMV8_EXCLUDE_EL0 ? 1 : 0;
> +	attr.exclude_kernel = data & ARMV8_EXCLUDE_EL1 ? 1 : 0;
> +	attr.exclude_hv = 1; /* Don't count EL2 events */
> +	attr.exclude_host = 1; /* Don't count host events */
> +	attr.config = eventsel;
> +
> +	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
> +	/* The initial sample period (overflow count) of an event. */
> +	attr.sample_period = (-counter) & pmc->bitmask;
> +
> +	event = perf_event_create_kernel_counter(&attr, -1, current, NULL, pmc);
> +	if (IS_ERR(event)) {
> +		pr_err_once("kvm: pmu event creation failed %ld\n",
> +			    PTR_ERR(event));
> +		return;
> +	}
> +
> +	pmc->perf_event = event;
> +}
> -- 
> 2.0.4
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Jan. 28, 2016, 4:45 p.m. UTC | #2
On 28/01/16 16:31, Andrew Jones wrote:
> On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
>> From: Shannon Zhao <shannon.zhao@linaro.org>
>>
>> When we use tools like perf on host, perf passes the event type and the
>> id of this event type category to kernel, then kernel will map them to
>> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
>> register. When getting the event number in KVM, directly use raw event
>> type to create a perf_event for it.
>>
>> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
>> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  arch/arm64/include/asm/pmu.h |   3 ++
>>  arch/arm64/kvm/Makefile      |   1 +
>>  include/kvm/arm_pmu.h        |  10 ++++
>>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 136 insertions(+)
>>  create mode 100644 virt/kvm/arm/pmu.c
>>
>> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
>> index 4406184..2588f9c 100644
>> --- a/arch/arm64/include/asm/pmu.h
>> +++ b/arch/arm64/include/asm/pmu.h
>> @@ -21,6 +21,7 @@
>>  
>>  #define ARMV8_MAX_COUNTERS      32
>>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
>> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
> 
> I'm not sure we want to add this. It's name is wrong, as it's really
> PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
> already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
> arch/arm64/kernel/perf_event.c maps it that way.
> 
> I think we should do the same with the pmc array, i.e. map the cycle
> counter to idx zero.

I tend to have the opposite view. Not for the sake of it, but because I
find it helpful to directly map the code to the architecture
documentation without having to bend another handful of neurons.

Will probably had some good reasons to structure it that way, but I
don't know the rational. Will?

Thanks,

	M.
Will Deacon Jan. 28, 2016, 6:06 p.m. UTC | #3
On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote:
> On 28/01/16 16:31, Andrew Jones wrote:
> > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
> >> From: Shannon Zhao <shannon.zhao@linaro.org>
> >>
> >> When we use tools like perf on host, perf passes the event type and the
> >> id of this event type category to kernel, then kernel will map them to
> >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
> >> register. When getting the event number in KVM, directly use raw event
> >> type to create a perf_event for it.
> >>
> >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
> >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> >> ---
> >>  arch/arm64/include/asm/pmu.h |   3 ++
> >>  arch/arm64/kvm/Makefile      |   1 +
> >>  include/kvm/arm_pmu.h        |  10 ++++
> >>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
> >>  4 files changed, 136 insertions(+)
> >>  create mode 100644 virt/kvm/arm/pmu.c
> >>
> >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> >> index 4406184..2588f9c 100644
> >> --- a/arch/arm64/include/asm/pmu.h
> >> +++ b/arch/arm64/include/asm/pmu.h
> >> @@ -21,6 +21,7 @@
> >>  
> >>  #define ARMV8_MAX_COUNTERS      32
> >>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
> >> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
> > 
> > I'm not sure we want to add this. It's name is wrong, as it's really
> > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
> > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
> > arch/arm64/kernel/perf_event.c maps it that way.
> > 
> > I think we should do the same with the pmc array, i.e. map the cycle
> > counter to idx zero.
> 
> I tend to have the opposite view. Not for the sake of it, but because I
> find it helpful to directly map the code to the architecture
> documentation without having to bend another handful of neurons.
> 
> Will probably had some good reasons to structure it that way, but I
> don't know the rational. Will?

It was years ago, but I suspect that the cycle counter is index zero
because its mandated, whilst the number of event counters is IMPDEF.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shannon Zhao Jan. 29, 2016, 6:14 a.m. UTC | #4
On 2016/1/29 2:06, Will Deacon wrote:
> On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote:
>> > On 28/01/16 16:31, Andrew Jones wrote:
>>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
>>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org>
>>>> > >>
>>>> > >> When we use tools like perf on host, perf passes the event type and the
>>>> > >> id of this event type category to kernel, then kernel will map them to
>>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
>>>> > >> register. When getting the event number in KVM, directly use raw event
>>>> > >> type to create a perf_event for it.
>>>> > >>
>>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
>>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
>>>> > >> ---
>>>> > >>  arch/arm64/include/asm/pmu.h |   3 ++
>>>> > >>  arch/arm64/kvm/Makefile      |   1 +
>>>> > >>  include/kvm/arm_pmu.h        |  10 ++++
>>>> > >>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
>>>> > >>  4 files changed, 136 insertions(+)
>>>> > >>  create mode 100644 virt/kvm/arm/pmu.c
>>>> > >>
>>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
>>>> > >> index 4406184..2588f9c 100644
>>>> > >> --- a/arch/arm64/include/asm/pmu.h
>>>> > >> +++ b/arch/arm64/include/asm/pmu.h
>>>> > >> @@ -21,6 +21,7 @@
>>>> > >>  
>>>> > >>  #define ARMV8_MAX_COUNTERS      32
>>>> > >>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
>>>> > >> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
>>> > > 
>>> > > I'm not sure we want to add this. It's name is wrong, as it's really
>>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
>>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
>>> > > arch/arm64/kernel/perf_event.c maps it that way.
>>> > > 
>>> > > I think we should do the same with the pmc array, i.e. map the cycle
>>> > > counter to idx zero.
>> > 
>> > I tend to have the opposite view. Not for the sake of it, but because I
>> > find it helpful to directly map the code to the architecture
>> > documentation without having to bend another handful of neurons.
>> > 
>> > Will probably had some good reasons to structure it that way, but I
>> > don't know the rational. Will?
> It was years ago, but I suspect that the cycle counter is index zero
> because its mandated, whilst the number of event counters is IMPDEF.
So does it need to change the cycle counter index to zero in this patch set?

Thanks,
Shannon Zhao Jan. 29, 2016, 6:26 a.m. UTC | #5
On 2016/1/29 2:06, Will Deacon wrote:
> On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote:
>> > On 28/01/16 16:31, Andrew Jones wrote:
>>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
>>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org>
>>>> > >>
>>>> > >> When we use tools like perf on host, perf passes the event type and the
>>>> > >> id of this event type category to kernel, then kernel will map them to
>>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
>>>> > >> register. When getting the event number in KVM, directly use raw event
>>>> > >> type to create a perf_event for it.
>>>> > >>
>>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
>>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
>>>> > >> ---
>>>> > >>  arch/arm64/include/asm/pmu.h |   3 ++
>>>> > >>  arch/arm64/kvm/Makefile      |   1 +
>>>> > >>  include/kvm/arm_pmu.h        |  10 ++++
>>>> > >>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
>>>> > >>  4 files changed, 136 insertions(+)
>>>> > >>  create mode 100644 virt/kvm/arm/pmu.c
>>>> > >>
>>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
>>>> > >> index 4406184..2588f9c 100644
>>>> > >> --- a/arch/arm64/include/asm/pmu.h
>>>> > >> +++ b/arch/arm64/include/asm/pmu.h
>>>> > >> @@ -21,6 +21,7 @@
>>>> > >>  
>>>> > >>  #define ARMV8_MAX_COUNTERS      32
>>>> > >>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
>>>> > >> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
>>> > > 
>>> > > I'm not sure we want to add this. It's name is wrong, as it's really
>>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
>>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
>>> > > arch/arm64/kernel/perf_event.c maps it that way.
>>> > > 
>>> > > I think we should do the same with the pmc array, i.e. map the cycle
>>> > > counter to idx zero.
>> > 
>> > I tend to have the opposite view. Not for the sake of it, but because I
>> > find it helpful to directly map the code to the architecture
>> > documentation without having to bend another handful of neurons.
>> > 
>> > Will probably had some good reasons to structure it that way, but I
>> > don't know the rational. Will?
> It was years ago, but I suspect that the cycle counter is index zero
> because its mandated, whilst the number of event counters is IMPDEF.

Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using
bit 31 to stands the state of cycle counter, if we make cycle counter
index to zero, we always need to do translation between the idx and bit
31 when we access these registers.

Thanks,
Will Deacon Jan. 29, 2016, 10:18 a.m. UTC | #6
On Fri, Jan 29, 2016 at 02:26:31PM +0800, Shannon Zhao wrote:
> 
> 
> On 2016/1/29 2:06, Will Deacon wrote:
> > On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote:
> >> > On 28/01/16 16:31, Andrew Jones wrote:
> >>> > > On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
> >>>> > >> From: Shannon Zhao <shannon.zhao@linaro.org>
> >>>> > >>
> >>>> > >> When we use tools like perf on host, perf passes the event type and the
> >>>> > >> id of this event type category to kernel, then kernel will map them to
> >>>> > >> hardware event number and write this number to PMU PMEVTYPER<n>_EL0
> >>>> > >> register. When getting the event number in KVM, directly use raw event
> >>>> > >> type to create a perf_event for it.
> >>>> > >>
> >>>> > >> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
> >>>> > >> Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
> >>>> > >> ---
> >>>> > >>  arch/arm64/include/asm/pmu.h |   3 ++
> >>>> > >>  arch/arm64/kvm/Makefile      |   1 +
> >>>> > >>  include/kvm/arm_pmu.h        |  10 ++++
> >>>> > >>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
> >>>> > >>  4 files changed, 136 insertions(+)
> >>>> > >>  create mode 100644 virt/kvm/arm/pmu.c
> >>>> > >>
> >>>> > >> diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
> >>>> > >> index 4406184..2588f9c 100644
> >>>> > >> --- a/arch/arm64/include/asm/pmu.h
> >>>> > >> +++ b/arch/arm64/include/asm/pmu.h
> >>>> > >> @@ -21,6 +21,7 @@
> >>>> > >>  
> >>>> > >>  #define ARMV8_MAX_COUNTERS      32
> >>>> > >>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
> >>>> > >> +#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
> >>> > > 
> >>> > > I'm not sure we want to add this. It's name is wrong, as it's really
> >>> > > PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
> >>> > > already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
> >>> > > arch/arm64/kernel/perf_event.c maps it that way.
> >>> > > 
> >>> > > I think we should do the same with the pmc array, i.e. map the cycle
> >>> > > counter to idx zero.
> >> > 
> >> > I tend to have the opposite view. Not for the sake of it, but because I
> >> > find it helpful to directly map the code to the architecture
> >> > documentation without having to bend another handful of neurons.
> >> > 
> >> > Will probably had some good reasons to structure it that way, but I
> >> > don't know the rational. Will?
> > It was years ago, but I suspect that the cycle counter is index zero
> > because its mandated, whilst the number of event counters is IMPDEF.
> 
> Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using
> bit 31 to stands the state of cycle counter, if we make cycle counter
> index to zero, we always need to do translation between the idx and bit
> 31 when we access these registers.

Conversely, if you stick the cycle counter right at the top, then you'll
need to rework a bunch of the perf code that iterates from
ARMV7_IDX_COUNTER0 to pmu->num_events.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shannon Zhao Jan. 29, 2016, 1:11 p.m. UTC | #7
On 2016/1/29 18:18, Will Deacon wrote:
> On Fri, Jan 29, 2016 at 02:26:31PM +0800, Shannon Zhao wrote:
>> >
>> >
>> >On 2016/1/29 2:06, Will Deacon wrote:
>>> > >On Thu, Jan 28, 2016 at 04:45:36PM +0000, Marc Zyngier wrote:
>>>>> > >> >On 28/01/16 16:31, Andrew Jones wrote:
>>>>>>> > >>> > >On Wed, Jan 27, 2016 at 11:51:35AM +0800, Shannon Zhao wrote:
>>>>>>>>> > >>>> > >>From: Shannon Zhao<shannon.zhao@linaro.org>
>>>>>>>>> > >>>> > >>
>>>>>>>>> > >>>> > >>When we use tools like perf on host, perf passes the event type and the
>>>>>>>>> > >>>> > >>id of this event type category to kernel, then kernel will map them to
>>>>>>>>> > >>>> > >>hardware event number and write this number to PMU PMEVTYPER<n>_EL0
>>>>>>>>> > >>>> > >>register. When getting the event number in KVM, directly use raw event
>>>>>>>>> > >>>> > >>type to create a perf_event for it.
>>>>>>>>> > >>>> > >>
>>>>>>>>> > >>>> > >>Signed-off-by: Shannon Zhao<shannon.zhao@linaro.org>
>>>>>>>>> > >>>> > >>Reviewed-by: Marc Zyngier<marc.zyngier@arm.com>
>>>>>>>>> > >>>> > >>---
>>>>>>>>> > >>>> > >>  arch/arm64/include/asm/pmu.h |   3 ++
>>>>>>>>> > >>>> > >>  arch/arm64/kvm/Makefile      |   1 +
>>>>>>>>> > >>>> > >>  include/kvm/arm_pmu.h        |  10 ++++
>>>>>>>>> > >>>> > >>  virt/kvm/arm/pmu.c           | 122 +++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>> > >>>> > >>  4 files changed, 136 insertions(+)
>>>>>>>>> > >>>> > >>  create mode 100644 virt/kvm/arm/pmu.c
>>>>>>>>> > >>>> > >>
>>>>>>>>> > >>>> > >>diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
>>>>>>>>> > >>>> > >>index 4406184..2588f9c 100644
>>>>>>>>> > >>>> > >>--- a/arch/arm64/include/asm/pmu.h
>>>>>>>>> > >>>> > >>+++ b/arch/arm64/include/asm/pmu.h
>>>>>>>>> > >>>> > >>@@ -21,6 +21,7 @@
>>>>>>>>> > >>>> > >>
>>>>>>>>> > >>>> > >>  #define ARMV8_MAX_COUNTERS      32
>>>>>>>>> > >>>> > >>  #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
>>>>>>>>> > >>>> > >>+#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
>>>>>>> > >>> > >
>>>>>>> > >>> > >I'm not sure we want to add this. It's name is wrong, as it's really
>>>>>>> > >>> > >PMCNTENSET_EL0.C, and just a few lines above we have the idx defined
>>>>>>> > >>> > >already (ARMV8_IDX_CYCLE_COUNTER), but as zero, because
>>>>>>> > >>> > >arch/arm64/kernel/perf_event.c maps it that way.
>>>>>>> > >>> > >
>>>>>>> > >>> > >I think we should do the same with the pmc array, i.e. map the cycle
>>>>>>> > >>> > >counter to idx zero.
>>>>> > >> >
>>>>> > >> >I tend to have the opposite view. Not for the sake of it, but because I
>>>>> > >> >find it helpful to directly map the code to the architecture
>>>>> > >> >documentation without having to bend another handful of neurons.
>>>>> > >> >
>>>>> > >> >Will probably had some good reasons to structure it that way, but I
>>>>> > >> >don't know the rational. Will?
>>> > >It was years ago, but I suspect that the cycle counter is index zero
>>> > >because its mandated, whilst the number of event counters is IMPDEF.
>> >
>> >Since PMCNTENSET/CLR, PMINTENSET/CLR, PMOVSSET/CLR and PMSWINC are using
>> >bit 31 to stands the state of cycle counter, if we make cycle counter
>> >index to zero, we always need to do translation between the idx and bit
>> >31 when we access these registers.
> Conversely, if you stick the cycle counter right at the top, then you'll
> need to rework a bunch of the perf code that iterates from
> ARMV7_IDX_COUNTER0 to pmu->num_events.

But actually this doesn't affect the perf code now because it's just a 
internal PMC array index of kvm arm guest pmu codes.
And having a look at the ARMv8 SPEC, it says
"
PMCCFILTR_EL0 can also be accessed by using PMXEVTYPER_EL0 with 
PMSELR_EL0.SEL set to 0b11111.
"
Apparently it treats the index of cycle counter as 31 not zero. Also 
regarding the PMCR.N field my understanding is that it just stands the 
number of counters not the counter's order or index. Looking at the 
description of PMSEL.SEL field, it says "This field can take any value 
from 0 (0b00000) to (PMCR.N)-1, or 31 (0b11111)." while it's not "This 
field can take any value from 1 (0b00001) to (PMCR.N), or 0."

Thanks,
diff mbox

Patch

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index 4406184..2588f9c 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -21,6 +21,7 @@ 
 
 #define ARMV8_MAX_COUNTERS      32
 #define ARMV8_COUNTER_MASK      (ARMV8_MAX_COUNTERS - 1)
+#define ARMV8_CYCLE_IDX         (ARMV8_MAX_COUNTERS - 1)
 
 /*
  * Per-CPU PMCR: config reg
@@ -31,6 +32,8 @@ 
 #define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
 #define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
 #define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+/* Determines which PMCCNTR_EL0 bit generates an overflow */
+#define ARMV8_PMCR_LC		(1 << 6)
 #define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
 #define	ARMV8_PMCR_N_MASK	0x1f
 #define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index caee9ee..122cff4 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -26,3 +26,4 @@  kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 32fee2d..ee4b15c 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -36,11 +36,21 @@  struct kvm_pmu {
 };
 
 #define kvm_arm_pmu_v3_ready(v)		((v)->arch.pmu.ready)
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
+				    u64 select_idx);
 #else
 struct kvm_pmu {
 };
 
 #define kvm_arm_pmu_v3_ready(v)		(false)
+static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
+					    u64 select_idx)
+{
+	return 0;
+}
+static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
+						  u64 data, u64 select_idx) {}
 #endif
 
 #endif
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
new file mode 100644
index 0000000..673ec55
--- /dev/null
+++ b/virt/kvm/arm/pmu.c
@@ -0,0 +1,122 @@ 
+/*
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Shannon Zhao <shannon.zhao@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include <asm/kvm_emulate.h>
+#include <kvm/arm_pmu.h>
+
+/**
+ * kvm_pmu_get_counter_value - get PMU counter value
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ */
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+	u64 counter, reg, enabled, running;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+
+	reg = (select_idx == ARMV8_CYCLE_IDX)
+	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
+	counter = vcpu_sys_reg(vcpu, reg);
+
+	/* The real counter value is equal to the value of counter register plus
+	 * the value perf event counts.
+	 */
+	if (pmc->perf_event)
+		counter += perf_event_read_value(pmc->perf_event, &enabled,
+						 &running);
+
+	return counter & pmc->bitmask;
+}
+
+/**
+ * kvm_pmu_stop_counter - stop PMU counter
+ * @pmc: The PMU counter pointer
+ *
+ * If this counter has been configured to monitor some event, release it here.
+ */
+static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
+{
+	u64 counter, reg;
+
+	if (pmc->perf_event) {
+		counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
+		reg = (pmc->idx == ARMV8_CYCLE_IDX)
+		       ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
+		vcpu_sys_reg(vcpu, reg) = counter;
+		perf_event_disable(pmc->perf_event);
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+	}
+}
+
+static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+	return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMCR_E) &&
+	       (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
+}
+
+/**
+ * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
+ * @vcpu: The vcpu pointer
+ * @data: The data guest writes to PMXEVTYPER_EL0
+ * @select_idx: The number of selected counter
+ *
+ * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
+ * event with given hardware event number. Here we call perf_event API to
+ * emulate this action and create a kernel perf event for it.
+ */
+void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
+				    u64 select_idx)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
+	struct perf_event *event;
+	struct perf_event_attr attr;
+	u64 eventsel, counter;
+
+	kvm_pmu_stop_counter(vcpu, pmc);
+	eventsel = data & ARMV8_EVTYPE_EVENT;
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.type = PERF_TYPE_RAW;
+	attr.size = sizeof(attr);
+	attr.pinned = 1;
+	attr.disabled = kvm_pmu_counter_is_enabled(vcpu, select_idx);
+	attr.exclude_user = data & ARMV8_EXCLUDE_EL0 ? 1 : 0;
+	attr.exclude_kernel = data & ARMV8_EXCLUDE_EL1 ? 1 : 0;
+	attr.exclude_hv = 1; /* Don't count EL2 events */
+	attr.exclude_host = 1; /* Don't count host events */
+	attr.config = eventsel;
+
+	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
+	/* The initial sample period (overflow count) of an event. */
+	attr.sample_period = (-counter) & pmc->bitmask;
+
+	event = perf_event_create_kernel_counter(&attr, -1, current, NULL, pmc);
+	if (IS_ERR(event)) {
+		pr_err_once("kvm: pmu event creation failed %ld\n",
+			    PTR_ERR(event));
+		return;
+	}
+
+	pmc->perf_event = event;
+}