Message ID | 1501820670-23194-4-git-send-email-wei@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Hi Wei,

I didn't review the important part of this test, i.e. what it's
testing and how, so below are just some nits.

On Thu, Aug 03, 2017 at 11:24:30PM -0500, Wei Huang wrote:
> AMD PMU implementation is very different from Intel. This patch adds
> a new test case, called amd_pmu, to x86 architecture. The
> implementation of amd_pmu.c is based on intel_pmu.c, with focus on
> AMD's general-purpose registers. To avoid running on Intel CPUs,
> we check the CPU's vendor name before executing the tests.
>
> Signed-off-by: Wei Huang <wei@redhat.com>
> ---
[...]
> diff --git a/x86/Makefile.common b/x86/Makefile.common
> index d0f4ed1..36bcf90 100644
> --- a/x86/Makefile.common
> +++ b/x86/Makefile.common
> @@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
[...]
> -	$(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \
> +	$(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \
> +	$(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \

mixing tabs and spaces here

[...]
> diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c
> new file mode 100644
[...]
> +#include "x86/msr.h"
> +#include "x86/processor.h"
> +#include "x86/apic-defs.h"
> +#include "x86/apic.h"

apic-defs is included by apic

> +#include "x86/desc.h"
> +#include "x86/isr.h"
> +#include "x86/vm.h"
> +
> +#include "libcflat.h"
> +#include "pmu.h"
> +#include <stdint.h>

stdint is included by libcflat

[...]
> +char *buf;
> +static int num_counters;
> +volatile uint64_t irq_received;

all above globals can be static

[...]
> +	return (void*)0;

NULL

[...]
> +	for (i = 0; i < n; i++)
> +		if (!verify_counter(&cnt[i]))
> +			break;

{} for the for loop, might be nice here

[...]
> +#define IS_AMD_CPU(cpuid) ((cpuid).b == CPUID_VENDOR_AMD_1 && \
> +			   (cpuid).d == CPUID_VENDOR_AMD_2 && \
> +			   (cpuid).c == CPUID_VENDOR_AMD_3)

should add a blank line here

> +int main(int ac, char **av)
[...]
> diff --git a/x86/unittests.cfg b/x86/unittests.cfg
[...]
> +[amd_pmu]
> +file = amd_pmu.flat
> +extra_params = -cpu host

maybe add 'arch = x86_64', since AMD only supports 64 bit.

[...]

Thanks,
drew
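For illustration only, a minimal sketch of what the amd_pmu.c preamble might look like with the include and static nits above applied; this is not the posted patch, just one possible way to address those comments:

    /*
     * Sketch: amd_pmu.c preamble with the redundant includes dropped and the
     * file-scope globals made static, per the review comments above.
     */
    #include "x86/msr.h"
    #include "x86/processor.h"
    #include "x86/apic.h"        /* per the review, apic.h already pulls in apic-defs.h */
    #include "x86/desc.h"
    #include "x86/isr.h"
    #include "x86/vm.h"

    #include "libcflat.h"        /* per the review, libcflat.h already pulls in stdint.h */
    #include "pmu.h"

    #define AMD64_NUM_COUNTERS 4

    static char *buf;
    static int num_counters;
    static volatile uint64_t irq_received;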
On 08/04/2017 03:14 AM, Andrew Jones wrote:
> Hi Wei,
>
> I didn't review the important part of this test, i.e. what it's
> testing and how, so below are just some nits.

Thanks for reviewing it. I agree with most nits below, and will fix
them in the next respin.

[...]
diff --git a/x86/Makefile.common b/x86/Makefile.common
index d0f4ed1..36bcf90 100644
--- a/x86/Makefile.common
+++ b/x86/Makefile.common
@@ -45,7 +45,8 @@ tests-common = $(TEST_DIR)/vmexit.flat $(TEST_DIR)/tsc.flat \
 	$(TEST_DIR)/realmode.flat $(TEST_DIR)/msr.flat \
 	$(TEST_DIR)/hypercall.flat $(TEST_DIR)/sieve.flat \
 	$(TEST_DIR)/kvmclock_test.flat $(TEST_DIR)/eventinj.flat \
-	$(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat $(TEST_DIR)/setjmp.flat \
+	$(TEST_DIR)/s3.flat $(TEST_DIR)/intel_pmu.flat \
+	$(TEST_DIR)/amd_pmu.flat $(TEST_DIR)/setjmp.flat \
 	$(TEST_DIR)/tsc_adjust.flat $(TEST_DIR)/asyncpf.flat \
 	$(TEST_DIR)/init.flat $(TEST_DIR)/smap.flat \
 	$(TEST_DIR)/hyperv_synic.flat $(TEST_DIR)/hyperv_stimer.flat \
diff --git a/x86/amd_pmu.c b/x86/amd_pmu.c
new file mode 100644
index 0000000..006eccf
--- /dev/null
+++ b/x86/amd_pmu.c
@@ -0,0 +1,265 @@
+/*
+ * vPMU testing for AMD CPUs
+ *
+ * Copyright (c) 2017 Red Hat Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "x86/msr.h"
+#include "x86/processor.h"
+#include "x86/apic-defs.h"
+#include "x86/apic.h"
+#include "x86/desc.h"
+#include "x86/isr.h"
+#include "x86/vm.h"
+
+#include "libcflat.h"
+#include "pmu.h"
+#include <stdint.h>
+
+#define AMD64_NUM_COUNTERS 4
+
+#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */
+#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */
+#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */
+
+struct pmu_event gp_events[] = {
+	{"core cycles", 0x0076, 1*N, 50*N},
+	{"instructions", 0x00c0, 10*N, 10.2*N},
+	{"cache reference", 0x077d, 1, 2*N},
+	{"cache misses", 0x077e, 0, 1*N},
+	{"branches", 0x00c2, 1*N, 1.1*N},
+	{"branch misses", 0x00c3, 0, 0.1*N},
+	{"stalls frontend", 0x00d0, 0, 0.1*N},
+	{"stalls backend", 0x00d1, 0, 30*N},
+};
+
+char *buf;
+static int num_counters;
+volatile uint64_t irq_received;
+
+static bool check_irq(void)
+{
+	int i;
+	irq_received = 0;
+
+	irq_enable();
+	for (i = 0; i < 100000 && !irq_received; i++)
+		asm volatile("pause");
+	irq_disable();
+
+	return irq_received;
+}
+
+static inline void loop()
+{
+	unsigned long tmp, tmp2, tmp3;
+
+	asm volatile("1: mov (%1), %2\n\t"
+		     "   add $64, %1\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   nop\n\t"
+		     "   loop 1b"
+		     : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
+		     : "0"(N), "1"(buf));
+}
+
+static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
+{
+	int i;
+
+	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
+		if (gp_events[i].unit_sel == (cnt->config & 0xffff))
+			return &gp_events[i];
+
+	return (void*)0;
+}
+
+static void cnt_overflow(isr_regs_t *regs)
+{
+	irq_received++;
+	apic_write(APIC_EOI, 0);
+}
+
+static int event_to_global_idx(pmu_counter_t *cnt)
+{
+	return cnt->ctr - MSR_K7_PERFCTR0;
+}
+
+static bool verify_event(uint64_t count, struct pmu_event *e)
+{
+	return count >= e->min && count <= e->max;
+}
+
+static bool verify_counter(pmu_counter_t *cnt)
+{
+	return verify_event(cnt->count, get_counter_event(cnt));
+}
+
+static void start_event(pmu_counter_t *evt)
+{
+	wrmsr(evt->ctr, evt->count);
+	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
+	      evt->config | EVNTSEL_EN);
+}
+
+static void stop_event(pmu_counter_t *evt)
+{
+	wrmsr(MSR_K7_EVNTSEL0 + event_to_global_idx(evt),
+	      evt->config & ~EVNTSEL_EN);
+	evt->count = rdmsr(evt->ctr);
+}
+
+static void measure(pmu_counter_t *evt, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		start_event(&evt[i]);
+
+	loop();
+
+	for (i = 0; i < count; i++)
+		stop_event(&evt[i]);
+}
+
+static void check_gp_counter(struct pmu_event *evt)
+{
+	int i;
+
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
+	};
+
+	for (i = 0; i < num_counters; i++, cnt.ctr++) {
+		cnt.count = 0;
+		measure(&cnt, 1);
+		report("%s-%d", verify_event(cnt.count, evt), evt->name, i);
+	}
+}
+
+static void check_gp_counters(void)
+{
+	int i;
+
+	for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++) {
+		check_gp_counter(&gp_events[i]);
+	}
+}
+
+static void check_counter_overflow(void)
+{
+	uint64_t count;
+	int i;
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
+		.count = 0,
+	};
+
+	measure(&cnt, 1);
+	count = cnt.count;
+
+	report_prefix_push("overflow");
+
+	for (i = 0; i < num_counters; i++, cnt.ctr++) {
+		if (i % 2)
+			cnt.config |= EVNTSEL_INT;
+		else
+			cnt.config &= ~EVNTSEL_INT;
+		cnt.count = 1 - count;
+		measure(&cnt, 1);
+		report("cntr-%d", cnt.count < 20, i);
+		report("irq-%d", check_irq() == (i % 2), i);
+	}
+
+	report_prefix_pop();
+}
+
+static void check_gp_counter_cmask(void)
+{
+	pmu_counter_t cnt = {
+		.ctr = MSR_K7_PERFCTR0,
+		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel,
+		.count = 0,
+	};
+
+	cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
+	measure(&cnt, 1);
+	report("cmask", cnt.count < gp_events[1].min);
+}
+
+static void check_rdpmc(void)
+{
+	uint64_t val = 0x1f3456789ull;
+	int i;
+
+	report_prefix_push("rdpmc");
+
+	for (i = 0; i < num_counters; i++) {
+		uint64_t x = val;
+		wrmsr(MSR_K7_PERFCTR0 + i, val);
+		report("cntr-%d", rdpmc(i) == x, i);
+	}
+
+	report_prefix_pop();
+}
+
+static void check_counters_many(void)
+{
+	pmu_counter_t cnt[10];
+	int i, n;
+
+	for (n = 0; n < num_counters; n++) {
+		cnt[n].count = 0;
+		cnt[n].ctr = MSR_K7_PERFCTR0 + n;
+		cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[n].unit_sel;
+	}
+
+	measure(cnt, n);
+
+	for (i = 0; i < n; i++)
+		if (!verify_counter(&cnt[i]))
+			break;
+
+	report("all counters", i == n);
+}
+
+#define IS_AMD_CPU(cpuid) ((cpuid).b == CPUID_VENDOR_AMD_1 && \
+			   (cpuid).d == CPUID_VENDOR_AMD_2 && \
+			   (cpuid).c == CPUID_VENDOR_AMD_3)
+int main(int ac, char **av)
+{
+	struct cpuid id = cpuid(0);
+
+	setup_vm();
+	setup_idt();
+	handle_irq(PC_VECTOR, cnt_overflow);
+	buf = vmalloc(N * 64);
+
+	if (!IS_AMD_CPU(id)) {
+		printf("No AMD PMU detected!\n");
+		return report_summary();
+	}
+
+	num_counters = AMD64_NUM_COUNTERS;
+	if (num_counters > ARRAY_SIZE(gp_events))
+		num_counters = ARRAY_SIZE(gp_events);
+
+	apic_write(APIC_LVTPC, PC_VECTOR);
+
+	check_gp_counters();
+	check_rdpmc();
+	check_counters_many();
+	check_counter_overflow();
+	check_gp_counter_cmask();
+
+	return report_summary();
+}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 74156fa..98fb3e9 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -145,6 +145,10 @@ file = intel_pmu.flat
 extra_params = -cpu host
 check = /proc/sys/kernel/nmi_watchdog=0
 
+[amd_pmu]
+file = amd_pmu.flat
+extra_params = -cpu host
+
 [port80]
 file = port80.flat
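For illustration, a sketch of the [amd_pmu] stanza with the reviewer's suggested 64-bit restriction folded in; this is not part of the posted patch, just the review comment expressed as config:

    [amd_pmu]
    file = amd_pmu.flat
    extra_params = -cpu host
    arch = x86_64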
AMD PMU implementation is very different from Intel. This patch adds
a new test case, called amd_pmu, to x86 architecture. The
implementation of amd_pmu.c is based on intel_pmu.c, with focus on
AMD's general-purpose registers. To avoid running on Intel CPUs,
we check the CPU's vendor name before executing the tests.

Signed-off-by: Wei Huang <wei@redhat.com>
---
 x86/Makefile.common |   3 +-
 x86/amd_pmu.c       | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/unittests.cfg   |   4 +
 3 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 x86/amd_pmu.c
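Assuming the usual kvm-unit-tests runners (an assumption; the thread itself does not show how the test was invoked), the new test could be exercised roughly like this once built:

    # run the whole suite defined in unittests.cfg, which now includes amd_pmu
    ./run_tests.sh

    # or run only this test, passing the -cpu host requirement through to QEMU
    ./x86/run x86/amd_pmu.flat -cpu host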