diff mbox

[3/4] perf: Handle guest PEBS events with a fake event

Message ID 1401412327-14810-4-git-send-email-andi@firstfloor.org (mailing list archive)
State New, archived
Headers show

Commit Message

Andi Kleen May 30, 2014, 1:12 a.m. UTC
From: Andi Kleen <ak@linux.intel.com>

With PEBS virtualization the PEBS record gets delivered to the guest,
but the host sees the PMI. This would normally result in a spurious
PEBS PMI that is ignored. But we need to inject the PMI into the guest,
so that the guest PMI handler can handle the PEBS record.

Check for this case in the perf PEBS handler.  When any guest PEBS
counters are active always check the counters explicitely for
overflow. If a guest PEBs counter overflowed trigger a fake event. The
fake event results in calling the KVM PMI callback, which injects
the PMI into the guest. The guest handler then retrieves the correct
information from its own PEBS record and the guest state.

Note: in very rare cases with exotic events this may lead to spurious PMIs
in the guest.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 49 +++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

Comments

Peter Zijlstra May 30, 2014, 7:34 a.m. UTC | #1
On Thu, May 29, 2014 at 06:12:06PM -0700, Andi Kleen wrote:

> Note: in very rare cases with exotic events this may lead to spurious PMIs
> in the guest.

Qualify that statement so that if someone runs into it we at least know
it is known/expected.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andi Kleen May 30, 2014, 4:29 p.m. UTC | #2
On Fri, May 30, 2014 at 09:34:39AM +0200, Peter Zijlstra wrote:
> On Thu, May 29, 2014 at 06:12:06PM -0700, Andi Kleen wrote:
> 
> > Note: in very rare cases with exotic events this may lead to spurious PMIs
> > in the guest.
> 
> Qualify that statement so that if someone runs into it we at least know
> it is known/expected.

You cannot actually observe it, so it's not a real problem.
I'll drop the Note.

-Andi
diff mbox

Patch

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 29622a7..0267174 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -998,6 +998,53 @@  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
+/*
+ * We may be running with virtualized PEBS, so the PEBS record
+ * was logged into the guest's DS and is invisible to us.
+ *
+ * For guest-owned counters we always have to check the counter
+ * and see if they are overflowed, because PEBS thresholds
+ * are not reported in the GLOBAL_STATUS.
+ *
+ * In this case just trigger a fake event for KVM to forward
+ * to the guest as PMI.  The guest will then see the real PEBS
+ * record and read the counter values.
+ *
+ * The contents of the event do not matter.
+ */
+static void intel_pmu_handle_guest_pebs(struct cpu_hw_events *cpuc,
+					struct pt_regs *iregs)
+{
+	int bit;
+	struct perf_event *event;
+
+	if (!cpuc->intel_ctrl_guest_owned)
+		return;
+
+	for_each_set_bit(bit, (unsigned long *)&cpuc->intel_ctrl_guest_owned,
+			 x86_pmu.max_pebs_events) {
+		struct perf_sample_data data;
+		s64 count;
+		int shift;
+
+		event = cpuc->events[bit];
+		if (!event->attr.precise_ip)
+			continue;
+		rdpmcl(event->hw.event_base_rdpmc, count);
+
+		/* sign extend */
+		shift = 64 - x86_pmu.cntval_bits;
+		count = ((s64)((u64)count << shift)) >> shift;
+
+		if (count < 0)
+			continue;
+
+		perf_sample_data_init(&data, 0, event->hw.last_period);
+		if (perf_event_overflow(event, &data, iregs))
+			x86_pmu_stop(event, 0);
+	}
+}
+
 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -1010,6 +1057,8 @@  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	if (!x86_pmu.pebs_active)
 		return;
 
+	intel_pmu_handle_guest_pebs(cpuc, iregs);
+
 	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;