@@ -2408,6 +2408,16 @@ void x86_perf_mask_perf_counters(u64 mask)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
/*
+ * active_events must be non-zero for host PMI handling, so increment
+ * it once when counters are first reserved for the guest, and
+ * decrement it once when the guest releases all of its counters.
+ */
+ if (!cpuc->intel_ctrl_guest_mask && mask)
+ atomic_inc(&active_events);
+ else if (cpuc->intel_ctrl_guest_mask && !mask)
+ atomic_dec(&active_events);
+
+ /*
* If the counter happens to be used by a host event, take it back
* first, and then restart the pmu after masking that counter as
* reserved.
@@ -2283,6 +2283,13 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
status |= cpuc->intel_cp_status;
+ if (status & cpuc->intel_ctrl_guest_mask) {
+ cpuc->pmi_callback(cpuc->pmi_opaque,
+ status & cpuc->intel_ctrl_guest_mask);
+ status &= ~cpuc->intel_ctrl_guest_mask;
+ handled++;
+ }
+
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
@@ -3162,6 +3169,26 @@ struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
}
EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
+void x86_perf_register_pmi_callback(pmi_callback_t callback, void *opaque)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->pmi_callback = callback;
+ cpuc->pmi_opaque = opaque;
+
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+}
+EXPORT_SYMBOL_GPL(x86_perf_register_pmi_callback);
+
+void x86_perf_unregister_pmi_callback(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ cpuc->pmi_callback = NULL;
+ cpuc->pmi_opaque = NULL;
+}
+EXPORT_SYMBOL_GPL(x86_perf_unregister_pmi_callback);
+
static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -197,6 +197,10 @@ struct cpu_hw_events {
unsigned int txn_flags;
int is_fake;
+ /* PMI related fields */
+ pmi_callback_t pmi_callback;
+ void *pmi_opaque;
+
/*
* Intel DebugStore bits
*/
@@ -275,6 +275,8 @@ struct perf_guest_switch_msr {
u64 host, guest;
};
+typedef void (*pmi_callback_t)(void *opaque, u64 status);
+
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
@@ -298,6 +300,9 @@ static inline void perf_check_microcode(void) { }
#ifdef CONFIG_CPU_SUP_INTEL
extern void intel_pt_handle_vmx(int on);
extern void x86_perf_mask_perf_counters(u64 mask);
+extern void x86_perf_register_pmi_callback(pmi_callback_t callback,
+ void *opaque);
+extern void x86_perf_unregister_pmi_callback(void);
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
This patch adds PMI callback support for the counters reserved by
components outside the perf core. For example, a hypervisor may register
such a callback to have the guest notified when a PMI arrives. Host PMI
handling requires active_events to be non-zero, so keep active_events at
least 1 in x86_perf_mask_perf_counters while the guest holds counters.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/events/core.c            | 10 ++++++++++
 arch/x86/events/intel/core.c      | 27 +++++++++++++++++++++++++++
 arch/x86/events/perf_event.h      |  4 ++++
 arch/x86/include/asm/perf_event.h |  5 +++++
 4 files changed, 46 insertions(+)
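
Below the cut, a rough usage sketch (not applied by this patch) of how a
hypervisor-side module might drive the new interface. The demo_* names,
the vcpu bookkeeping, and the choice of guest counter mask are placeholders
invented for illustration; only the x86_perf_* calls come from the
interfaces added above.

/*
 * Usage sketch only, not part of this patch. The demo_* names and the
 * vcpu bookkeeping are invented for illustration; only the x86_perf_*
 * calls come from the interfaces added above.
 */
#include <linux/types.h>
#include <asm/perf_event.h>

struct demo_vcpu {
	u64 guest_counter_mask;		/* counters handed to the guest */
	u64 pending_pmi_status;		/* overflow bits to inject later */
};

/* Runs from the host PMI path with the overflow bits owned by the guest. */
static void demo_guest_pmi_callback(void *opaque, u64 status)
{
	struct demo_vcpu *vcpu = opaque;

	vcpu->pending_pmi_status |= status;
}

/* Expected to run on the pCPU that executes the vCPU, preemption disabled. */
static void demo_vcpu_load(struct demo_vcpu *vcpu)
{
	/* Reserve the counters; this also keeps active_events >= 1. */
	x86_perf_mask_perf_counters(vcpu->guest_counter_mask);
	x86_perf_register_pmi_callback(demo_guest_pmi_callback, vcpu);
}

static void demo_vcpu_put(struct demo_vcpu *vcpu)
{
	x86_perf_unregister_pmi_callback();
	/* Give all counters back to the host. */
	x86_perf_mask_perf_counters(0);
}

The ordering matters: the counters are masked (raising active_events)
before the callback is registered, matching the requirement above that
active_events stays non-zero whenever the guest owns counters.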