@@ -462,6 +462,9 @@ void intel_pmu_lbr_add(struct perf_event *event)
if (!x86_pmu.lbr_nr)
return;
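+ /*
+ * Treat a host-only (exclude_guest) event that takes no counter as
+ * the vcpu guest lbr event: while it is active, the host skips its
+ * own lbr enable/disable and reads so the guest keeps the lbr stack.
+ */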
+ if (event->attr.exclude_guest && is_no_counter_event(event))
+ cpuc->vcpu_lbr = 1;
+
cpuc->br_sel = event->hw.branch_reg.reg;
if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
@@ -509,6 +512,9 @@ void intel_pmu_lbr_del(struct perf_event *event)
task_ctx->lbr_callstack_users--;
}
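+ /* The vcpu guest lbr event is going away; the host may use the lbr again. */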
+ if (event->attr.exclude_guest && is_no_counter_event(event))
+ cpuc->vcpu_lbr = 0;
+
if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
cpuc->lbr_pebs_users--;
cpuc->lbr_users--;
@@ -521,7 +527,7 @@ void intel_pmu_lbr_enable_all(bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (cpuc->lbr_users)
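+ /* Don't enable the lbr on the host while the vcpu guest lbr event is active. */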
+ if (cpuc->lbr_users && !cpuc->vcpu_lbr)
__intel_pmu_lbr_enable(pmi);
}
@@ -529,7 +535,7 @@ void intel_pmu_lbr_disable_all(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (cpuc->lbr_users)
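+ /* Likewise, leave the lbr msrs alone while the vcpu guest lbr event is active. */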
+ if (cpuc->lbr_users && !cpuc->vcpu_lbr)
__intel_pmu_lbr_disable();
}
@@ -669,7 +675,8 @@ void intel_pmu_lbr_read(void)
* This could be smarter and actually check the event,
* but this simple approach seems to work for now.
*/
- if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
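+ /* The lbr stack carries guest data while the vcpu guest lbr event is active. */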
+ if (!cpuc->lbr_users || cpuc->vcpu_lbr ||
+ cpuc->lbr_users == cpuc->lbr_pebs_users)
return;
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
@@ -238,6 +238,7 @@ struct cpu_hw_events {
/*
* Intel LBR bits
*/
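+ /* a vcpu guest lbr event is active; don't touch or read the lbr from the host */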
+ u8 vcpu_lbr;
int lbr_users;
int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
@@ -477,6 +477,7 @@ struct kvm_pmu {
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
struct irq_work irq_work;
u64 reprogram_pmi;
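+ /* host perf event used only to save/restore the guest lbr stack on vcpu switch */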
+ struct perf_event *vcpu_lbr_event;
};
struct kvm_pmu_ops;
@@ -123,6 +123,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
bool is_vmware_backdoor_pmc(u32 pmc_idx);
+extern int intel_pmu_enable_save_guest_lbr(struct kvm_vcpu *vcpu);
+extern void intel_pmu_disable_save_guest_lbr(struct kvm_vcpu *vcpu);
+
extern struct kvm_pmu_ops intel_pmu_ops;
extern struct kvm_pmu_ops amd_pmu_ops;
#endif /* __KVM_X86_PMU_H */
@@ -510,6 +510,67 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->global_ovf_ctrl = 0;
}
+int intel_pmu_enable_save_guest_lbr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct perf_event *event;
+
+ /*
+ * The main purpose of this perf event is to have the host perf core
+ * help save/restore the guest lbr stack on vCPU switching. No perf
+ * counter is allocated for the event.
+ *
+ * About the attr:
+ * exclude_guest: set to true to indicate that the event runs on the
+ * host only.
+ * pinned: set to false, so that FLEXIBLE events are not
+ * rescheduled on behalf of this event, which doesn't
+ * actually need a perf counter.
+ * config: not used by the perf core, as this event doesn't
+ * have a perf counter.
+ * sample_period: same as above.
+ * sample_type: tells the perf core that it is an lbr event.
+ * branch_sample_type: tells the perf core that the lbr event works in
+ * the user callstack mode, so that the lbr stack will be
+ * saved/restored on vCPU switching.
+ */
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(attr),
+ .exclude_guest = true,
+ .pinned = false,
+ .config = 0,
+ .sample_period = 0,
+ .sample_type = PERF_SAMPLE_BRANCH_STACK,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_USER,
+ };
+
+ if (pmu->vcpu_lbr_event)
+ return 0;
+
+ event = perf_event_create(&attr, -1, current, NULL, NULL, false);
+ if (IS_ERR(event)) {
+ pr_err("%s: failed %ld\n", __func__, PTR_ERR(event));
+ return -ENOENT;
+ }
+ pmu->vcpu_lbr_event = event;
+
+ return 0;
+}
+
+void intel_pmu_disable_save_guest_lbr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct perf_event *event = pmu->vcpu_lbr_event;
+
+ if (!event)
+ return;
+
+ perf_event_release_kernel(event);
+ pmu->vcpu_lbr_event = NULL;
+}
+
struct kvm_pmu_ops intel_pmu_ops = {
.find_arch_event = intel_find_arch_event,
.find_fixed_event = intel_find_fixed_event,