@@ -390,6 +390,9 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
wrmsrl(x86_pmu.lbr_tos, tos);
task_ctx->lbr_stack_state = LBR_NONE;
+
+ if (cpuc->vcpu_lbr)
+ wrmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
}
static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
@@ -416,6 +419,10 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
}
+
+ if (cpuc->vcpu_lbr)
+ rdmsrl(MSR_LBR_SELECT, task_ctx->lbr_sel);
+
task_ctx->valid_lbrs = i;
task_ctx->tos = tos;
task_ctx->lbr_stack_state = LBR_VALID;
@@ -699,6 +699,7 @@ struct x86_perf_task_context {
u64 lbr_from[MAX_LBR_ENTRIES];
u64 lbr_to[MAX_LBR_ENTRIES];
u64 lbr_info[MAX_LBR_ENTRIES];
+ u64 lbr_sel;
int tos;
int valid_lbrs;
int lbr_callstack_users;
The regular host LBR perf event doesn't save/restore the LBR_SELECT MSR during a thread context switch, because the LBR_SELECT value is generated from attr.branch_sample_type and is already stored in event->hw.branch_reg (please see intel_pmu_setup_hw_filter), so it is not lost across thread context switches.

The attr.branch_sample_type for the vCPU LBR event is deliberately set to the user call stack mode so that the perf core saves/restores the LBR-related MSRs on vCPU switching. As a result, attr.branch_sample_type does not represent what the guest PMU driver will write to LBR_SELECT. Meanwhile, the host LBR driver doesn't configure the LBR MSRs, including the LBR_SELECT MSR, for the vCPU thread case, as the PMU driver inside the vCPU will do that.

So for the vCPU case, add the LBR_SELECT save/restore to ensure that what the guest writes to the LBR_SELECT MSR doesn't get lost during vCPU context switching.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 arch/x86/events/intel/lbr.c | 7 +++++++
 arch/x86/events/perf_event.h | 1 +
 2 files changed, 8 insertions(+)