@@ -668,6 +668,8 @@ struct kvm_vcpu_arch {
/* Software step state is Active-pending */
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+/* PMUSERENR for the guest EL0 is on physical CPU */
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
@@ -1028,9 +1030,14 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
+bool kvm_set_pmuserenr(u64 val);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline bool kvm_set_pmuserenr(u64 val)
+{
+ return false;
+}
#endif
void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
@@ -741,9 +741,25 @@ static inline u32 armv8pmu_getreset_flags(void)
return value;
}
+static void update_pmuserenr(u64 val)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * The current PMUSERENR_EL0 value might be the value for the guest.
+ * If that's the case, have KVM keep tracking of the register value
+ * for the host EL0 so that KVM can restore it before returning to
+ * the host EL0. Otherwise, update the register now.
+ */
+ if (kvm_set_pmuserenr(val))
+ return;
+
+ write_sysreg(val, pmuserenr_el0);
+}
+
static void armv8pmu_disable_user_access(void)
{
- write_sysreg(0, pmuserenr_el0);
+ update_pmuserenr(0);
}
static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
@@ -759,8 +775,7 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
armv8pmu_write_evcntr(i, 0);
}
- write_sysreg(0, pmuserenr_el0);
- write_sysreg(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR, pmuserenr_el0);
+ update_pmuserenr(ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_CR);
}
static void armv8pmu_enable_event(struct perf_event *event)
@@ -82,12 +82,24 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
*/
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
+ unsigned long flags;
write_sysreg(0, pmselr_el0);
hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+
+ /*
+ * Disable IRQs to prevent a race condition between the
+ * following code and IPIs that attempts to update
+ * PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
+ local_irq_save(flags);
+
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+ vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
+
+ local_irq_restore(flags);
}
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
@@ -112,9 +124,21 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
+ unsigned long flags;
hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+
+ /*
+ * Disable IRQs to prevent a race condition between the
+ * following code and IPIs that attempts to update
+ * PMUSERENR_EL0. See also kvm_set_pmuserenr().
+ */
+ local_irq_save(flags);
+
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
+ vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
+
+ local_irq_restore(flags);
}
if (cpus_have_final_cap(ARM64_SME)) {
@@ -10,7 +10,7 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
# will explode instantly (Words of Marc Zyngier). So introduce a generic flag
# __DISABLE_TRACE_MMIO__ to disable MMIO tracing for nVHE KVM.
ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS -D__DISABLE_TRACE_MMIO__
-ccflags-y += -fno-stack-protector \
+ccflags-y += -fno-stack-protector -DNO_TRACE_IRQFLAGS -DNO_DEBUG_IRQFLAGS \
-DDISABLE_BRANCH_PROFILING \
$(DISABLE_STACKLEAK_PLUGIN)
@@ -209,3 +209,28 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
kvm_vcpu_pmu_enable_el0(events_host);
kvm_vcpu_pmu_disable_el0(events_guest);
}
+
+/*
+ * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
+ * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
+ * to the value for the guest on vcpu_load(). The value for the host EL0
+ * will be restored on vcpu_put(), before returning to the EL0.
+ *
+ * Return true if KVM takes care of the register. Otherwise return false.
+ */
+bool kvm_set_pmuserenr(u64 val)
+{
+ struct kvm_cpu_context *hctxt;
+ struct kvm_vcpu *vcpu;
+
+ if (!kvm_arm_support_pmu_v3() || !has_vhe())
+ return false;
+
+ vcpu = kvm_get_running_vcpu();
+ if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
+ return false;
+
+ hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
+ return true;
+}
@@ -156,7 +156,7 @@ do { \
# define start_critical_timings() do { } while (0)
#endif
-#ifdef CONFIG_DEBUG_IRQFLAGS
+#if defined CONFIG_DEBUG_IRQFLAGS && !defined(NO_DEBUG_IRQFLAGS)
extern void warn_bogus_irq_restore(void);
#define raw_check_bogus_irq_restore() \
do { \
@@ -198,9 +198,9 @@ extern void warn_bogus_irq_restore(void);
/*
* The local_irq_*() APIs are equal to the raw_local_irq*()
- * if !TRACE_IRQFLAGS.
+ * if !TRACE_IRQFLAGS or if NO_TRACE_IRQFLAGS is locally set.
*/
-#ifdef CONFIG_TRACE_IRQFLAGS
+#if defined CONFIG_TRACE_IRQFLAGS && !defined(NO_TRACE_IRQFLAGS)
#define local_irq_enable() \
do { \
Currently, with VHE, KVM sets ER, CR, SW and EN bits of PMUSERENR_EL0 to 1 on vcpu_load(), and saves and restores the register value for the host on vcpu_load() and vcpu_put(). If the value of those bits are cleared on a pCPU with a vCPU loaded (armv8pmu_start() would do that when PMU counters are programmed for the guest), PMU access from the guest EL0 might be trapped to the guest EL1 directly regardless of the current PMUSERENR_EL0 value of the vCPU. Fix this by not letting armv8pmu_start() overwrite PMUSERENR_EL0 on the pCPU where PMUSERENR_EL0 for the guest is loaded, and instead updating the saved shadow register value for the host, so that the value can be restored on vcpu_put() later. While vcpu_{put,load}() are manipulating PMUSERENR_EL0, disable IRQs to prevent a race condition between these processes and IPIs that attempt to update PMUSERENR_EL0 for the host EL0. As this change (disabling IRQs) is applied to the nVHE hyp code, unwanted code (e.g. trace_hardirqs_off, etc) will be included in the hyp code when CONFIG_TRACE_IRQFLAGS and/or CONFIG_DEBUG_IRQFLAGS are enabled. Introduce NO_TRACE_IRQFLAGS and NO_DEBUG_IRQFLAGS macros to locally disable CONFIG_TRACE_IRQFLAGS or CONFIG_DEBUG_IRQFLAGS in the nVHE hyp code. Suggested-by: Mark Rutland <mark.rutland@arm.com> Suggested-by: Marc Zyngier <maz@kernel.org> Fixes: 83a7a4d643d3 ("arm64: perf: Enable PMU counter userspace access for perf event") Signed-off-by: Reiji Watanabe <reijiw@google.com> --- arch/arm64/include/asm/kvm_host.h | 7 +++++++ arch/arm64/kernel/perf_event.c | 21 ++++++++++++++++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 24 ++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/pmu.c | 25 +++++++++++++++++++++++++ include/linux/irqflags.h | 6 +++--- 6 files changed, 78 insertions(+), 7 deletions(-)