
[v7,07/12] KVM: arm64: arm_spe: Give SPE enabled state to KVM

Message ID 20241112103717.589952-8-james.clark@linaro.org (mailing list archive)
State New
Series kvm/coresight: Support exclude guest and exclude host

Commit Message

James Clark Nov. 12, 2024, 10:37 a.m. UTC
Currently in nVHE, KVM has to check whether SPE is enabled on every
guest switch, even if it was never used. Because it's a debug feature
that is more likely to be unused than used, give KVM the SPE buffer
status to allow a much simpler and faster do-nothing path in the hyp.
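
The hyp side can then take an early out along the lines of the
following sketch (illustrative only; the hyp change is not part of
this patch, and host_data_test_flag() is assumed as the counterpart
of the setters used here):

    /* e.g. in the nVHE hyp's SPE save path */
    static void __debug_save_spe(u64 *pmscr_el1)
    {
            /* Leave nothing to restore by default */
            *pmscr_el1 = 0;

            /* Nothing to save if the host never enabled the buffer */
            if (!host_data_test_flag(HOST_STATE_SPE_EN))
                    return;

            /* ... existing PMBSR_EL1/PMSCR_EL1 save and drain ... */
    }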

kvm_set_pmblimitr() is always called with preemption disabled, except
for the probe/hotplug paths, where the call is wrapped in
preempt_disable().

Signed-off-by: James Clark <james.clark@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  6 ++++++
 arch/arm64/kvm/debug.c            | 29 +++++++++++++++++++++++++++++
 drivers/perf/arm_spe_pmu.c        | 13 +++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

Comments

Oliver Upton Nov. 20, 2024, 9:16 a.m. UTC | #1
Hi James,

On Tue, Nov 12, 2024 at 10:37:06AM +0000, James Clark wrote:
> Currently in nVHE, KVM has to check whether SPE is enabled on every
> guest switch, even if it was never used. Because it's a debug feature
> that is more likely to be unused than used, give KVM the SPE buffer
> status to allow a much simpler and faster do-nothing path in the hyp.
> 
> kvm_set_pmblimitr() is always called with preemption disabled, except
> for the probe/hotplug paths, where the call is wrapped in
> preempt_disable().

Unless the performance penalty of checking whether SPE is enabled is
measurably bad, I'd rather we keep things as-is.

Folks that want to go fast are probably using VHE to begin with. As you
note below, we need the hypervisor to decide if SPE is enabled based on
hardware in protected mode anyway. Using a common flow for protected and
non-protected configs keeps complexity down and increases the likelihood
SPE save/restore code actually gets tested.
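
That decision can be made from the hardware state directly, something
like (illustrative only, not actual hyp code):

    /* Does the host currently have the SPE buffer enabled? */
    static bool host_spe_buffer_enabled(void)
    {
            return read_sysreg_s(SYS_PMBLIMITR_EL1) & PMBLIMITR_EL1_E;
    }

and a check like that works the same whether or not we're in
protected mode.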
James Clark Nov. 20, 2024, 9:43 a.m. UTC | #2
On 20/11/2024 9:16 am, Oliver Upton wrote:
> Hi James,
> 
> On Tue, Nov 12, 2024 at 10:37:06AM +0000, James Clark wrote:
>> Currently in nVHE, KVM has to check whether SPE is enabled on every
>> guest switch, even if it was never used. Because it's a debug feature
>> that is more likely to be unused than used, give KVM the SPE buffer
>> status to allow a much simpler and faster do-nothing path in the hyp.
>>
>> kvm_set_pmblimitr() is always called with preemption disabled, except
>> for the probe/hotplug paths, where the call is wrapped in
>> preempt_disable().
> 
> Unless the performance penalty of checking whether SPE is enabled is
> measurably bad, I'd rather we keep things as-is.
> 
> Folks that want to go fast are probably using VHE to begin with. As you
> note below, we need the hypervisor to decide if SPE is enabled based on
> hardware in protected mode anyway. Using a common flow for protected and
> non-protected configs keeps complexity down and increases the likelihood
> SPE save/restore code actually gets tested.
> 

I'm not sure if there is any measurable difference. This change was 
actually in response to this review comment from Marc [1]:

   > Why do we need to save anything if nothing was enabled, which is
   > *all the time*? I'm sorry to break it to you, but nobody uses these
   > features.  So I'd like them to have zero cost when not in use.

   > Surely there is something there that should say "yup, tracing" or
   > not (such as the enable bits), which would avoid hitting the sysreg
   > pointlessly?

I suppose I took the "zero cost" bit a bit too literally, and maybe 
there were simpler optimizations that didn't involve strongly coupling 
the driver to KVM. That might have worked for enable/disable at least; 
for the filtering state, the coupling would still be required.

I'm trying to think if there is some middle ground where a system-wide 
flag or static key gets set on the very first SPE or trace session 
(roughly as in the sketch below). In theory it could be simpler than 
this per-CPU enable/disable state, but in the end it needs pretty much 
the same info from the driver (and has the same protected mode issue). 
So you might as well do it as fine grained as this, or not at all, as 
you suggest.

[1]: https://lore.kernel.org/linux-arm-kernel/86bk832jza.wl-maz@kernel.org/
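
Something like this is what I had in mind for the static key version 
(untested sketch, invented names, just to illustrate):

    /* In drivers/perf/arm_spe_pmu.c. One-way latch: set on the
     * first SPE session, never cleared.
     */
    DEFINE_STATIC_KEY_FALSE(spe_ever_used);

    static int arm_spe_pmu_event_init(struct perf_event *event)
    {
            /*
             * event_init runs in process context, so enabling the
             * static key (which may sleep) is safe here.
             */
            static_branch_enable(&spe_ever_used);

            /* ... existing event validation ... */
            return 0;
    }

The guest switch path would then test the key, but that's exactly 
where the protected mode issue bites: the pKVM hyp can't trust 
host-provided state, so it would still have to read PMBLIMITR_EL1 
for itself.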

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5dfc3f4f74b2..7f1e32d40f0c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -641,6 +641,8 @@  struct kvm_host_data {
 	struct {
 		/* Host CPU features, set at init */
 		u8 feats;
+		/* Host CPU state */
+		u8 state;
 	} flags;
 
 	/*
@@ -941,6 +943,8 @@  struct kvm_vcpu_arch {
 #define HOST_FEAT_HAS_TRBE	__kvm_single_flag(feats, BIT(1))
 /* CPU has Feat_TRF */
 #define HOST_FEAT_HAS_TRF	__kvm_single_flag(feats, BIT(2))
+/* PMBLIMITR_EL1_E is set (SPE profiling buffer enabled) */
+#define HOST_STATE_SPE_EN	__kvm_single_flag(state, BIT(0))
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
@@ -1382,6 +1386,7 @@  static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
 void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
 void kvm_clr_pmu_events(u64 clr);
 bool kvm_set_pmuserenr(u64 val);
+void kvm_set_pmblimitr(u64 pmblimitr);
 #else
 static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u64 clr) {}
@@ -1389,6 +1394,7 @@  static inline bool kvm_set_pmuserenr(u64 val)
 {
 	return false;
 }
+static inline void kvm_set_pmblimitr(u64 pmblimitr) {}
 #endif
 
 void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fb41ef5d9db9..ed3b4d057c52 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -335,3 +335,32 @@  void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+static bool kvm_arm_skip_trace_state(void)
+{
+	/* pKVM hyp finds out the state for itself */
+	if (is_protected_kvm_enabled())
+		return true;
+
+	/* Make sure state gets there in one piece */
+	if (WARN_ON_ONCE(preemptible()))
+		return true;
+
+	return false;
+}
+
+void kvm_set_pmblimitr(u64 pmblimitr)
+{
+	/* Only read in nVHE */
+	if (has_vhe())
+		return;
+
+	if (kvm_arm_skip_trace_state())
+		return;
+
+	if (pmblimitr & PMBLIMITR_EL1_E)
+		host_data_set_flag(HOST_STATE_SPE_EN);
+	else
+		host_data_clear_flag(HOST_STATE_SPE_EN);
+}
+EXPORT_SYMBOL_GPL(kvm_set_pmblimitr);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 3569050f9cf3..6a79df363aa6 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -23,6 +23,7 @@ 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
+#include <linux/kvm_host.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -496,6 +497,12 @@  static u64 arm_spe_pmu_next_off(struct perf_output_handle *handle)
 	return limit;
 }
 
+static void arm_spe_write_pmblimitr(u64 val)
+{
+	write_sysreg_s(val, SYS_PMBLIMITR_EL1);
+	kvm_set_pmblimitr(val);
+}
+
 static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
 					  struct perf_event *event)
 {
@@ -524,7 +531,7 @@  static void arm_spe_perf_aux_output_begin(struct perf_output_handle *handle,
 	write_sysreg_s(base, SYS_PMBPTR_EL1);
 
 out_write_limit:
-	write_sysreg_s(limit, SYS_PMBLIMITR_EL1);
+	arm_spe_write_pmblimitr(limit);
 }
 
 static void arm_spe_perf_aux_output_end(struct perf_output_handle *handle)
@@ -552,7 +559,7 @@  static void arm_spe_pmu_disable_and_drain_local(void)
 	dsb(nsh);
 
 	/* Disable the profiling buffer */
-	write_sysreg_s(0, SYS_PMBLIMITR_EL1);
+	arm_spe_write_pmblimitr(0);
 	isb();
 }
 
@@ -1095,7 +1102,9 @@  static void __arm_spe_pmu_reset_local(void)
 	 * This is probably overkill, as we have no idea where we're
 	 * draining any buffered data to...
 	 */
+	preempt_disable();
 	arm_spe_pmu_disable_and_drain_local();
+	preempt_enable();
 
 	/* Reset the buffer base pointer */
 	write_sysreg_s(0, SYS_PMBPTR_EL1);