diff mbox series

[v7,11/12] KVM: arm64: Swap TRFCR on guest switch

Message ID 20241112103717.589952-12-james.clark@linaro.org (mailing list archive)
State New
Headers show
Series kvm/coresight: Support exclude guest and exclude host | expand

Commit Message

James Clark Nov. 12, 2024, 10:37 a.m. UTC
This implements exclude/include guest rules of the active tracing
session. Only do it if a different value is required for the guest,
otherwise the filters remain untouched.

In VHE we can just directly write the value.

Signed-off-by: James Clark <james.clark@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h  |  4 ++++
 arch/arm64/kvm/debug.c             | 16 ++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/debug-sr.c | 17 +++++++++++++++--
 3 files changed, 35 insertions(+), 2 deletions(-)

Comments

Oliver Upton Nov. 20, 2024, 5:31 p.m. UTC | #1
On Tue, Nov 12, 2024 at 10:37:10AM +0000, James Clark wrote:
> +void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
> +{
> +	if (kvm_arm_skip_trace_state())
> +		return;
> +
> +	if (has_vhe())
> +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
> +	else
> +		if (host_trfcr != guest_trfcr) {
> +			*host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;

Huh? That's going into host_debug_state, which is the dumping grounds
for *host* context when entering a guest.

Not sure why we'd stick a *guest* value in there...

> +			host_data_set_flag(HOST_STATE_SWAP_TRFCR);
> +		} else
> +			host_data_clear_flag(HOST_STATE_SWAP_TRFCR);
> +}
> +EXPORT_SYMBOL_GPL(kvm_set_trfcr);

I have a rather strong distaste for this interface, both with the
coresight driver and internally with the hypervisor. It'd be better if
the driver actually told KVM what the *intent* is rather than throwing a
pile of bits over the fence and forcing KVM to interpret what that
configuration means.

> +static void __debug_swap_trace(void)
> +{
> +	u64 trfcr = read_sysreg_el1(SYS_TRFCR);
> +
> +	write_sysreg_el1(*host_data_ptr(host_debug_state.trfcr_el1), SYS_TRFCR);
> +	*host_data_ptr(host_debug_state.trfcr_el1) = trfcr;
> +	host_data_set_flag(HOST_STATE_RESTORE_TRFCR);
> +}
> +

What if trace is disabled in the guest or in the host? Do we need to
synchronize when transitioning from an enabled -> disabled state like we
do today?

I took a stab at this, completely untested of course && punts on
protected mode. But this is _generally_ how I'd like to see everything
fit together.

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8bc0ec151684..b4714cece5f0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -611,7 +611,7 @@ struct cpu_sve_state {
  */
 struct kvm_host_data {
 #define KVM_HOST_DATA_FLAG_HAS_SPE			0
-#define KVM_HOST_DATA_FLAG_HAS_TRBE			1
+#define KVM_HOST_DATA_FLAG_HOST_TRBE_ENABLED		1
 #define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED		2
 #define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED		3
 	unsigned long flags;
@@ -659,6 +659,9 @@ struct kvm_host_data {
 		u64 mdcr_el2;
 	} host_debug_state;
 
+	/* Guest trace filter value */
+	u64 guest_trfcr_el1;
+
 	/* Number of programmable event counters (PMCR_EL0.N) for this CPU */
 	unsigned int nr_event_counters;
 
@@ -1381,6 +1384,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
 void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
 void kvm_clr_pmu_events(u64 clr);
 bool kvm_set_pmuserenr(u64 val);
+void kvm_enable_trbe(u64 guest_trfcr);
+void kvm_disable_trbe(void);
 #else
 static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u64 clr) {}
@@ -1388,6 +1393,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
 {
 	return false;
 }
+void kvm_enable_trbe(u64 guest_trfcr) {}
+void kvm_disable_trbe(void) {}
 #endif
 
 void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 46dbeabd6833..6ef8d8f4b452 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -72,10 +72,6 @@ void kvm_init_host_debug_data(void)
 	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
 	    !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
 		host_data_set_flag(HAS_SPE);
-
-	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
-	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
-		host_data_set_flag(HAS_TRBE);
 }
 
 /*
@@ -215,3 +211,27 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
 	kvm_arch_vcpu_load(vcpu, smp_processor_id());
 	preempt_enable();
 }
+
+void kvm_enable_trbe(u64 guest_trfcr)
+{
+	if (WARN_ON_ONCE(preemptible()))
+		return;
+
+	if (has_vhe()) {
+		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
+		return;
+	}
+
+	*host_data_ptr(guest_trfcr_el1) = guest_trfcr;
+	host_data_set_flag(HOST_TRBE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(kvm_enable_trbe);
+
+void kvm_disable_trbe(void)
+{
+	if (has_vhe() || WARN_ON_ONCE(preemptible()))
+		return;
+
+	host_data_clear_flag(HOST_TRBE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(kvm_disable_trbe);
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 858bb38e273f..d36cbce75bee 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -51,32 +51,33 @@ static void __debug_restore_spe(u64 pmscr_el1)
 	write_sysreg_el1(pmscr_el1, SYS_PMSCR);
 }
 
-static void __debug_save_trace(u64 *trfcr_el1)
+static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
 {
-	*trfcr_el1 = 0;
+	*saved_trfcr = read_sysreg_el1(SYS_TRFCR);
+	write_sysreg_el1(new_trfcr, SYS_TRFCR);
 
-	/* Check if the TRBE is enabled */
-	if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
+	/* Nothing left to do if going to an enabled state */
+	if (new_trfcr)
 		return;
+
 	/*
-	 * Prohibit trace generation while we are in guest.
-	 * Since access to TRFCR_EL1 is trapped, the guest can't
-	 * modify the filtering set by the host.
+	 * Switching to a context with trace generation disabled. Drain the
+	 * trace buffer to memory.
 	 */
-	*trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
-	write_sysreg_el1(0, SYS_TRFCR);
 	isb();
-	/* Drain the trace buffer to memory */
 	tsb_csync();
 }
 
-static void __debug_restore_trace(u64 trfcr_el1)
+static void __trace_switch_to_guest(void)
 {
-	if (!trfcr_el1)
-		return;
+	__trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
+			  *host_data_ptr(guest_trfcr_el1));
+}
 
-	/* Restore trace filter controls */
-	write_sysreg_el1(trfcr_el1, SYS_TRFCR);
+static void __trace_switch_to_host(void)
+{
+	__trace_do_switch(host_data_ptr(guest_trfcr_el1),
+			  *host_data_ptr(host_debug_state.trfcr_el1));
 }
 
 void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
@@ -84,9 +85,13 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 	/* Disable and flush SPE data generation */
 	if (host_data_test_flag(HAS_SPE))
 		__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
-	/* Disable and flush Self-Hosted Trace generation */
-	if (host_data_test_flag(HAS_TRBE))
-		__debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
+
+	/*
+	 * Switch the trace filter, potentially disabling and flushing trace
+	 * data generation
+	 */
+	if (host_data_test_flag(HOST_TRBE_ENABLED))
+		__trace_switch_to_guest();
 }
 
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -98,8 +103,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
 	if (host_data_test_flag(HAS_SPE))
 		__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
-	if (host_data_test_flag(HAS_TRBE))
-		__debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
+	if (host_data_test_flag(HOST_TRBE_ENABLED))
+		__trace_switch_to_host();
 }
 
 void __debug_switch_to_host(struct kvm_vcpu *vcpu)
James Clark Nov. 21, 2024, 12:50 p.m. UTC | #2
On 20/11/2024 5:31 pm, Oliver Upton wrote:
> On Tue, Nov 12, 2024 at 10:37:10AM +0000, James Clark wrote:
>> +void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
>> +{
>> +	if (kvm_arm_skip_trace_state())
>> +		return;
>> +
>> +	if (has_vhe())
>> +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
>> +	else
>> +		if (host_trfcr != guest_trfcr) {
>> +			*host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;
> 
> Huh? That's going into host_debug_state, which is the dumping grounds
> for *host* context when entering a guest.
> 
> Not sure why we'd stick a *guest* value in there...
> 

Only to save a 3rd storage place for trfcr when just the register and 
one place is technically enough. But yes if it's more readable to have 
guest_trfcr_el1 separately then that makes sense.

>> +			host_data_set_flag(HOST_STATE_SWAP_TRFCR);
>> +		} else
>> +			host_data_clear_flag(HOST_STATE_SWAP_TRFCR);
>> +}
>> +EXPORT_SYMBOL_GPL(kvm_set_trfcr);
> 
> I have a rather strong distaste for this interface, both with the
> coresight driver and internally with the hypervisor. It'd be better if
> the driver actually told KVM what the *intent* is rather than throwing a
> pile of bits over the fence and forcing KVM to interpret what that
> configuration means.
> 

That works, it would be nice to have it consistent and have it that way 
for filtering, like kvm_set_guest_trace_filters(bool kernel, bool user). 
But I suppose we can justify not doing it there because we're not really 
interpreting the TRFCR value just writing it whole.

>> +static void __debug_swap_trace(void)
>> +{
>> +	u64 trfcr = read_sysreg_el1(SYS_TRFCR);
>> +
>> +	write_sysreg_el1(*host_data_ptr(host_debug_state.trfcr_el1), SYS_TRFCR);
>> +	*host_data_ptr(host_debug_state.trfcr_el1) = trfcr;
>> +	host_data_set_flag(HOST_STATE_RESTORE_TRFCR);
>> +}
>> +
> 
> What if trace is disabled in the guest or in the host? Do we need to
> synchronize when transitioning from an enabled -> disabled state like we
> do today?
> 

By synchronize do you mean the tsb_csync()? I can only see it being 
necessary for the TRBE case because then writing to the buffer is fatal. 
Without TRBE the trace sinks still work and the boundary of when exactly 
tracing is disabled in the kernel isn't critical.

> I took a stab at this, completely untested of course && punts on
> protected mode. But this is _generally_ how I'd like to see everything
> fit together.
> 

Would you expect to see the protected mode stuff ignored if I sent 
another version more like yours below? Or was that just skipped to keep 
the example shorter?

I think I'm a bit uncertain on that one because removing HAS_TRBE means 
you can't check if TRBE is enabled or not in protected mode and it will 
go wrong if it is.

But other than that I think I get the general point of what you mean:

   * Add an explicit guest_trfcr variable rather than cheating and using
     the host one
   * kvm_enable_trbe() rather than interpreting the TRBLIMITR value
   * Some code reuse by calling __trace_do_switch() with flipped
     arguments on both entry and exit

And see below but I think it requires one minor change to support 
filtering without TRBE

> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8bc0ec151684..b4714cece5f0 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -611,7 +611,7 @@ struct cpu_sve_state {
>    */
>   struct kvm_host_data {
>   #define KVM_HOST_DATA_FLAG_HAS_SPE			0
> -#define KVM_HOST_DATA_FLAG_HAS_TRBE			1
> +#define KVM_HOST_DATA_FLAG_HOST_TRBE_ENABLED		1
>   #define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED		2
>   #define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED		3
>   	unsigned long flags;
> @@ -659,6 +659,9 @@ struct kvm_host_data {
>   		u64 mdcr_el2;
>   	} host_debug_state;
>   
> +	/* Guest trace filter value */
> +	u64 guest_trfcr_el1;
> +
>   	/* Number of programmable event counters (PMCR_EL0.N) for this CPU */
>   	unsigned int nr_event_counters;
>   
> @@ -1381,6 +1384,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
>   void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
>   void kvm_clr_pmu_events(u64 clr);
>   bool kvm_set_pmuserenr(u64 val);
> +void kvm_enable_trbe(u64 guest_trfcr);
> +void kvm_disable_trbe(void);
>   #else
>   static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
>   static inline void kvm_clr_pmu_events(u64 clr) {}
> @@ -1388,6 +1393,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
>   {
>   	return false;
>   }
> +void kvm_enable_trbe(u64 guest_trfcr) {}
> +void kvm_disable_trbe(void) {}
>   #endif
>   
>   void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 46dbeabd6833..6ef8d8f4b452 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -72,10 +72,6 @@ void kvm_init_host_debug_data(void)
>   	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
>   	    !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
>   		host_data_set_flag(HAS_SPE);
> -
> -	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
> -	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
> -		host_data_set_flag(HAS_TRBE);
>   }
>   
>   /*
> @@ -215,3 +211,27 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
>   	kvm_arch_vcpu_load(vcpu, smp_processor_id());
>   	preempt_enable();
>   }
> +
> +void kvm_enable_trbe(u64 guest_trfcr)
> +{
> +	if (WARN_ON_ONCE(preemptible()))
> +		return;
> +
> +	if (has_vhe()) {
> +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
> +		return;
> +	}
> +
> +	*host_data_ptr(guest_trfcr_el1) = guest_trfcr;
> +	host_data_set_flag(HOST_TRBE_ENABLED);

FWIW TRBE and TRF are separate features, so this wouldn't do the 
filtering correctly if TRBE wasn't in use, but I can split it out into
separate kvm_enable_trbe(void) and kvm_set_guest_filters(u64 guest_trfcr).

> +}
> +EXPORT_SYMBOL_GPL(kvm_enable_trbe);
> +
> +void kvm_disable_trbe(void)
> +{
> +	if (has_vhe() || WARN_ON_ONCE(preemptible()))
> +		return;
> +
> +	host_data_clear_flag(HOST_TRBE_ENABLED);
> +}
> +EXPORT_SYMBOL_GPL(kvm_disable_trbe);
> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> index 858bb38e273f..d36cbce75bee 100644
> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> @@ -51,32 +51,33 @@ static void __debug_restore_spe(u64 pmscr_el1)
>   	write_sysreg_el1(pmscr_el1, SYS_PMSCR);
>   }
>   
> -static void __debug_save_trace(u64 *trfcr_el1)
> +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
>   {
> -	*trfcr_el1 = 0;
> +	*saved_trfcr = read_sysreg_el1(SYS_TRFCR);
> +	write_sysreg_el1(new_trfcr, SYS_TRFCR);
>   
> -	/* Check if the TRBE is enabled */
> -	if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
> +	/* Nothing left to do if going to an enabled state */
> +	if (new_trfcr)
>   		return;
> +
>   	/*
> -	 * Prohibit trace generation while we are in guest.
> -	 * Since access to TRFCR_EL1 is trapped, the guest can't
> -	 * modify the filtering set by the host.
> +	 * Switching to a context with trace generation disabled. Drain the
> +	 * trace buffer to memory.
>   	 */
> -	*trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
> -	write_sysreg_el1(0, SYS_TRFCR);
>   	isb();
> -	/* Drain the trace buffer to memory */
>   	tsb_csync();
>   }
>   
> -static void __debug_restore_trace(u64 trfcr_el1)
> +static void __trace_switch_to_guest(void)
>   {
> -	if (!trfcr_el1)
> -		return;
> +	__trace_do_switch(host_data_ptr(host_debug_state.trfcr_el1),
> +			  *host_data_ptr(guest_trfcr_el1));
> +}
>   
> -	/* Restore trace filter controls */
> -	write_sysreg_el1(trfcr_el1, SYS_TRFCR);
> +static void __trace_switch_to_host(void)
> +{
> +	__trace_do_switch(host_data_ptr(guest_trfcr_el1),
> +			  *host_data_ptr(host_debug_state.trfcr_el1));
>   }
>   
>   void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
> @@ -84,9 +85,13 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>   	/* Disable and flush SPE data generation */
>   	if (host_data_test_flag(HAS_SPE))
>   		__debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
> -	/* Disable and flush Self-Hosted Trace generation */
> -	if (host_data_test_flag(HAS_TRBE))
> -		__debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
> +
> +	/*
> +	 * Switch the trace filter, potentially disabling and flushing trace
> +	 * data generation
> +	 */
> +	if (host_data_test_flag(HOST_TRBE_ENABLED))
> +		__trace_switch_to_guest();



>   }
>   
>   void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
> @@ -98,8 +103,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
>   {
>   	if (host_data_test_flag(HAS_SPE))
>   		__debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
> -	if (host_data_test_flag(HAS_TRBE))
> -		__debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
> +	if (host_data_test_flag(HOST_TRBE_ENABLED))
> +		__trace_switch_to_host();
>   }
>   
>   void __debug_switch_to_host(struct kvm_vcpu *vcpu)
>
Oliver Upton Nov. 26, 2024, 4:23 p.m. UTC | #3
On Thu, Nov 21, 2024 at 12:50:10PM +0000, James Clark wrote:
> 
> 
> On 20/11/2024 5:31 pm, Oliver Upton wrote:
> > On Tue, Nov 12, 2024 at 10:37:10AM +0000, James Clark wrote:
> > > +void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
> > > +{
> > > +	if (kvm_arm_skip_trace_state())
> > > +		return;
> > > +
> > > +	if (has_vhe())
> > > +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
> > > +	else
> > > +		if (host_trfcr != guest_trfcr) {
> > > +			*host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;
> > 
> > Huh? That's going into host_debug_state, which is the dumping grounds
> > for *host* context when entering a guest.
> > 
> > Not sure why we'd stick a *guest* value in there...
> > 
> 
> Only to save a 3rd storage place for trfcr when just the register and one
> place is technically enough. But yes if it's more readable to have
> guest_trfcr_el1 separately then that makes sense.

Yeah, since this is all per-cpu data at this point rather than per-vCPU,
it isn't the end of the world to use a few extra bytes.

> That works, it would be nice to have it consistent and have it that way for
> filtering, like kvm_set_guest_trace_filters(bool kernel, bool user). But I
> suppose we can justify not doing it there because we're not really
> interpreting the TRFCR value just writing it whole.

Agreed, the biggest thing I'd want to see in the exported interfaces
like this is to have enable/disable helpers to tell KVM when a driver
wants KVM to start/stop managing a piece of state while in a guest.

Then the hypervisor code can blindly save/restore some opaque values to
whatever registers it needs to update.

> > What if trace is disabled in the guest or in the host? Do we need to
> > synchronize when transitioning from an enabled -> disabled state like we
> > do today?
> > 
> 
> By synchronize do you mean the tsb_csync()? I can only see it being
> necessary for the TRBE case because then writing to the buffer is fatal.
> Without TRBE the trace sinks still work and the boundary of when exactly
> tracing is disabled in the kernel isn't critical.

Ack, I had the blinders on that we cared only about TRBE here.

> > I took a stab at this, completely untested of course && punts on
> > protected mode. But this is _generally_ how I'd like to see everything
> > fit together.
> > 
> 
> Would you expect to see the protected mode stuff ignored if I sent another
> version more like yours below? Or was that just skipped to keep the example
> shorter?

Skipped since I slapped this together in a hurry.

> I think I'm a bit uncertain on that one because removing HAS_TRBE means you
> can't check if TRBE is enabled or not in protected mode and it will go wrong
> if it is.

The protected mode hypervisor will need two bits of information.
Detecting that the feature is present can be done in the kernel so long
as the corresponding static key / cpucap is toggled before we drop
privileges.

Whether or not it is programmable + enabled is a decision that must be
made by observing hardware state from the hypervisor before entering a
guest.

[...]

> > +void kvm_enable_trbe(u64 guest_trfcr)
> > +{
> > +	if (WARN_ON_ONCE(preemptible()))
> > +		return;
> > +
> > +	if (has_vhe()) {
> > +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
> > +		return;
> > +	}
> > +
> > +	*host_data_ptr(guest_trfcr_el1) = guest_trfcr;
> > +	host_data_set_flag(HOST_TRBE_ENABLED);
> 
> FWIW TRBE and TRF are separate features, so this wouldn't do the filtering
> correctly if TRBE wasn't in use, but I can split it out into
> separate kvm_enable_trbe(void) and kvm_set_guest_filters(u64 guest_trfcr).

KVM manages the same piece of state (TRFCR_EL1) either way though right?

The expectation I had is that KVM is informed any time a trace session
(TRBE or otherwise) is enabled/disabled on a CPU, likely with a TRFCR_EL1
of 0 if guest mode is excluded.

The function names might need massaging, but I was hoping to have a
single set of enable/disable knobs to cover all bases here.
James Clark Nov. 27, 2024, 10:08 a.m. UTC | #4
On 26/11/2024 4:23 pm, Oliver Upton wrote:
> On Thu, Nov 21, 2024 at 12:50:10PM +0000, James Clark wrote:
>>
>>
>> On 20/11/2024 5:31 pm, Oliver Upton wrote:
>>> On Tue, Nov 12, 2024 at 10:37:10AM +0000, James Clark wrote:
>>>> +void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
>>>> +{
>>>> +	if (kvm_arm_skip_trace_state())
>>>> +		return;
>>>> +
>>>> +	if (has_vhe())
>>>> +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
>>>> +	else
>>>> +		if (host_trfcr != guest_trfcr) {
>>>> +			*host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;
>>>
>>> Huh? That's going into host_debug_state, which is the dumping grounds
>>> for *host* context when entering a guest.
>>>
>>> Not sure why we'd stick a *guest* value in there...
>>>
>>
>> Only to save a 3rd storage place for trfcr when just the register and one
>> place is technically enough. But yes if it's more readable to have
>> guest_trfcr_el1 separately then that makes sense.
> 
> Yeah, since this is all per-cpu data at this point rather than per-vCPU,
> it isn't the end of the world to use a few extra bytes.
> 
>> That works, it would be nice to have it consistent and have it that way for
>> filtering, like kvm_set_guest_trace_filters(bool kernel, bool user). But I
>> suppose we can justify not doing it there because we're not really
>> interpreting the TRFCR value just writing it whole.
> 
> Agreed, the biggest thing I'd want to see in the exported interfaces
> like this is to have enable/disable helpers to tell KVM when a driver
> wants KVM to start/stop managing a piece of state while in a guest.
> 
> Then the hypervisor code can blindly save/restore some opaque values to
> whatever registers it needs to update.
> 
>>> What if trace is disabled in the guest or in the host? Do we need to
>>> synchronize when transitioning from an enabled -> disabled state like we
>>> do today?
>>>
>>
>> By synchronize do you mean the tsb_csync()? I can only see it being
>> necessary for the TRBE case because then writing to the buffer is fatal.
>> Without TRBE the trace sinks still work and the boundary of when exactly
>> tracing is disabled in the kernel isn't critical.
> 
> Ack, I had the blinders on that we cared only about TRBE here.
> 
>>> I took a stab at this, completely untested of course && punts on
>>> protected mode. But this is _generally_ how I'd like to see everything
>>> fit together.
>>>
>>
>> Would you expect to see the protected mode stuff ignored if I sent another
>> version more like yours below? Or was that just skipped to keep the example
>> shorter?
> 
> Skipped since I slapped this together in a hurry.
> 
>> I think I'm a bit uncertain on that one because removing HAS_TRBE means you
>> can't check if TRBE is enabled or not in protected mode and it will go wrong
>> if it is.
> 
> The protected mode hypervisor will need two bits of information.
> Detecting that the feature is present can be done in the kernel so long
> as the corresponding static key / cpucap is toggled before we drop
> privileges.
> 
> Whether or not it is programmable + enabled is a decision that must be
> made by observing hardware state from the hypervisor before entering a
> guest.
> 
> [...]
> 
>>> +void kvm_enable_trbe(u64 guest_trfcr)
>>> +{
>>> +	if (WARN_ON_ONCE(preemptible()))
>>> +		return;
>>> +
>>> +	if (has_vhe()) {
>>> +		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
>>> +		return;
>>> +	}
>>> +
>>> +	*host_data_ptr(guest_trfcr_el1) = guest_trfcr;
>>> +	host_data_set_flag(HOST_TRBE_ENABLED);
>>
>> FWIW TRBE and TRF are separate features, so this wouldn't do the filtering
>> correctly if TRBE wasn't in use, but I can split it out into
>> separate kvm_enable_trbe(void) and kvm_set_guest_filters(u64 guest_trfcr).
> 
> KVM manages the same piece of state (TRFCR_EL1) either way though right?
> 
> The expectation I had is that KVM is informed any time a trace session
> (TRBE or otherwise) is enabled/disabled on a CPU, likely with a TRFCR_EL1
> of 0 if guest mode is excluded.
> 
> The function names might need massaging, but I was hoping to have a
> single set of enable/disable knobs to cover all bases here.
> 

I sent another version, it did come out much simpler and still does all 
the same things as before.

I didn't manage to make a single enable/disable knob though. The thing 
is the filtering is set on the source side of the driver and trbe is a 
sink thing. I would have to couple them together and add knowledge of 
the sink type to the source to make it work.

That would then open up the possibility for anyone adding a new source 
to get the trbe bit wrong in the future. Having KVM override the filter 
setting when trbe is in use seems a lot safer and easier to understand.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a8846689512b..9109d10c656e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -949,6 +949,8 @@  struct kvm_vcpu_arch {
 #define HOST_STATE_TRBE_EN	__kvm_single_flag(state, BIT(1))
 /* Hyp modified TRFCR */
 #define HOST_STATE_RESTORE_TRFCR __kvm_single_flag(state, BIT(2))
+/* Host wants a different trace filters for the guest */
+#define HOST_STATE_SWAP_TRFCR	__kvm_single_flag(state, BIT(3))
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
@@ -1392,6 +1394,7 @@  void kvm_clr_pmu_events(u64 clr);
 bool kvm_set_pmuserenr(u64 val);
 void kvm_set_pmblimitr(u64 pmblimitr);
 void kvm_set_trblimitr(u64 trblimitr);
+void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr);
 #else
 static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u64 clr) {}
@@ -1401,6 +1404,7 @@  static inline bool kvm_set_pmuserenr(u64 val)
 }
 static inline void kvm_set_pmblimitr(u64 pmblimitr) {}
 static inline void kvm_set_trblimitr(u64 trblimitr) {}
+static inline void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr) {}
 #endif
 
 void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index e99df2c3f62a..9acec1b67d5f 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -380,3 +380,19 @@  void kvm_set_trblimitr(u64 trblimitr)
 		host_data_clear_flag(HOST_STATE_TRBE_EN);
 }
 EXPORT_SYMBOL_GPL(kvm_set_trblimitr);
+
+void kvm_set_trfcr(u64 host_trfcr, u64 guest_trfcr)
+{
+	if (kvm_arm_skip_trace_state())
+		return;
+
+	if (has_vhe())
+		write_sysreg_s(guest_trfcr, SYS_TRFCR_EL12);
+	else
+		if (host_trfcr != guest_trfcr) {
+			*host_data_ptr(host_debug_state.trfcr_el1) = guest_trfcr;
+			host_data_set_flag(HOST_STATE_SWAP_TRFCR);
+		} else
+			host_data_clear_flag(HOST_STATE_SWAP_TRFCR);
+}
+EXPORT_SYMBOL_GPL(kvm_set_trfcr);
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 17c23e52f5f4..47602c4d160a 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -100,6 +100,15 @@  static void __debug_save_trace(void)
 	host_data_set_flag(HOST_STATE_RESTORE_TRFCR);
 }
 
+static void __debug_swap_trace(void)
+{
+	u64 trfcr = read_sysreg_el1(SYS_TRFCR);
+
+	write_sysreg_el1(*host_data_ptr(host_debug_state.trfcr_el1), SYS_TRFCR);
+	*host_data_ptr(host_debug_state.trfcr_el1) = trfcr;
+	host_data_set_flag(HOST_STATE_RESTORE_TRFCR);
+}
+
 static void __debug_restore_trace(void)
 {
 	u64 trfcr_el1;
@@ -124,10 +133,14 @@  void __debug_save_host_buffers_nvhe(void)
 	if (!host_data_get_flag(HOST_FEAT_HAS_TRF))
 		return;
 
-	/* Disable and flush Self-Hosted Trace generation */
+	/*
+	 * Disable and flush Self-Hosted Trace generation for pKVM and TRBE,
+	 * or swap if host requires different guest filters.
+	 */
 	if (__debug_should_save_trace())
 		__debug_save_trace();
-
+	else if (host_data_get_flag(HOST_STATE_SWAP_TRFCR))
+		__debug_swap_trace();
 }
 
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu)