diff mbox series

[v4,2/7] arm64: KVM: Use shared area to pass PMU event state to hypervisor

Message ID 20240104162714.1062610-3-james.clark@arm.com (mailing list archive)
State New, archived
Headers show
Series kvm/coresight: Support exclude guest and exclude host | expand

Commit Message

James Clark Jan. 4, 2024, 4:27 p.m. UTC
Currently the state of the PMU events is copied into the VCPU struct
before every VCPU run. This isn't scalable if more data for other
features needs to be added too. So make a writable area that's shared
between the host and the hypervisor to store this state.

Normal per-cpu constructs can't be used because although the framework
exists for the host to write to the hypervisor's per-cpu structs, this
only works until the protection is enabled. And for the other way
around, no framework exists for the hypervisor to access the host's size
and layout of per-cpu data. Instead of making a new framework for the
hypervisor to access the host's per-cpu data that would only be used
once, just define the new shared area as an array with NR_CPUS elements.
This also reduces the amount of sharing that needs to be done, because
unlike this array, the per-cpu data isn't contiguous.

Signed-off-by: James Clark <james.clark@arm.com>
---
 arch/arm64/include/asm/kvm_host.h |  8 ++++++++
 arch/arm64/kernel/image-vars.h    |  1 +
 arch/arm64/kvm/arm.c              | 16 ++++++++++++++--
 arch/arm64/kvm/hyp/nvhe/setup.c   | 11 +++++++++++
 arch/arm64/kvm/hyp/nvhe/switch.c  |  9 +++++++--
 arch/arm64/kvm/pmu.c              |  4 +---
 include/kvm/arm_pmu.h             | 17 -----------------
 7 files changed, 42 insertions(+), 24 deletions(-)

Comments

Suzuki K Poulose Jan. 5, 2024, 9:40 a.m. UTC | #1
On 04/01/2024 16:27, James Clark wrote:
> Currently the state of the PMU events is copied into the VCPU struct
> before every VCPU run. This isn't scalable if more data for other
> features needs to be added too. So make a writable area that's shared
> between the host and the hypervisor to store this state.
> 
> Normal per-cpu constructs can't be used because although the framework
> exists for the host to write to the hypervisor's per-cpu structs, this
> only works until the protection is enabled. And for the other way
> around, no framework exists for the hypervisor to access the host's size
> and layout of per-cpu data. Instead of making a new framework for the
> hypervisor to access the host's per-cpu data that would only be used
> once, just define the new shared area as an array with NR_CPUS elements.
> This also reduces the amount of sharing that needs to be done, because
> unlike this array, the per-cpu data isn't contiguous.
> 
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
>   arch/arm64/include/asm/kvm_host.h |  8 ++++++++
>   arch/arm64/kernel/image-vars.h    |  1 +
>   arch/arm64/kvm/arm.c              | 16 ++++++++++++++--
>   arch/arm64/kvm/hyp/nvhe/setup.c   | 11 +++++++++++
>   arch/arm64/kvm/hyp/nvhe/switch.c  |  9 +++++++--
>   arch/arm64/kvm/pmu.c              |  4 +---
>   include/kvm/arm_pmu.h             | 17 -----------------
>   7 files changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 824f29f04916..93d38ad257ed 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -466,6 +466,14 @@ struct kvm_cpu_context {
>   	struct kvm_vcpu *__hyp_running_vcpu;
>   };
>   
> +struct kvm_host_global_state {
> +	struct kvm_pmu_events {
> +		u32 events_host;
> +		u32 events_guest;
> +	} pmu_events;
> +} ____cacheline_aligned;
> +extern struct kvm_host_global_state kvm_host_global_state[NR_CPUS];

With this in place, we could also optimize the VCPU debug state flags
(DEBUG_STATE_SAVE_{TRBE,SPE}). i.e., right now, we always check the for
SPE and TRBE availability on the CPU, by reading the ID registers.
This could hold the per-cpu flags for the Physical CPU and the VCPU
could use this for making the decisions, rather than reading the two
ID registers per feature everytime.

This can come later though, in a separate series.

Suzuki


> +
>   struct kvm_host_data {
>   	struct kvm_cpu_context host_ctxt;
>   };
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 119ca121b5f8..1a9dbb02bb4a 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -59,6 +59,7 @@ KVM_NVHE_ALIAS(alt_cb_patch_nops);
>   
>   /* Global kernel state accessed by nVHE hyp code. */
>   KVM_NVHE_ALIAS(kvm_vgic_global_state);
> +KVM_NVHE_ALIAS(kvm_host_global_state);
>   
>   /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
>   KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 4796104c4471..bd6b2eda5f4f 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -47,6 +47,20 @@
>   
>   static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>   
> +/*
> + * Host state that isn't associated with any VCPU, but will affect any VCPU
> + * running on a host CPU in the future. This remains writable from the host and
> + * readable in the hyp.
> + *
> + * PER_CPU constructs aren't compatible between the hypervisor and the host so
> + * just define it as a NR_CPUS array. DECLARE_KVM_NVHE_PER_CPU works in both
> + * places, but not after the hypervisor protection is initialised. After that,
> + * kvm_arm_hyp_percpu_base isn't accessible from the host, so even if the
> + * kvm_host_global_state struct was shared with the host, the per-cpu offset
> + * can't be calculated without sharing even more data with the host.
> + */
> +struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
> +
>   DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>   
>   DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
> @@ -1016,8 +1030,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>   
>   		kvm_vgic_flush_hwstate(vcpu);
>   
> -		kvm_pmu_update_vcpu_events(vcpu);
> -
>   		/*
>   		 * Ensure we set mode to IN_GUEST_MODE after we disable
>   		 * interrupts and before the final VCPU requests check.
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index b5452e58c49a..3e45cc10ba96 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -159,6 +159,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
>   	if (ret)
>   		return ret;
>   
> +	/*
> +	 * Similar to kvm_vgic_global_state, but this one remains writable by
> +	 * the host rather than read-only. Used to store per-cpu state about the
> +	 * host that isn't associated with any particular VCPU.
> +	 */
> +	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RW, PKVM_PAGE_SHARED_OWNED);
> +	ret = pkvm_create_mappings(&kvm_host_global_state,
> +				   &kvm_host_global_state + 1, prot);
> +	if (ret)
> +		return ret;
> +
>   	ret = create_hyp_debug_uart_mapping();
>   	if (ret)
>   		return ret;
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index c50f8459e4fc..89147a9dc38c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -130,13 +130,18 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
>   	}
>   }
>   
> +static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
> +{
> +	return &kvm_host_global_state[vcpu->cpu].pmu_events;
> +}
> +
>   /*
>    * Disable host events, enable guest events
>    */
>   #ifdef CONFIG_HW_PERF_EVENTS
>   static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>   {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>   
>   	if (pmu->events_host)
>   		write_sysreg(pmu->events_host, pmcntenclr_el0);
> @@ -152,7 +157,7 @@ static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>    */
>   static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
>   {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>   
>   	if (pmu->events_guest)
>   		write_sysreg(pmu->events_guest, pmcntenclr_el0);
> diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
> index a243934c5568..136d5c6c1916 100644
> --- a/arch/arm64/kvm/pmu.c
> +++ b/arch/arm64/kvm/pmu.c
> @@ -6,8 +6,6 @@
>   #include <linux/kvm_host.h>
>   #include <linux/perf_event.h>
>   
> -static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
> -
>   /*
>    * Given the perf event attributes and system type, determine
>    * if we are going to need to switch counters at guest entry/exit.
> @@ -28,7 +26,7 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
>   
>   struct kvm_pmu_events *kvm_get_pmu_events(void)
>   {
> -	return this_cpu_ptr(&kvm_pmu_events);
> +	return &kvm_host_global_state[smp_processor_id()].pmu_events;
>   }
>   
>   /*
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 4b9d8fb393a8..71a835970ab5 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -18,14 +18,8 @@ struct kvm_pmc {
>   	struct perf_event *perf_event;
>   };
>   
> -struct kvm_pmu_events {
> -	u32 events_host;
> -	u32 events_guest;
> -};
> -
>   struct kvm_pmu {
>   	struct irq_work overflow_work;
> -	struct kvm_pmu_events events;
>   	struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
>   	int irq_num;
>   	bool created;
> @@ -79,17 +73,6 @@ void kvm_vcpu_pmu_resync_el0(void);
>   #define kvm_vcpu_has_pmu(vcpu)					\
>   	(vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3))
>   
> -/*
> - * Updates the vcpu's view of the pmu events for this cpu.
> - * Must be called before every vcpu run after disabling interrupts, to ensure
> - * that an interrupt cannot fire and update the structure.
> - */
> -#define kvm_pmu_update_vcpu_events(vcpu)				\
> -	do {								\
> -		if (!has_vhe() && kvm_vcpu_has_pmu(vcpu))		\
> -			vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
> -	} while (0)
> -
>   /*
>    * Evaluates as true when emulating PMUv3p5, and false otherwise.
>    */
James Clark Feb. 1, 2024, 4:14 p.m. UTC | #2
On 04/01/2024 16:27, James Clark wrote:
> Currently the state of the PMU events is copied into the VCPU struct
> before every VCPU run. This isn't scalable if more data for other
> features needs to be added too. So make a writable area that's shared
> between the host and the hypervisor to store this state.
> 
> Normal per-cpu constructs can't be used because although the framework
> exists for the host to write to the hypervisor's per-cpu structs, this
> only works until the protection is enabled. And for the other way
> around, no framework exists for the hypervisor to access the host's size
> and layout of per-cpu data. Instead of making a new framework for the
> hypervisor to access the host's per-cpu data that would only be used
> once, just define the new shared area as an array with NR_CPUS elements.
> This also reduces the amount of sharing that needs to be done, because
> unlike this array, the per-cpu data isn't contiguous.
> 
> Signed-off-by: James Clark <james.clark@arm.com>

Hi Marc,

Do you have any feedback about whether this what you were thinking of in
your comment here:
https://lore.kernel.org/kvmarm/86msuqb84g.wl-maz@kernel.org/

Thanks
James

> ---
>  arch/arm64/include/asm/kvm_host.h |  8 ++++++++
>  arch/arm64/kernel/image-vars.h    |  1 +
>  arch/arm64/kvm/arm.c              | 16 ++++++++++++++--
>  arch/arm64/kvm/hyp/nvhe/setup.c   | 11 +++++++++++
>  arch/arm64/kvm/hyp/nvhe/switch.c  |  9 +++++++--
>  arch/arm64/kvm/pmu.c              |  4 +---
>  include/kvm/arm_pmu.h             | 17 -----------------
>  7 files changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 824f29f04916..93d38ad257ed 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -466,6 +466,14 @@ struct kvm_cpu_context {
>  	struct kvm_vcpu *__hyp_running_vcpu;
>  };
>  
> +struct kvm_host_global_state {
> +	struct kvm_pmu_events {
> +		u32 events_host;
> +		u32 events_guest;
> +	} pmu_events;
> +} ____cacheline_aligned;
> +extern struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
> +
>  struct kvm_host_data {
>  	struct kvm_cpu_context host_ctxt;
>  };
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 119ca121b5f8..1a9dbb02bb4a 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -59,6 +59,7 @@ KVM_NVHE_ALIAS(alt_cb_patch_nops);
>  
>  /* Global kernel state accessed by nVHE hyp code. */
>  KVM_NVHE_ALIAS(kvm_vgic_global_state);
> +KVM_NVHE_ALIAS(kvm_host_global_state);
>  
>  /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
>  KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 4796104c4471..bd6b2eda5f4f 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -47,6 +47,20 @@
>  
>  static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
>  
> +/*
> + * Host state that isn't associated with any VCPU, but will affect any VCPU
> + * running on a host CPU in the future. This remains writable from the host and
> + * readable in the hyp.
> + *
> + * PER_CPU constructs aren't compatible between the hypervisor and the host so
> + * just define it as a NR_CPUS array. DECLARE_KVM_NVHE_PER_CPU works in both
> + * places, but not after the hypervisor protection is initialised. After that,
> + * kvm_arm_hyp_percpu_base isn't accessible from the host, so even if the
> + * kvm_host_global_state struct was shared with the host, the per-cpu offset
> + * can't be calculated without sharing even more data with the host.
> + */
> +struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
> +
>  DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>  
>  DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
> @@ -1016,8 +1030,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>  
>  		kvm_vgic_flush_hwstate(vcpu);
>  
> -		kvm_pmu_update_vcpu_events(vcpu);
> -
>  		/*
>  		 * Ensure we set mode to IN_GUEST_MODE after we disable
>  		 * interrupts and before the final VCPU requests check.
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index b5452e58c49a..3e45cc10ba96 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -159,6 +159,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
>  	if (ret)
>  		return ret;
>  
> +	/*
> +	 * Similar to kvm_vgic_global_state, but this one remains writable by
> +	 * the host rather than read-only. Used to store per-cpu state about the
> +	 * host that isn't associated with any particular VCPU.
> +	 */
> +	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RW, PKVM_PAGE_SHARED_OWNED);
> +	ret = pkvm_create_mappings(&kvm_host_global_state,
> +				   &kvm_host_global_state + 1, prot);
> +	if (ret)
> +		return ret;
> +
>  	ret = create_hyp_debug_uart_mapping();
>  	if (ret)
>  		return ret;
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index c50f8459e4fc..89147a9dc38c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -130,13 +130,18 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
> +{
> +	return &kvm_host_global_state[vcpu->cpu].pmu_events;
> +}
> +
>  /*
>   * Disable host events, enable guest events
>   */
>  #ifdef CONFIG_HW_PERF_EVENTS
>  static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>  
>  	if (pmu->events_host)
>  		write_sysreg(pmu->events_host, pmcntenclr_el0);
> @@ -152,7 +157,7 @@ static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>   */
>  static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>  
>  	if (pmu->events_guest)
>  		write_sysreg(pmu->events_guest, pmcntenclr_el0);
> diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
> index a243934c5568..136d5c6c1916 100644
> --- a/arch/arm64/kvm/pmu.c
> +++ b/arch/arm64/kvm/pmu.c
> @@ -6,8 +6,6 @@
>  #include <linux/kvm_host.h>
>  #include <linux/perf_event.h>
>  
> -static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
> -
>  /*
>   * Given the perf event attributes and system type, determine
>   * if we are going to need to switch counters at guest entry/exit.
> @@ -28,7 +26,7 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
>  
>  struct kvm_pmu_events *kvm_get_pmu_events(void)
>  {
> -	return this_cpu_ptr(&kvm_pmu_events);
> +	return &kvm_host_global_state[smp_processor_id()].pmu_events;
>  }
>  
>  /*
> diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> index 4b9d8fb393a8..71a835970ab5 100644
> --- a/include/kvm/arm_pmu.h
> +++ b/include/kvm/arm_pmu.h
> @@ -18,14 +18,8 @@ struct kvm_pmc {
>  	struct perf_event *perf_event;
>  };
>  
> -struct kvm_pmu_events {
> -	u32 events_host;
> -	u32 events_guest;
> -};
> -
>  struct kvm_pmu {
>  	struct irq_work overflow_work;
> -	struct kvm_pmu_events events;
>  	struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
>  	int irq_num;
>  	bool created;
> @@ -79,17 +73,6 @@ void kvm_vcpu_pmu_resync_el0(void);
>  #define kvm_vcpu_has_pmu(vcpu)					\
>  	(vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3))
>  
> -/*
> - * Updates the vcpu's view of the pmu events for this cpu.
> - * Must be called before every vcpu run after disabling interrupts, to ensure
> - * that an interrupt cannot fire and update the structure.
> - */
> -#define kvm_pmu_update_vcpu_events(vcpu)				\
> -	do {								\
> -		if (!has_vhe() && kvm_vcpu_has_pmu(vcpu))		\
> -			vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
> -	} while (0)
> -
>  /*
>   * Evaluates as true when emulating PMUv3p5, and false otherwise.
>   */
Oliver Upton Feb. 2, 2024, 10 p.m. UTC | #3
On Thu, Jan 04, 2024 at 04:27:02PM +0000, James Clark wrote:

[...]

> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index c50f8459e4fc..89147a9dc38c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -130,13 +130,18 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
> +{
> +	return &kvm_host_global_state[vcpu->cpu].pmu_events;
> +}
> +
>  /*
>   * Disable host events, enable guest events
>   */
>  #ifdef CONFIG_HW_PERF_EVENTS
>  static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>  
>  	if (pmu->events_host)
>  		write_sysreg(pmu->events_host, pmcntenclr_el0);
> @@ -152,7 +157,7 @@ static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>   */
>  static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
>  {
> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>  
>  	if (pmu->events_guest)
>  		write_sysreg(pmu->events_guest, pmcntenclr_el0);

This now allows the host to program event counters for a protected
guest. That _might_ be a useful feature behind some debug option, but is
most definitely *not* something we want to do for pVMs generally.

Do we even need to make this shared data work at all for pKVM? The rest
of the shared data between pKVM and the kernel is system information,
which (importantly) doesn't have any guest context in it.

I'm perfectly happy leaving these sorts of features broken for pKVM and
using the 'normal' way of getting percpu data to the nVHE hypervisor
otherwise.
James Clark Feb. 5, 2024, 12:16 p.m. UTC | #4
On 02/02/2024 22:00, Oliver Upton wrote:
> On Thu, Jan 04, 2024 at 04:27:02PM +0000, James Clark wrote:
> 
> [...]
> 
>> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
>> index c50f8459e4fc..89147a9dc38c 100644
>> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
>> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
>> @@ -130,13 +130,18 @@ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
>>  	}
>>  }
>>  
>> +static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
>> +{
>> +	return &kvm_host_global_state[vcpu->cpu].pmu_events;
>> +}
>> +
>>  /*
>>   * Disable host events, enable guest events
>>   */
>>  #ifdef CONFIG_HW_PERF_EVENTS
>>  static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>>  {
>> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
>> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>>  
>>  	if (pmu->events_host)
>>  		write_sysreg(pmu->events_host, pmcntenclr_el0);
>> @@ -152,7 +157,7 @@ static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
>>   */
>>  static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
>>  {
>> -	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
>> +	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
>>  
>>  	if (pmu->events_guest)
>>  		write_sysreg(pmu->events_guest, pmcntenclr_el0);
> 
> This now allows the host to program event counters for a protected
> guest. That _might_ be a useful feature behind some debug option, but is
> most definitely *not* something we want to do for pVMs generally.

Unless I'm missing something, using PMUs on protected guests was added
by 722625c6f4c5b ("KVM: arm64: Reenable pmu in Protected Mode"). This
change is just a refactor that will allow us to add the same behavior
for a similar feature (tracing) without adding yet another copy of some
state before the guest switch.

> 
> Do we even need to make this shared data work at all for pKVM? The rest
> of the shared data between pKVM and the kernel is system information,
> which (importantly) doesn't have any guest context in it.
> 

Probably not, Marc actually mentioned on one of the first versions of
that this could be hidden behind a debug flag. To be honest one of the
reasons I didn't do that was because I wasn't sure what the appropriate
debug setting was. NVHE_EL2_DEBUG didn't seem quite right. DEBUG_KERNEL
maybe? Or a new one?

And then I suppose I got distracted by trying to make it have feature
parity with PMUs and forgot about the debug only thing.


> I'm perfectly happy leaving these sorts of features broken for pKVM and
> using the 'normal' way of getting percpu data to the nVHE hypervisor
> otherwise.
> 

I can do that. But do I also disable PMU at the same time in a new
commit? Now that both PMU and tracing is working maybe it would be a
waste to throw that away and hiding it behind an option is better. Or I
can leave the PMU as it is and just keep tracing disabled in pKVM.

I don't mind either way, my main goal was to get exclude/include guest
tracing working for normal VMs. For pKVM I don't have a strong opinion.
Oliver Upton Feb. 5, 2024, 1:04 p.m. UTC | #5
On Mon, Feb 05, 2024 at 12:16:53PM +0000, James Clark wrote:
> > This now allows the host to program event counters for a protected
> > guest. That _might_ be a useful feature behind some debug option, but is
> > most definitely *not* something we want to do for pVMs generally.
> 
> Unless I'm missing something, using PMUs on protected guests was added
> by 722625c6f4c5b ("KVM: arm64: Reenable pmu in Protected Mode"). This
> change is just a refactor that will allow us to add the same behavior
> for a similar feature (tracing) without adding yet another copy of some
> state before the guest switch.

Ha, I had forgotten about that patch (and I had reviewed it!)

My interpretation of the intent for that change was to enable the usage
of vPMU for non-protected VMs. The situation has changed since then, as
we use the shadow state for vCPUs unconditionally in protected mode as
of commit  be66e67f1750 ("KVM: arm64: Use the pKVM hyp vCPU structure
in handle___kvm_vcpu_run()")

Protected mode is well understood at this point to be a WIP feature, and
that not all things are expected to work with it. Eventually we will
need a way to distinguish between 'normal' VMs and true pVMs (i.e. the
VMM selected full isolation) in nVHE, but right now what we have enables
testing of some isolation features.

> > I'm perfectly happy leaving these sorts of features broken for pKVM and
> > using the 'normal' way of getting percpu data to the nVHE hypervisor
> > otherwise.
> > 
> 
> I can do that. But do I also disable PMU at the same time in a new
> commit? Now that both PMU and tracing is working maybe it would be a
> waste to throw that away and hiding it behind an option is better. Or I
> can leave the PMU as it is and just keep tracing disabled in pKVM.
> 
> I don't mind either way, my main goal was to get exclude/include guest
> tracing working for normal VMs. For pKVM I don't have a strong opinion.

Unless someone has strong opinions about making this work in protected
mode, I am happy to see tracing support limited to the 'normal' nVHE
configuration. The protected feature as a whole is just baggage until
upstream support is completed.
Marc Zyngier Feb. 5, 2024, 1:15 p.m. UTC | #6
On Mon, 05 Feb 2024 13:04:51 +0000,
Oliver Upton <oliver.upton@linux.dev> wrote:
> 
> Unless someone has strong opinions about making this work in protected
> mode, I am happy to see tracing support limited to the 'normal' nVHE
> configuration. The protected feature as a whole is just baggage until
> upstream support is completed.

Limiting tracing to non-protected mode is a must IMO. Allowing tracing
when pKVM is enabled is a sure way to expose secrets that should
stay... secret. The only exception I can think of is when
CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.

Thanks,

	M.
Oliver Upton Feb. 5, 2024, 1:21 p.m. UTC | #7
On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
> On Mon, 05 Feb 2024 13:04:51 +0000,
> Oliver Upton <oliver.upton@linux.dev> wrote:
> > 
> > Unless someone has strong opinions about making this work in protected
> > mode, I am happy to see tracing support limited to the 'normal' nVHE
> > configuration. The protected feature as a whole is just baggage until
> > upstream support is completed.
> 
> Limiting tracing to non-protected mode is a must IMO. Allowing tracing
> when pKVM is enabled is a sure way to expose secrets that should
> stay... secret. The only exception I can think of is when
> CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.

Zero argument there :) I left off the "and PMU" part of what I was
saying, because that was a feature that semi-worked in protected mode
before VM/VCPU shadowing support landed.
Marc Zyngier Feb. 5, 2024, 2:16 p.m. UTC | #8
On Mon, 05 Feb 2024 13:21:26 +0000,
Oliver Upton <oliver.upton@linux.dev> wrote:
> 
> On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
> > On Mon, 05 Feb 2024 13:04:51 +0000,
> > Oliver Upton <oliver.upton@linux.dev> wrote:
> > > 
> > > Unless someone has strong opinions about making this work in protected
> > > mode, I am happy to see tracing support limited to the 'normal' nVHE
> > > configuration. The protected feature as a whole is just baggage until
> > > upstream support is completed.
> > 
> > Limiting tracing to non-protected mode is a must IMO. Allowing tracing
> > when pKVM is enabled is a sure way to expose secrets that should
> > stay... secret. The only exception I can think of is when
> > CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.
> 
> Zero argument there :) I left off the "and PMU" part of what I was
> saying, because that was a feature that semi-worked in protected mode
> before VM/VCPU shadowing support landed.

Indeed. The goal is that as far as userspace is concerned, the host
running in protected mode shouldn't impair the ability to run
non-protected VMs, and it should all be hunky-dory, unless you
explicitly ask for a protected guest (at which point you are facing a
lot of restrictions).

PMU definitely falls into that last bucket, although I would hope that
we eventually get some support by context-switching the whole of the
PMU state. Don't worry, it's going to be cheap...

Thanks,

	M.
James Clark Feb. 5, 2024, 2:17 p.m. UTC | #9
On 05/02/2024 13:21, Oliver Upton wrote:
> On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
>> On Mon, 05 Feb 2024 13:04:51 +0000,
>> Oliver Upton <oliver.upton@linux.dev> wrote:
>>>
>>> Unless someone has strong opinions about making this work in protected
>>> mode, I am happy to see tracing support limited to the 'normal' nVHE
>>> configuration. The protected feature as a whole is just baggage until
>>> upstream support is completed.
>>
>> Limiting tracing to non-protected mode is a must IMO. Allowing tracing
>> when pKVM is enabled is a sure way to expose secrets that should
>> stay... secret. The only exception I can think of is when
>> CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.
> 
> Zero argument there :) I left off the "and PMU" part of what I was
> saying, because that was a feature that semi-worked in protected mode
> before VM/VCPU shadowing support landed.
> 

In that case I can hide all this behind CONFIG_NVHE_EL2_DEBUG for pKVM.
This will also have the effect of disabling PMU again for pKVM because I
moved that into this new shared area.

The same place will be used to store the state for normal nVHE and at
least then there is some code re-use and flexibility to use trace and
PMU for debugging if needed. And the copy on every switch gets deleted.
Marc Zyngier Feb. 5, 2024, 2:52 p.m. UTC | #10
On Mon, 05 Feb 2024 14:17:10 +0000,
James Clark <james.clark@arm.com> wrote:
>
> On 05/02/2024 13:21, Oliver Upton wrote:
> > On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
> >> On Mon, 05 Feb 2024 13:04:51 +0000,
> >> Oliver Upton <oliver.upton@linux.dev> wrote:
> >>>
> >>> Unless someone has strong opinions about making this work in protected
> >>> mode, I am happy to see tracing support limited to the 'normal' nVHE
> >>> configuration. The protected feature as a whole is just baggage until
> >>> upstream support is completed.
> >>
> >> Limiting tracing to non-protected mode is a must IMO. Allowing tracing
> >> when pKVM is enabled is a sure way to expose secrets that should
> >> stay... secret. The only exception I can think of is when
> >> CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.
> > 
> > Zero argument there :) I left off the "and PMU" part of what I was
> > saying, because that was a feature that semi-worked in protected mode
> > before VM/VCPU shadowing support landed.
> > 
> 
> In that case I can hide all this behind CONFIG_NVHE_EL2_DEBUG for pKVM.
> This will also have the effect of disabling PMU again for pKVM because I
> moved that into this new shared area.

I'm not sure what you have in mind, but dropping PMU support for
non-protected guests when protected-mode is enabled is not an
acceptable outcome.

Hiding the trace behind a debug option is fine as this is a global
setting that has no userspace impact, but impacting guests isn't.

	M.
James Clark Feb. 5, 2024, 3:37 p.m. UTC | #11
On 05/02/2024 14:52, Marc Zyngier wrote:
> On Mon, 05 Feb 2024 14:17:10 +0000,
> James Clark <james.clark@arm.com> wrote:
>>
>> On 05/02/2024 13:21, Oliver Upton wrote:
>>> On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
>>>> On Mon, 05 Feb 2024 13:04:51 +0000,
>>>> Oliver Upton <oliver.upton@linux.dev> wrote:
>>>>>
>>>>> Unless someone has strong opinions about making this work in protected
>>>>> mode, I am happy to see tracing support limited to the 'normal' nVHE
>>>>> configuration. The protected feature as a whole is just baggage until
>>>>> upstream support is completed.
>>>>
>>>> Limiting tracing to non-protected mode is a must IMO. Allowing tracing
>>>> when pKVM is enabled is a sure way to expose secrets that should
>>>> stay... secret. The only exception I can think of is when
>>>> CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.
>>>
>>> Zero argument there :) I left off the "and PMU" part of what I was
>>> saying, because that was a feature that semi-worked in protected mode
>>> before VM/VCPU shadowing support landed.
>>>
>>
>> In that case I can hide all this behind CONFIG_NVHE_EL2_DEBUG for pKVM.
>> This will also have the effect of disabling PMU again for pKVM because I
>> moved that into this new shared area.
> 
> I'm not sure what you have in mind, but dropping PMU support for
> non-protected guests when protected-mode is enabled is not an
> acceptable outcome.
> 
> Hiding the trace behind a debug option is fine as this is a global
> setting that has no userspace impact, but impacting guests isn't.
> 
> 	M.
> 

Hmmm in that case if there's currently no way to distinguish between
normal VMs and pVMs in protected-mode then what I was thinking of
probably won't work.

I'll actually just leave PMU as it is and only have tracing disabled in
protected-mode.

My only question now is whether to:

 * Keep this new shared area and use it for both PMU and trace status
   (well, for PMU only in protected mode as trace would always be
   disabled and doesn't actually need any state)

 * Delete patch 2, add a new normal per-cpu struct just for
   trace status that's only used in non-protected mode and revert to
   copying the PMU status into the vCPU on guest switch as it was
   previously.
Marc Zyngier Feb. 5, 2024, 3:50 p.m. UTC | #12
On Mon, 05 Feb 2024 15:37:34 +0000,
James Clark <james.clark@arm.com> wrote:
> 
> 
> 
> On 05/02/2024 14:52, Marc Zyngier wrote:
> > On Mon, 05 Feb 2024 14:17:10 +0000,
> > James Clark <james.clark@arm.com> wrote:
> >>
> >> On 05/02/2024 13:21, Oliver Upton wrote:
> >>> On Mon, Feb 05, 2024 at 01:15:36PM +0000, Marc Zyngier wrote:
> >>>> On Mon, 05 Feb 2024 13:04:51 +0000,
> >>>> Oliver Upton <oliver.upton@linux.dev> wrote:
> >>>>>
> >>>>> Unless someone has strong opinions about making this work in protected
> >>>>> mode, I am happy to see tracing support limited to the 'normal' nVHE
> >>>>> configuration. The protected feature as a whole is just baggage until
> >>>>> upstream support is completed.
> >>>>
> >>>> Limiting tracing to non-protected mode is a must IMO. Allowing tracing
> >>>> when pKVM is enabled is a sure way to expose secrets that should
> >>>> stay... secret. The only exception I can think of is when
> >>>> CONFIG_NVHE_EL2_DEBUG is enabled, at which point all bets are off.
> >>>
> >>> Zero argument there :) I left off the "and PMU" part of what I was
> >>> saying, because that was a feature that semi-worked in protected mode
> >>> before VM/VCPU shadowing support landed.
> >>>
> >>
> >> In that case I can hide all this behind CONFIG_NVHE_EL2_DEBUG for pKVM.
> >> This will also have the effect of disabling PMU again for pKVM because I
> >> moved that into this new shared area.
> > 
> > I'm not sure what you have in mind, but dropping PMU support for
> > non-protected guests when protected-mode is enabled is not an
> > acceptable outcome.
> > 
> > Hiding the trace behind a debug option is fine as this is a global
> > setting that has no userspace impact, but impacting guests isn't.
> > 
> > 	M.
> > 
> 
> Hmmm in that case if there's currently no way to distinguish between
> normal VMs and pVMs in protected-mode then what I was thinking of
> probably won't work.

Have you looked? kvm_vm_is_protected() has been in for a while, even
if that's not a lot. The upcoming code will flesh this helper out,

	M.
Oliver Upton Feb. 5, 2024, 4:38 p.m. UTC | #13
On Mon, Feb 05, 2024 at 03:50:12PM +0000, Marc Zyngier wrote:
> On Mon, 05 Feb 2024 15:37:34 +0000,
> James Clark <james.clark@arm.com> wrote:
> > 
> > Hmmm in that case if there's currently no way to distinguish between
> > normal VMs and pVMs in protected-mode then what I was thinking of
> > probably won't work.
> 
> Have you looked? kvm_vm_is_protected() has been in for a while, even
> if that's not a lot. The upcoming code will flesh this helper out,

Blame me for the bad intel. What I was mentioning earlier is that (1) we
use the hyp's shadowed vCPUs when running in protected mode and (2) we
don't sync PMU state into the shadow vCPU. So really PMU support for
non-protected guests has been broken since commit be66e67f1750 ("KVM:
arm64: Use the pKVM hyp vCPU structure in handle___kvm_vcpu_run()").

Fixing PMU support for non-protected guests implies the hypervisor will
conditionally trust data coming from the host based on the type of VM
that it is running.

For protected guests the hypervisor will need a private location to
do save/restore of the enable regs since I'm certain we will not trust
whatever the host tells us in these circumstances.

Both of these reasons has me feeling like the PMU context still needs to
be associated with the vCPU, though the tracing stuff can be percpu.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 824f29f04916..93d38ad257ed 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -466,6 +466,14 @@  struct kvm_cpu_context {
 	struct kvm_vcpu *__hyp_running_vcpu;
 };
 
+struct kvm_host_global_state {
+	struct kvm_pmu_events {
+		u32 events_host;
+		u32 events_guest;
+	} pmu_events;
+} ____cacheline_aligned;
+extern struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
+
 struct kvm_host_data {
 	struct kvm_cpu_context host_ctxt;
 };
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 119ca121b5f8..1a9dbb02bb4a 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -59,6 +59,7 @@  KVM_NVHE_ALIAS(alt_cb_patch_nops);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
+KVM_NVHE_ALIAS(kvm_host_global_state);
 
 /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
 KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 4796104c4471..bd6b2eda5f4f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -47,6 +47,20 @@ 
 
 static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
 
+/*
+ * Host state that isn't associated with any VCPU, but will affect any VCPU
+ * running on a host CPU in the future. This remains writable from the host and
+ * readable in the hyp.
+ *
+ * PER_CPU constructs aren't compatible between the hypervisor and the host so
+ * just define it as a NR_CPUS array. DECLARE_KVM_NVHE_PER_CPU works in both
+ * places, but not after the hypervisor protection is initialised. After that,
+ * kvm_arm_hyp_percpu_base isn't accessible from the host, so even if the
+ * kvm_host_global_state struct was shared with the host, the per-cpu offset
+ * can't be calculated without sharing even more data with the host.
+ */
+struct kvm_host_global_state kvm_host_global_state[NR_CPUS];
+
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
 DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
@@ -1016,8 +1030,6 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 		kvm_vgic_flush_hwstate(vcpu);
 
-		kvm_pmu_update_vcpu_events(vcpu);
-
 		/*
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index b5452e58c49a..3e45cc10ba96 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -159,6 +159,17 @@  static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
+	/*
+	 * Similar to kvm_vgic_global_state, but this one remains writable by
+	 * the host rather than read-only. Used to store per-cpu state about the
+	 * host that isn't associated with any particular VCPU.
+	 */
+	prot = pkvm_mkstate(KVM_PGTABLE_PROT_RW, PKVM_PAGE_SHARED_OWNED);
+	ret = pkvm_create_mappings(&kvm_host_global_state,
+				   &kvm_host_global_state + 1, prot);
+	if (ret)
+		return ret;
+
 	ret = create_hyp_debug_uart_mapping();
 	if (ret)
 		return ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4fc..89147a9dc38c 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -130,13 +130,18 @@  static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 	}
 }
 
+static struct kvm_pmu_events *kvm_nvhe_get_pmu_events(struct kvm_vcpu *vcpu)
+{
+	return &kvm_host_global_state[vcpu->cpu].pmu_events;
+}
+
 /*
  * Disable host events, enable guest events
  */
 #ifdef CONFIG_HW_PERF_EVENTS
 static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
 {
-	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
+	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
 
 	if (pmu->events_host)
 		write_sysreg(pmu->events_host, pmcntenclr_el0);
@@ -152,7 +157,7 @@  static bool __pmu_switch_to_guest(struct kvm_vcpu *vcpu)
  */
 static void __pmu_switch_to_host(struct kvm_vcpu *vcpu)
 {
-	struct kvm_pmu_events *pmu = &vcpu->arch.pmu.events;
+	struct kvm_pmu_events *pmu = kvm_nvhe_get_pmu_events(vcpu);
 
 	if (pmu->events_guest)
 		write_sysreg(pmu->events_guest, pmcntenclr_el0);
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index a243934c5568..136d5c6c1916 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -6,8 +6,6 @@ 
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
 
-static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
-
 /*
  * Given the perf event attributes and system type, determine
  * if we are going to need to switch counters at guest entry/exit.
@@ -28,7 +26,7 @@  static bool kvm_pmu_switch_needed(struct perf_event_attr *attr)
 
 struct kvm_pmu_events *kvm_get_pmu_events(void)
 {
-	return this_cpu_ptr(&kvm_pmu_events);
+	return &kvm_host_global_state[smp_processor_id()].pmu_events;
 }
 
 /*
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 4b9d8fb393a8..71a835970ab5 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -18,14 +18,8 @@  struct kvm_pmc {
 	struct perf_event *perf_event;
 };
 
-struct kvm_pmu_events {
-	u32 events_host;
-	u32 events_guest;
-};
-
 struct kvm_pmu {
 	struct irq_work overflow_work;
-	struct kvm_pmu_events events;
 	struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
 	int irq_num;
 	bool created;
@@ -79,17 +73,6 @@  void kvm_vcpu_pmu_resync_el0(void);
 #define kvm_vcpu_has_pmu(vcpu)					\
 	(vcpu_has_feature(vcpu, KVM_ARM_VCPU_PMU_V3))
 
-/*
- * Updates the vcpu's view of the pmu events for this cpu.
- * Must be called before every vcpu run after disabling interrupts, to ensure
- * that an interrupt cannot fire and update the structure.
- */
-#define kvm_pmu_update_vcpu_events(vcpu)				\
-	do {								\
-		if (!has_vhe() && kvm_vcpu_has_pmu(vcpu))		\
-			vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
-	} while (0)
-
 /*
  * Evaluates as true when emulating PMUv3p5, and false otherwise.
  */