
[RFC,18/26] kvm: arm64: Intercept PSCI_CPU_OFF host SMC calls

Message ID 20201104183630.27513-19-dbrazdil@google.com (mailing list archive)
State New, archived
Series [RFC,01/26] psci: Export configured PSCI version

Commit Message

David Brazdil Nov. 4, 2020, 6:36 p.m. UTC
Add a handler for the CPU_OFF PSCI host SMC trapped in KVM nVHE hyp code.
When invoked, it changes the recorded state of the core to OFF before
forwarding the call to EL3. If the call fails, it changes the state back
to ON and returns the error to the host.

Signed-off-by: David Brazdil <dbrazdil@google.com>
---
 arch/arm64/kvm/hyp/nvhe/psci.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)
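
For context, a rough host-side sketch of the call that lands in this
handler (editorial illustration, not part of the patch; it assumes the
SMC conduit and that protected nVHE traps host SMCs to EL2, and
cpu_die_sketch() is a made-up name modelled on cpu_psci_cpu_die()):

#include <linux/arm-smccc.h>
#include <linux/printk.h>
#include <uapi/linux/psci.h>

static void cpu_die_sketch(void)
{
	/*
	 * No known firmware uses the state field for CPU_OFF; Linux
	 * nevertheless passes a sensible default.
	 */
	u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN <<
		    PSCI_0_2_POWER_STATE_TYPE_SHIFT;
	struct arm_smccc_res res;

	/*
	 * The SMC traps to the nVHE hyp, which records this core as OFF
	 * and then forwards the call to EL3.
	 */
	arm_smccc_1_1_smc(PSCI_0_2_FN_CPU_OFF, state, &res);

	/* Only reached if the call failed; the hyp restores the ON state. */
	pr_crit("psci: unable to power off CPU (%ld)\n", (long)res.a0);
}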

Comments

Marc Zyngier Nov. 5, 2020, 11:30 a.m. UTC | #1
On 2020-11-04 18:36, David Brazdil wrote:
> Add a handler for the CPU_OFF PSCI host SMC trapped in KVM nVHE hyp
> code. When invoked, it changes the recorded state of the core to OFF
> before forwarding the call to EL3. If the call fails, it changes the
> state back to ON and returns the error to the host.
> 
> Signed-off-by: David Brazdil <dbrazdil@google.com>
> ---
>  arch/arm64/kvm/hyp/nvhe/psci.c | 30 +++++++++++++++++++++++++++++-
>  1 file changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kvm/hyp/nvhe/psci.c 
> b/arch/arm64/kvm/hyp/nvhe/psci.c
> index c3d0a6246c66..00dc0cab860c 100644
> --- a/arch/arm64/kvm/hyp/nvhe/psci.c
> +++ b/arch/arm64/kvm/hyp/nvhe/psci.c
> @@ -13,6 +13,8 @@
>  #include <kvm/arm_psci.h>
>  #include <uapi/linux/psci.h>
> 
> +#include <nvhe/spinlock.h>
> +
>  /* Config options set by the host. */
>  u32 kvm_host_psci_version = PSCI_VERSION(0, 0);
>  u32 kvm_host_psci_function_id[PSCI_FN_MAX];
> @@ -20,6 +22,7 @@ s64 hyp_physvirt_offset;
> 
>  #define __hyp_pa(x) ((phys_addr_t)(x) + hyp_physvirt_offset)
> 
> +static DEFINE_PER_CPU(hyp_spinlock_t, psci_cpu_lock);
>  DEFINE_PER_CPU(enum kvm_nvhe_psci_state, psci_cpu_state);
> 
>  static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
> @@ -76,9 +79,32 @@ static __noreturn unsigned long
> psci_forward_noreturn(struct kvm_cpu_context *ho
>  	hyp_panic(); /* unreachable */
>  }
> 
> +static int psci_cpu_off(u64 func_id, struct kvm_cpu_context 
> *host_ctxt)
> +{
> +	hyp_spinlock_t *cpu_lock = this_cpu_ptr(&psci_cpu_lock);
> +	enum kvm_nvhe_psci_state *cpu_power = this_cpu_ptr(&psci_cpu_state);
> +	u32 power_state = (u32)host_ctxt->regs.regs[1];
> +	int ret;
> +
> +	/* Change the recorded state to OFF before forwarding the call. */
> +	hyp_spin_lock(cpu_lock);
> +	*cpu_power = KVM_NVHE_PSCI_CPU_OFF;
> +	hyp_spin_unlock(cpu_lock);

So at this point, another CPU can observe the vcpu being "off", and
issue a PSCI_ON, which may result in an "already on". I'm not sure this
is an actual issue, but it is worth documenting.

What is definitely missing is a rationale for *why* we need to track
the state of the vcpus. I naively imagined that we could directly proxy
the PSCI calls to EL3, only repainting PC for PSCI_ON.

Thanks,

         M.
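
To spell out the window described above, a hypothetical interleaving
(editorial sketch, not code from the series):

/*
 *   CPU A (being turned off)         CPU B
 *   ------------------------         -----
 *   psci_cpu_off():
 *     *cpu_power = KVM_NVHE_PSCI_CPU_OFF
 *                                    reads A's psci_cpu_state == OFF
 *                                    issues PSCI CPU_ON targeting A
 *                                    EL3: A is still on -> ALREADY_ON
 *     psci_call(CPU_OFF, ...)        (A powers off at EL3 only here)
 *
 * The recorded state says OFF before EL3 has actually turned the core
 * off, so a racing CPU_ON can get PSCI_RET_ALREADY_ON from firmware.
 */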
David Brazdil Nov. 5, 2020, 11:42 a.m. UTC | #2
Hi Marc,

> > +static DEFINE_PER_CPU(hyp_spinlock_t, psci_cpu_lock);
> >  DEFINE_PER_CPU(enum kvm_nvhe_psci_state, psci_cpu_state);
> > 
> >  static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
> > @@ -76,9 +79,32 @@ static __noreturn unsigned long
> > psci_forward_noreturn(struct kvm_cpu_context *ho
> >  	hyp_panic(); /* unreachable */
> >  }
> > 
> > +static int psci_cpu_off(u64 func_id, struct kvm_cpu_context *host_ctxt)
> > +{
> > +	hyp_spinlock_t *cpu_lock = this_cpu_ptr(&psci_cpu_lock);
> > +	enum kvm_nvhe_psci_state *cpu_power = this_cpu_ptr(&psci_cpu_state);
> > +	u32 power_state = (u32)host_ctxt->regs.regs[1];
> > +	int ret;
> > +
> > +	/* Change the recorded state to OFF before forwarding the call. */
> > +	hyp_spin_lock(cpu_lock);
> > +	*cpu_power = KVM_NVHE_PSCI_CPU_OFF;
> > +	hyp_spin_unlock(cpu_lock);
> 
> So at this point, another CPU can observe the vcpu being "off", and issue
> a PSCI_ON, which may result in an "already on". I'm not sure this is an
> actual issue, but it is worth documenting.
> 
> What is definitely missing is a rationale for *why* we need to track the
> state of the vcpus. I naively imagined that we could directly proxy the
> PSCI calls to EL3, only repainting PC for PSCI_ON.

I think I've solved that particular problem by *not* using cpu_power for
AFFINITY_INFO. It's used only for resolving the race between CPU_ON/OFF.
You are, however, right that perhaps even that is not needed and resolving
the race should be left to the host. In that case the hypervisor would just
be repainting the CPU_ON/SUSPEND args, as you said.
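
For reference, a minimal sketch of the proxy-only alternative discussed
here (editorial and hypothetical: record_boot_args() and
kvm_hyp_cpu_entry are placeholder names; psci_call(), __hyp_pa() and
struct kvm_cpu_context are as used in the patch below):

static int psci_cpu_on_proxy_sketch(u64 func_id,
				    struct kvm_cpu_context *host_ctxt)
{
	u64 mpidr = host_ctxt->regs.regs[1];
	u64 host_entry = host_ctxt->regs.regs[2];  /* host's entry point PC */
	u64 host_ctx_id = host_ctxt->regs.regs[3];

	/* Stash where the host wants the released core to enter (placeholder). */
	record_boot_args(mpidr, host_entry, host_ctx_id);

	/*
	 * Repaint only PC so the new core enters hyp init code first, and
	 * leave CPU_ON/CPU_OFF races to EL3 (which returns ALREADY_ON and
	 * friends), so hyp needs no per-CPU power-state tracking.
	 */
	return psci_call(func_id, mpidr, __hyp_pa(kvm_hyp_cpu_entry), 0);
}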

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/psci.c b/arch/arm64/kvm/hyp/nvhe/psci.c
index c3d0a6246c66..00dc0cab860c 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci.c
@@ -13,6 +13,8 @@ 
 #include <kvm/arm_psci.h>
 #include <uapi/linux/psci.h>
 
+#include <nvhe/spinlock.h>
+
 /* Config options set by the host. */
 u32 kvm_host_psci_version = PSCI_VERSION(0, 0);
 u32 kvm_host_psci_function_id[PSCI_FN_MAX];
@@ -20,6 +22,7 @@  s64 hyp_physvirt_offset;
 
 #define __hyp_pa(x) ((phys_addr_t)(x) + hyp_physvirt_offset)
 
+static DEFINE_PER_CPU(hyp_spinlock_t, psci_cpu_lock);
 DEFINE_PER_CPU(enum kvm_nvhe_psci_state, psci_cpu_state);
 
 static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt)
@@ -76,9 +79,32 @@  static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *ho
 	hyp_panic(); /* unreachable */
 }
 
+static int psci_cpu_off(u64 func_id, struct kvm_cpu_context *host_ctxt)
+{
+	hyp_spinlock_t *cpu_lock = this_cpu_ptr(&psci_cpu_lock);
+	enum kvm_nvhe_psci_state *cpu_power = this_cpu_ptr(&psci_cpu_state);
+	u32 power_state = (u32)host_ctxt->regs.regs[1];
+	int ret;
+
+	/* Change the recorded state to OFF before forwarding the call. */
+	hyp_spin_lock(cpu_lock);
+	*cpu_power = KVM_NVHE_PSCI_CPU_OFF;
+	hyp_spin_unlock(cpu_lock);
+
+	ret = psci_call(func_id, power_state, 0, 0);
+
+	/* Call was unsuccessful. Restore the recorded state and return to host. */
+	hyp_spin_lock(cpu_lock);
+	*cpu_power = KVM_NVHE_PSCI_CPU_ON;
+	hyp_spin_unlock(cpu_lock);
+	return ret;
+}
+
 static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt)
 {
-	if (func_id == kvm_host_psci_function_id[PSCI_FN_MIGRATE])
+	if (func_id == kvm_host_psci_function_id[PSCI_FN_CPU_OFF])
+		return psci_cpu_off(func_id, host_ctxt);
+	else if (func_id == kvm_host_psci_function_id[PSCI_FN_MIGRATE])
 		return psci_forward(host_ctxt);
 	else
 		return PSCI_RET_NOT_SUPPORTED;
@@ -97,6 +123,8 @@  static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_
 	case PSCI_0_2_FN_SYSTEM_RESET:
 		psci_forward_noreturn(host_ctxt);
 		unreachable();
+	case PSCI_0_2_FN_CPU_OFF:
+		return psci_cpu_off(func_id, host_ctxt);
 	default:
 		return PSCI_RET_NOT_SUPPORTED;
 	}