diff mbox

[RFC,2/5] ARM/ARM64: KVM: Forward PSCI SYSTEM_OFF and SYSTEM_RESET to user space

Message ID 1381942954-22388-3-git-send-email-anup.patel@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Anup Patel Oct. 16, 2013, 5:02 p.m. UTC
The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
functions hence cannot be emulated by the in-kernel PSCI emulation code.

To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
structure with KVM_EXIT_PSCI exit reason.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
---
 arch/arm/include/asm/kvm_psci.h   |   25 +++++++++++++++-
 arch/arm/include/uapi/asm/kvm.h   |    2 ++
 arch/arm/kvm/arm.c                |   12 ++++++--
 arch/arm/kvm/handle_exit.c        |   12 ++++++--
 arch/arm/kvm/psci.c               |   57 ++++++++++++++++++++++++++++++++++---
 arch/arm64/include/asm/kvm_psci.h |   25 +++++++++++++++-
 arch/arm64/include/uapi/asm/kvm.h |    2 ++
 arch/arm64/kvm/handle_exit.c      |   24 ++++++++++++----
 8 files changed, 141 insertions(+), 18 deletions(-)

Comments

Christoffer Dall Oct. 16, 2013, 10:22 p.m. UTC | #1
On Wed, Oct 16, 2013 at 10:32:31PM +0530, Anup Patel wrote:
> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
> functions hence cannot be emulated by the in-kernel PSCI emulation code.
> 
> To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
> calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
> structure with KVM_EXIT_PSCI exit reason.
> 
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> ---
>  arch/arm/include/asm/kvm_psci.h   |   25 +++++++++++++++-
>  arch/arm/include/uapi/asm/kvm.h   |    2 ++
>  arch/arm/kvm/arm.c                |   12 ++++++--
>  arch/arm/kvm/handle_exit.c        |   12 ++++++--
>  arch/arm/kvm/psci.c               |   57 ++++++++++++++++++++++++++++++++++---
>  arch/arm64/include/asm/kvm_psci.h |   25 +++++++++++++++-
>  arch/arm64/include/uapi/asm/kvm.h |    2 ++
>  arch/arm64/kvm/handle_exit.c      |   24 ++++++++++++----
>  8 files changed, 141 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
> index 9a83d98..783566f 100644
> --- a/arch/arm/include/asm/kvm_psci.h
> +++ b/arch/arm/include/asm/kvm_psci.h
> @@ -18,6 +18,29 @@
>  #ifndef __ARM_KVM_PSCI_H__
>  #define __ARM_KVM_PSCI_H__
>  
> -bool kvm_psci_call(struct kvm_vcpu *vcpu);
> +#include <linux/kvm_host.h>
> +#include <asm/kvm_asm.h>
> +#include <asm/kvm_arm.h>
> +
> +/*
> + * The in-kernel PSCI emulation code wants to use a copy of run->psci,
> + * which is an anonymous type. Use our own type instead.
> + */
> +struct kvm_exit_psci {
> +	u32		fn;
> +	u64		args[7];
> +};
> +
> +static inline void kvm_prepare_psci(struct kvm_run *run,
> +				    struct kvm_exit_psci *psci)
> +{
> +	run->psci.fn = psci->fn;
> +	memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
> +	memset(&run->psci.ret, 0, sizeof(run->psci.ret));
> +	run->exit_reason = KVM_EXIT_PSCI;
> +}
> +
> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  
>  #endif /* __ARM_KVM_PSCI_H__ */
> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
> index c1ee007..205cf0e 100644
> --- a/arch/arm/include/uapi/asm/kvm.h
> +++ b/arch/arm/include/uapi/asm/kvm.h
> @@ -171,6 +171,8 @@ struct kvm_arch_memory_slot {
>  #define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
>  #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
>  #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
> +#define KVM_PSCI_FN_SYSTEM_OFF		KVM_PSCI_FN(4)
> +#define KVM_PSCI_FN_SYSTEM_RESET	KVM_PSCI_FN(5)
>  
>  #define KVM_PSCI_RET_SUCCESS		0
>  #define KVM_PSCI_RET_NI			((unsigned long)-1)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index cc5adb9..5ffd9a3 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -459,7 +459,7 @@ static void update_vttbr(struct kvm *kvm)
>  	spin_unlock(&kvm_vmid_lock);
>  }
>  
> -static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
> +static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu, struct kvm_run *run)

why this change?  run can always be derived from vcpu->run.

>  {
>  	if (likely(vcpu->arch.has_run_once))
>  		return 0;
> @@ -483,7 +483,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
>  	 */
>  	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
>  		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
> -		kvm_psci_call(vcpu);
> +		kvm_psci_call(vcpu, run);
>  	}
>  
>  	return 0;
> @@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	if (unlikely(!kvm_vcpu_initialized(vcpu)))
>  		return -ENOEXEC;
>  
> -	ret = kvm_vcpu_first_run_init(vcpu);
> +	ret = kvm_vcpu_first_run_init(vcpu, vcpu->run);
>  	if (ret)
>  		return ret;
>  
> @@ -530,6 +530,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  			return ret;
>  	}
>  
> +	if (run->exit_reason == KVM_EXIT_PSCI) {
> +		ret = kvm_handle_psci_return(vcpu, vcpu->run);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	if (vcpu->sigset_active)
>  		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
>  
> diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
> index df4c82d..1a12e6c 100644
> --- a/arch/arm/kvm/handle_exit.c
> +++ b/arch/arm/kvm/handle_exit.c
> @@ -40,14 +40,20 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  
>  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> +	int ret;
> +
>  	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
>  		      kvm_vcpu_hvc_get_imm(vcpu));
>  
> -	if (kvm_psci_call(vcpu))
> +	ret = kvm_psci_call(vcpu, run);
> +	if (!ret)
> +		return 1;
> +	else if (ret == -EINVAL) {
> +		kvm_inject_undefined(vcpu);
>  		return 1;
> +	}
>  
> -	kvm_inject_undefined(vcpu);
> -	return 1;
> +	return 0;
>  }
>  
>  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
> diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
> index 86a693a..72c23a7 100644
> --- a/arch/arm/kvm/psci.c
> +++ b/arch/arm/kvm/psci.c
> @@ -71,6 +71,45 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
>  	return KVM_PSCI_RET_SUCCESS;
>  }
>  
> +static void kvm_psci_system_off(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> +	struct kvm_exit_psci psci;
> +
> +	psci.fn = KVM_PSCI_FN_SYSTEM_OFF;
> +	memset(&psci.args, 0, sizeof(psci.args));
> +	kvm_prepare_psci(run, &psci);
> +}
> +
> +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> +	struct kvm_exit_psci psci;
> +
> +	psci.fn = KVM_PSCI_FN_SYSTEM_RESET;
> +	memset(&psci.args, 0, sizeof(psci.args));
> +	kvm_prepare_psci(run, &psci);
> +}
> +
> +/**
> + * kvm_handle_psci_return -- Handle PSCI after user space emulation
> + * @vcpu: The VCPU pointer
> + * @run:  The VCPU run struct containing the psci data
> + *
> + * This should only be called after returning from userspace for
> + * PSCI emulation.
> + */
> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> +	/*
> +	 * Currently, the PSCI functions passed to user space for emulation
> +	 * are SYSTEM_OFF and SYSTEM_RESET. These PSCI functions are not
> +	 * expected to return back after emulating in user space hence by
> +	 * default we return -EINVAL to avoid user space from doing RUN ioctl
> +	 * after handling KVM_EXIT_PSCI.
> +	 */
> +
> +	return -EINVAL;
> +}
> +

why would reset not return back after emulation?

also, do we need to impose this check or can we get rid of this all
together, if user space messes up, it's up to user space...

Are there any of the other PSCI functions that need specific handling in
the kernel on the return path?

>  /**
>   * kvm_psci_call - handle PSCI call if r0 value is in range
>   * @vcpu: Pointer to the VCPU struct
> @@ -81,8 +120,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
>   * function number specified in r0 is withing the PSCI range, and false
>   * otherwise.
>   */
> -bool kvm_psci_call(struct kvm_vcpu *vcpu)
> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run)

why add run here as well?

>  {
> +	int ret = 0;
>  	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
>  	unsigned long val;
>  
> @@ -98,11 +138,20 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
>  	case KVM_PSCI_FN_MIGRATE:
>  		val = KVM_PSCI_RET_NI;
>  		break;
> -
> +	case KVM_PSCI_FN_SYSTEM_OFF:
> +		kvm_psci_system_off(vcpu, run);
> +		val = KVM_PSCI_RET_SUCCESS;
> +		ret = -EINTR;
> +		break;
> +	case KVM_PSCI_FN_SYSTEM_RESET:
> +		kvm_psci_system_reset(vcpu, run);
> +		val = KVM_PSCI_RET_SUCCESS;
> +		ret = -EINTR;
> +		break;
>  	default:
> -		return false;
> +		return -EINVAL;
>  	}
>  
>  	*vcpu_reg(vcpu, 0) = val;
> -	return true;
> +	return ret;
>  }
> diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
> index e301a48..db90649 100644
> --- a/arch/arm64/include/asm/kvm_psci.h
> +++ b/arch/arm64/include/asm/kvm_psci.h
> @@ -18,6 +18,29 @@
>  #ifndef __ARM64_KVM_PSCI_H__
>  #define __ARM64_KVM_PSCI_H__
>  
> -bool kvm_psci_call(struct kvm_vcpu *vcpu);
> +#include <linux/kvm_host.h>
> +#include <asm/kvm_asm.h>
> +#include <asm/kvm_arm.h>
> +
> +/*
> + * The in-kernel PSCI emulation code wants to use a copy of run->psci,
> + * which is an anonymous type. Use our own type instead.
> + */
> +struct kvm_exit_psci {
> +	u32		fn;
> +	u64		args[7];
> +};
> +
> +static inline void kvm_prepare_psci(struct kvm_run *run,
> +				    struct kvm_exit_psci *psci)
> +{
> +	run->psci.fn = psci->fn;
> +	memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
> +	memset(&run->psci.ret, 0, sizeof(run->psci.ret));
> +	run->exit_reason = KVM_EXIT_PSCI;
> +}
> +
> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
>  
>  #endif /* __ARM64_KVM_PSCI_H__ */
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index d9f026b..f678902 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -158,6 +158,8 @@ struct kvm_arch_memory_slot {
>  #define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
>  #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
>  #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
> +#define KVM_PSCI_FN_SYSTEM_OFF		KVM_PSCI_FN(4)
> +#define KVM_PSCI_FN_SYSTEM_RESET	KVM_PSCI_FN(5)
>  
>  #define KVM_PSCI_RET_SUCCESS		0
>  #define KVM_PSCI_RET_NI			((unsigned long)-1)
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 9beaca0..28e20bb 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -30,20 +30,32 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>  
>  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> -	if (kvm_psci_call(vcpu))
> +	int ret;
> +
> +	ret = kvm_psci_call(vcpu, run);
> +	if (!ret)
> +		return 1;
> +	else if (ret == -EINVAL) {
> +		kvm_inject_undefined(vcpu);
>  		return 1;
> +	}
>  
> -	kvm_inject_undefined(vcpu);
> -	return 1;
> +	return 0;
>  }
>  
>  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> -	if (kvm_psci_call(vcpu))
> +	int ret;
> +
> +	ret = kvm_psci_call(vcpu, run);
> +	if (!ret)
> +		return 1;
> +	else if (ret == -EINVAL) {
> +		kvm_inject_undefined(vcpu);
>  		return 1;
> +	}
>  
> -	kvm_inject_undefined(vcpu);
> -	return 1;
> +	return 0;
>  }
>  
>  /**
> -- 
> 1.7.9.5
>
Anup Patel Oct. 17, 2013, 5:52 a.m. UTC | #2
On Thu, Oct 17, 2013 at 3:52 AM, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Wed, Oct 16, 2013 at 10:32:31PM +0530, Anup Patel wrote:
>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>>
>> To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
>> calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
>> structure with KVM_EXIT_PSCI exit reason.
>>
>> Signed-off-by: Anup Patel <anup.patel@linaro.org>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> ---
>>  arch/arm/include/asm/kvm_psci.h   |   25 +++++++++++++++-
>>  arch/arm/include/uapi/asm/kvm.h   |    2 ++
>>  arch/arm/kvm/arm.c                |   12 ++++++--
>>  arch/arm/kvm/handle_exit.c        |   12 ++++++--
>>  arch/arm/kvm/psci.c               |   57 ++++++++++++++++++++++++++++++++++---
>>  arch/arm64/include/asm/kvm_psci.h |   25 +++++++++++++++-
>>  arch/arm64/include/uapi/asm/kvm.h |    2 ++
>>  arch/arm64/kvm/handle_exit.c      |   24 ++++++++++++----
>>  8 files changed, 141 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
>> index 9a83d98..783566f 100644
>> --- a/arch/arm/include/asm/kvm_psci.h
>> +++ b/arch/arm/include/asm/kvm_psci.h
>> @@ -18,6 +18,29 @@
>>  #ifndef __ARM_KVM_PSCI_H__
>>  #define __ARM_KVM_PSCI_H__
>>
>> -bool kvm_psci_call(struct kvm_vcpu *vcpu);
>> +#include <linux/kvm_host.h>
>> +#include <asm/kvm_asm.h>
>> +#include <asm/kvm_arm.h>
>> +
>> +/*
>> + * The in-kernel PSCI emulation code wants to use a copy of run->psci,
>> + * which is an anonymous type. Use our own type instead.
>> + */
>> +struct kvm_exit_psci {
>> +     u32             fn;
>> +     u64             args[7];
>> +};
>> +
>> +static inline void kvm_prepare_psci(struct kvm_run *run,
>> +                                 struct kvm_exit_psci *psci)
>> +{
>> +     run->psci.fn = psci->fn;
>> +     memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
>> +     memset(&run->psci.ret, 0, sizeof(run->psci.ret));
>> +     run->exit_reason = KVM_EXIT_PSCI;
>> +}
>> +
>> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
>> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
>>
>>  #endif /* __ARM_KVM_PSCI_H__ */
>> diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
>> index c1ee007..205cf0e 100644
>> --- a/arch/arm/include/uapi/asm/kvm.h
>> +++ b/arch/arm/include/uapi/asm/kvm.h
>> @@ -171,6 +171,8 @@ struct kvm_arch_memory_slot {
>>  #define KVM_PSCI_FN_CPU_OFF          KVM_PSCI_FN(1)
>>  #define KVM_PSCI_FN_CPU_ON           KVM_PSCI_FN(2)
>>  #define KVM_PSCI_FN_MIGRATE          KVM_PSCI_FN(3)
>> +#define KVM_PSCI_FN_SYSTEM_OFF               KVM_PSCI_FN(4)
>> +#define KVM_PSCI_FN_SYSTEM_RESET     KVM_PSCI_FN(5)
>>
>>  #define KVM_PSCI_RET_SUCCESS         0
>>  #define KVM_PSCI_RET_NI                      ((unsigned long)-1)
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index cc5adb9..5ffd9a3 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -459,7 +459,7 @@ static void update_vttbr(struct kvm *kvm)
>>       spin_unlock(&kvm_vmid_lock);
>>  }
>>
>> -static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
>> +static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu, struct kvm_run *run)
>
> why this change?  run can always be derived from vcpu->run.

Because thats how kvm_handle_mmio_return() and handle_exit() are
implemented.

If you think this is inappropriate then function prototypes for
kvm_handle_mmio_return() and handle_exit() should also change.

IMHO, we should be consistent in usage of vcpu->run.

>
>>  {
>>       if (likely(vcpu->arch.has_run_once))
>>               return 0;
>> @@ -483,7 +483,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
>>        */
>>       if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
>>               *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
>> -             kvm_psci_call(vcpu);
>> +             kvm_psci_call(vcpu, run);
>>       }
>>
>>       return 0;
>> @@ -520,7 +520,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>       if (unlikely(!kvm_vcpu_initialized(vcpu)))
>>               return -ENOEXEC;
>>
>> -     ret = kvm_vcpu_first_run_init(vcpu);
>> +     ret = kvm_vcpu_first_run_init(vcpu, vcpu->run);
>>       if (ret)
>>               return ret;
>>
>> @@ -530,6 +530,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>                       return ret;
>>       }
>>
>> +     if (run->exit_reason == KVM_EXIT_PSCI) {
>> +             ret = kvm_handle_psci_return(vcpu, vcpu->run);
>> +             if (ret)
>> +                     return ret;
>> +     }
>> +
>>       if (vcpu->sigset_active)
>>               sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
>>
>> diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
>> index df4c82d..1a12e6c 100644
>> --- a/arch/arm/kvm/handle_exit.c
>> +++ b/arch/arm/kvm/handle_exit.c
>> @@ -40,14 +40,20 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>
>>  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  {
>> +     int ret;
>> +
>>       trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
>>                     kvm_vcpu_hvc_get_imm(vcpu));
>>
>> -     if (kvm_psci_call(vcpu))
>> +     ret = kvm_psci_call(vcpu, run);
>> +     if (!ret)
>> +             return 1;
>> +     else if (ret == -EINVAL) {
>> +             kvm_inject_undefined(vcpu);
>>               return 1;
>> +     }
>>
>> -     kvm_inject_undefined(vcpu);
>> -     return 1;
>> +     return 0;
>>  }
>>
>>  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
>> index 86a693a..72c23a7 100644
>> --- a/arch/arm/kvm/psci.c
>> +++ b/arch/arm/kvm/psci.c
>> @@ -71,6 +71,45 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
>>       return KVM_PSCI_RET_SUCCESS;
>>  }
>>
>> +static void kvm_psci_system_off(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> +{
>> +     struct kvm_exit_psci psci;
>> +
>> +     psci.fn = KVM_PSCI_FN_SYSTEM_OFF;
>> +     memset(&psci.args, 0, sizeof(psci.args));
>> +     kvm_prepare_psci(run, &psci);
>> +}
>> +
>> +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> +{
>> +     struct kvm_exit_psci psci;
>> +
>> +     psci.fn = KVM_PSCI_FN_SYSTEM_RESET;
>> +     memset(&psci.args, 0, sizeof(psci.args));
>> +     kvm_prepare_psci(run, &psci);
>> +}
>> +
>> +/**
>> + * kvm_handle_psci_return -- Handle PSCI after user space emulation
>> + * @vcpu: The VCPU pointer
>> + * @run:  The VCPU run struct containing the psci data
>> + *
>> + * This should only be called after returning from userspace for
>> + * PSCI emulation.
>> + */
>> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
>> +{
>> +     /*
>> +      * Currently, the PSCI functions passed to user space for emulation
>> +      * are SYSTEM_OFF and SYSTEM_RESET. These PSCI functions are not
>> +      * expected to return back after emulating in user space hence by
>> +      * default we return -EINVAL to avoid user space from doing RUN ioctl
>> +      * after handling KVM_EXIT_PSCI.
>> +      */
>> +
>> +     return -EINVAL;
>> +}
>> +
>
> why would reset not return back after emulation?
>
> also, do we need to impose this check or can we get rid of this all
> together, if user space messes up, it's up to user space...

Yes, I think we should allow SYSTEM_RESET to return. I'll update this
with a check.

Actually, when we reach here we also need to update X0-X3 (for ARM R0-R3)
with return values in run->psci. I'll add this in subsequent revision.

>
> Are there any of the other PSCI functions that need specific handling in
> the kernel on the return path?

So far only SYSTEM_RESET and SYSTEM_OFF as per current PSCI spec.

I think people might be also be interested in emulating vendor specific PSCI
calls in user space (QEMU or KVMTOOL or Some proprietary software).

>
>>  /**
>>   * kvm_psci_call - handle PSCI call if r0 value is in range
>>   * @vcpu: Pointer to the VCPU struct
>> @@ -81,8 +120,9 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
>>   * function number specified in r0 is withing the PSCI range, and false
>>   * otherwise.
>>   */
>> -bool kvm_psci_call(struct kvm_vcpu *vcpu)
>> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run)
>
> why add run here as well?

Please see my previous comments.

--
Anup

>
>>  {
>> +     int ret = 0;
>>       unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
>>       unsigned long val;
>>
>> @@ -98,11 +138,20 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
>>       case KVM_PSCI_FN_MIGRATE:
>>               val = KVM_PSCI_RET_NI;
>>               break;
>> -
>> +     case KVM_PSCI_FN_SYSTEM_OFF:
>> +             kvm_psci_system_off(vcpu, run);
>> +             val = KVM_PSCI_RET_SUCCESS;
>> +             ret = -EINTR;
>> +             break;
>> +     case KVM_PSCI_FN_SYSTEM_RESET:
>> +             kvm_psci_system_reset(vcpu, run);
>> +             val = KVM_PSCI_RET_SUCCESS;
>> +             ret = -EINTR;
>> +             break;
>>       default:
>> -             return false;
>> +             return -EINVAL;
>>       }
>>
>>       *vcpu_reg(vcpu, 0) = val;
>> -     return true;
>> +     return ret;
>>  }
>> diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
>> index e301a48..db90649 100644
>> --- a/arch/arm64/include/asm/kvm_psci.h
>> +++ b/arch/arm64/include/asm/kvm_psci.h
>> @@ -18,6 +18,29 @@
>>  #ifndef __ARM64_KVM_PSCI_H__
>>  #define __ARM64_KVM_PSCI_H__
>>
>> -bool kvm_psci_call(struct kvm_vcpu *vcpu);
>> +#include <linux/kvm_host.h>
>> +#include <asm/kvm_asm.h>
>> +#include <asm/kvm_arm.h>
>> +
>> +/*
>> + * The in-kernel PSCI emulation code wants to use a copy of run->psci,
>> + * which is an anonymous type. Use our own type instead.
>> + */
>> +struct kvm_exit_psci {
>> +     u32             fn;
>> +     u64             args[7];
>> +};
>> +
>> +static inline void kvm_prepare_psci(struct kvm_run *run,
>> +                                 struct kvm_exit_psci *psci)
>> +{
>> +     run->psci.fn = psci->fn;
>> +     memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
>> +     memset(&run->psci.ret, 0, sizeof(run->psci.ret));
>> +     run->exit_reason = KVM_EXIT_PSCI;
>> +}
>> +
>> +int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
>> +int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
>>
>>  #endif /* __ARM64_KVM_PSCI_H__ */
>> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
>> index d9f026b..f678902 100644
>> --- a/arch/arm64/include/uapi/asm/kvm.h
>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>> @@ -158,6 +158,8 @@ struct kvm_arch_memory_slot {
>>  #define KVM_PSCI_FN_CPU_OFF          KVM_PSCI_FN(1)
>>  #define KVM_PSCI_FN_CPU_ON           KVM_PSCI_FN(2)
>>  #define KVM_PSCI_FN_MIGRATE          KVM_PSCI_FN(3)
>> +#define KVM_PSCI_FN_SYSTEM_OFF               KVM_PSCI_FN(4)
>> +#define KVM_PSCI_FN_SYSTEM_RESET     KVM_PSCI_FN(5)
>>
>>  #define KVM_PSCI_RET_SUCCESS         0
>>  #define KVM_PSCI_RET_NI                      ((unsigned long)-1)
>> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
>> index 9beaca0..28e20bb 100644
>> --- a/arch/arm64/kvm/handle_exit.c
>> +++ b/arch/arm64/kvm/handle_exit.c
>> @@ -30,20 +30,32 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
>>
>>  static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  {
>> -     if (kvm_psci_call(vcpu))
>> +     int ret;
>> +
>> +     ret = kvm_psci_call(vcpu, run);
>> +     if (!ret)
>> +             return 1;
>> +     else if (ret == -EINVAL) {
>> +             kvm_inject_undefined(vcpu);
>>               return 1;
>> +     }
>>
>> -     kvm_inject_undefined(vcpu);
>> -     return 1;
>> +     return 0;
>>  }
>>
>>  static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  {
>> -     if (kvm_psci_call(vcpu))
>> +     int ret;
>> +
>> +     ret = kvm_psci_call(vcpu, run);
>> +     if (!ret)
>> +             return 1;
>> +     else if (ret == -EINVAL) {
>> +             kvm_inject_undefined(vcpu);
>>               return 1;
>> +     }
>>
>> -     kvm_inject_undefined(vcpu);
>> -     return 1;
>> +     return 0;
>>  }
>>
>>  /**
>> --
>> 1.7.9.5
>>
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm
Marc Zyngier Oct. 17, 2013, 8:37 a.m. UTC | #3
On 16/10/13 18:02, Anup Patel wrote:
> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
> functions hence cannot be emulated by the in-kernel PSCI emulation code.

Why can't we implement system-wide functionality in the kernel? I fail
to see the issue here.

> To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
> calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
> structure with KVM_EXIT_PSCI exit reason.

I'm really not keen on this approach. Having part of the PSCI
implementation offloaded to userspace means we don't have a complete
implementation in KVM anymore, and we end-up duplicating functionality
all over the place.

Also, OFF and RESET are not PSCI specific concepts, and could be
implemented in various ways. I'm more inclined to return a
*standardized* exit code that the various platforms can interpret.

	M.
Peter Maydell Oct. 17, 2013, 9:10 a.m. UTC | #4
On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 16/10/13 18:02, Anup Patel wrote:
>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>
> Why can't we implement system-wide functionality in the kernel? I fail
> to see the issue here.

Because the kernel isn't emulating the whole board, and you need
to power off or reset the whole board, not just the CPUs.

> I'm really not keen on this approach. Having part of the PSCI
> implementation offloaded to userspace means we don't have a complete
> implementation in KVM anymore, and we end-up duplicating functionality
> all over the place.
>
> Also, OFF and RESET are not PSCI specific concepts, and could be
> implemented in various ways. I'm more inclined to return a
> *standardized* exit code that the various platforms can interpret.

Maybe we should have a more generic "kernel can't handle this,
toss it to userspace" API? That might also fit in with supporting
guests that want to make SMC calls to an emulated monitor...

-- PMM
Marc Zyngier Oct. 17, 2013, 9:21 a.m. UTC | #5
On 17/10/13 10:10, Peter Maydell wrote:
> On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On 16/10/13 18:02, Anup Patel wrote:
>>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>>
>> Why can't we implement system-wide functionality in the kernel? I fail
>> to see the issue here.
> 
> Because the kernel isn't emulating the whole board, and you need
> to power off or reset the whole board, not just the CPUs.

In which case we can forward a generic event, once KVM has dealt with
the CPUs.

>> I'm really not keen on this approach. Having part of the PSCI
>> implementation offloaded to userspace means we don't have a complete
>> implementation in KVM anymore, and we end-up duplicating functionality
>> all over the place.
>>
>> Also, OFF and RESET are not PSCI specific concepts, and could be
>> implemented in various ways. I'm more inclined to return a
>> *standardized* exit code that the various platforms can interpret.
> 
> Maybe we should have a more generic "kernel can't handle this,
> toss it to userspace" API? That might also fit in with supporting
> guests that want to make SMC calls to an emulated monitor...

Indeed.

	M.
Peter Maydell Oct. 17, 2013, 9:31 a.m. UTC | #6
On 17 October 2013 10:21, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 17/10/13 10:10, Peter Maydell wrote:
>> On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On 16/10/13 18:02, Anup Patel wrote:
>>>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>>>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>>>
>>> Why can't we implement system-wide functionality in the kernel? I fail
>>> to see the issue here.
>>
>> Because the kernel isn't emulating the whole board, and you need
>> to power off or reset the whole board, not just the CPUs.
>
> In which case we can forward a generic event, once KVM has dealt with
> the CPUs.

Ideally per-CPU reset should be driven by userspace, incidentally.
We ought to have an ioctl for "hey, reset this CPU': at the moment
we have to fake it up by having QEMU feed the CPU back the register
values it read on powerup, which is kind of ugly.

-- PMM
Anup Patel Oct. 17, 2013, 11:07 a.m. UTC | #7
On Thu, Oct 17, 2013 at 2:40 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> On 16/10/13 18:02, Anup Patel wrote:
>>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>>
>> Why can't we implement system-wide functionality in the kernel? I fail
>> to see the issue here.
>
> Because the kernel isn't emulating the whole board, and you need
> to power off or reset the whole board, not just the CPUs.
>
>> I'm really not keen on this approach. Having part of the PSCI
>> implementation offloaded to userspace means we don't have a complete
>> implementation in KVM anymore, and we end-up duplicating functionality
>> all over the place.
>>
>> Also, OFF and RESET are not PSCI specific concepts, and could be
>> implemented in various ways. I'm more inclined to return a
>> *standardized* exit code that the various platforms can interpret.
>
> Maybe we should have a more generic "kernel can't handle this,
> toss it to userspace" API? That might also fit in with supporting
> guests that want to make SMC calls to an emulated monitor...

Excatly, that is what this patch does by forwarding PSCI SYSTEM_OFF
and SYSTEM_RESET to user space (QEMU or KVMTOOL).

--
Anup

>
> -- PMM
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm
Marc Zyngier Oct. 17, 2013, 11:13 a.m. UTC | #8
On 17/10/13 12:07, Anup Patel wrote:
> On Thu, Oct 17, 2013 at 2:40 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
>> On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
>>> On 16/10/13 18:02, Anup Patel wrote:
>>>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>>>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>>>
>>> Why can't we implement system-wide functionality in the kernel? I fail
>>> to see the issue here.
>>
>> Because the kernel isn't emulating the whole board, and you need
>> to power off or reset the whole board, not just the CPUs.
>>
>>> I'm really not keen on this approach. Having part of the PSCI
>>> implementation offloaded to userspace means we don't have a complete
>>> implementation in KVM anymore, and we end-up duplicating functionality
>>> all over the place.
>>>
>>> Also, OFF and RESET are not PSCI specific concepts, and could be
>>> implemented in various ways. I'm more inclined to return a
>>> *standardized* exit code that the various platforms can interpret.
>>
>> Maybe we should have a more generic "kernel can't handle this,
>> toss it to userspace" API? That might also fit in with supporting
>> guests that want to make SMC calls to an emulated monitor...
> 
> Excatly, that is what this patch does by forwarding PSCI SYSTEM_OFF
> and SYSTEM_RESET to user space (QEMU or KVMTOOL).

I think you missed the words standardized and generic above.

	M.
Anup Patel Oct. 17, 2013, 11:13 a.m. UTC | #9
On Thu, Oct 17, 2013 at 2:07 PM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On 16/10/13 18:02, Anup Patel wrote:
>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>
> Why can't we implement system-wide functionality in the kernel? I fail
> to see the issue here.
>
>> To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
>> calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
>> structure with KVM_EXIT_PSCI exit reason.
>
> I'm really not keen on this approach. Having part of the PSCI
> implementation offloaded to userspace means we don't have a complete
> implementation in KVM anymore, and we end-up duplicating functionality
> all over the place.
>
> Also, OFF and RESET are not PSCI specific concepts, and could be
> implemented in various ways. I'm more inclined to return a
> *standardized* exit code that the various platforms can interpret.

Please refer to the documentation of SYSTEM_OFF and SYSTEM_RESET
functions in the PSCI specifications.
(More preciesly section 5.10 and 5.11 of
http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0022b/index.html)

--
Anup

>
>         M.
> --
> Jazz is not dead. It just smells funny...
>
>
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm
Christoffer Dall Oct. 17, 2013, 6:29 p.m. UTC | #10
On Thu, Oct 17, 2013 at 04:43:59PM +0530, Anup Patel wrote:
> On Thu, Oct 17, 2013 at 2:07 PM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> > On 16/10/13 18:02, Anup Patel wrote:
> >> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
> >> functions hence cannot be emulated by the in-kernel PSCI emulation code.
> >
> > Why can't we implement system-wide functionality in the kernel? I fail
> > to see the issue here.
> >
> >> To tackle this, we forward PSCI SYSTEM_OFF and SYSTEM_RESET function
> >> calls from Guest to user space (i.e. QEMU or KVMTOOL) via KVM run
> >> structure with KVM_EXIT_PSCI exit reason.
> >
> > I'm really not keen on this approach. Having part of the PSCI
> > implementation offloaded to userspace means we don't have a complete
> > implementation in KVM anymore, and we end-up duplicating functionality
> > all over the place.
> >
> > Also, OFF and RESET are not PSCI specific concepts, and could be
> > implemented in various ways. I'm more inclined to return a
> > *standardized* exit code that the various platforms can interpret.
> 
> Please refer to the documentation of SYSTEM_OFF and SYSTEM_RESET
> functions in the PSCI specifications.
> (More preciesly section 5.10 and 5.11 of
> http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0022b/index.html)
> 
Anup,

I think everyone knows where to find the specs, and I'm even more sure
that Marc knows the details of PSCI.

If there is a certain aspect of the spec that you find helpful to this
discussion that supports an implementation or design decision, I suggest
you try more carefully to explain why you think it is the right
solution, possibly using references to the specs in order to help people
out.

-Christoffer
Christoffer Dall Oct. 17, 2013, 6:34 p.m. UTC | #11
On Thu, Oct 17, 2013 at 10:21:11AM +0100, Marc Zyngier wrote:
> On 17/10/13 10:10, Peter Maydell wrote:
> > On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
> >> On 16/10/13 18:02, Anup Patel wrote:
> >>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
> >>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
> >>
> >> Why can't we implement system-wide functionality in the kernel? I fail
> >> to see the issue here.
> > 
> > Because the kernel isn't emulating the whole board, and you need
> > to power off or reset the whole board, not just the CPUs.
> 
> In which case we can forward a generic event, once KVM has dealt with
> the CPUs.
> 
> >> I'm really not keen on this approach. Having part of the PSCI
> >> implementation offloaded to userspace means we don't have a complete
> >> implementation in KVM anymore, and we end-up duplicating functionality
> >> all over the place.
> >>
> >> Also, OFF and RESET are not PSCI specific concepts, and could be
> >> implemented in various ways. I'm more inclined to return a
> >> *standardized* exit code that the various platforms can interpret.
> > 
> > Maybe we should have a more generic "kernel can't handle this,
> > toss it to userspace" API? That might also fit in with supporting
> > guests that want to make SMC calls to an emulated monitor...
> 
> Indeed.
> 
Agreed as well.  That's what I was trying to say with a more generic
solution, and exiting on a subset of SMC/HVC calls for PSCI is a weird
compromise.

After all, PSCI is nothing more than acting on SMC or HVC calls, and I'm
quite sure there will be a need for user space to emulate handling of
SMC calls sooner or later, and that is the interface we need.

We may end up with both kernel and user space support for PSCI in that
case, but naturally it would always be *either* the kernel handling the
whole thing, *or* user space handling the whole thing.

In both cases we may need extra functionality to communicate between
user space and KVM, such as suspending a vcpu from user space and waking
it up on in-kernel generated timer interrupts, or, conversely, telling
user space that there was a PSCI call to turn off the system.

From my point of view the most difficult part to figure out is how to
support general handling of secure monitor services in user space.  I
don't see any particular problems with expanding the exit reasons with
things like "turn off VM" or "reset VM"?

-Christoffer
Anup Patel Oct. 18, 2013, 4:18 a.m. UTC | #12
On Fri, Oct 18, 2013 at 12:04 AM, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Thu, Oct 17, 2013 at 10:21:11AM +0100, Marc Zyngier wrote:
>> On 17/10/13 10:10, Peter Maydell wrote:
>> > On 17 October 2013 09:37, Marc Zyngier <marc.zyngier@arm.com> wrote:
>> >> On 16/10/13 18:02, Anup Patel wrote:
>> >>> The PSCI SYSTEM_OFF and SYSTEM_RESET functions are VM or Guest level
>> >>> functions hence cannot be emulated by the in-kernel PSCI emulation code.
>> >>
>> >> Why can't we implement system-wide functionality in the kernel? I fail
>> >> to see the issue here.
>> >
>> > Because the kernel isn't emulating the whole board, and you need
>> > to power off or reset the whole board, not just the CPUs.
>>
>> In which case we can forward a generic event, once KVM has dealt with
>> the CPUs.
>>
>> >> I'm really not keen on this approach. Having part of the PSCI
>> >> implementation offloaded to userspace means we don't have a complete
>> >> implementation in KVM anymore, and we end-up duplicating functionality
>> >> all over the place.
>> >>
>> >> Also, OFF and RESET are not PSCI specific concepts, and could be
>> >> implemented in various ways. I'm more inclined to return a
>> >> *standardized* exit code that the various platforms can interpret.
>> >
>> > Maybe we should have a more generic "kernel can't handle this,
>> > toss it to userspace" API? That might also fit in with supporting
>> > guests that want to make SMC calls to an emulated monitor...
>>
>> Indeed.
>>
> Agreed as well.  That's what I was trying to say with a more generic
> solution, and exiting on a subset of SMC/HVC calls for PSCI is a weird
> compromise.

OK.

>
> After all, PSCI is nothing more than acting on SMC or HVC calls, and I'm
> quite sure there will be a need for user space to emulate handling of
> SMC calls sooner or later, and that is the interface we need.
>
> We may end up with both kernel and user space support for PSCI in that
> case, but naturally it would always be *either* the kernel handling the
> whole thing, *or* user space handling the whole thing.
>
> In both cases we may need extra functionality to communicate between
> user space and KVM, such as suspending a vcpu from user space and waking
> it up on in-kernel generated timer interrupts, or, conversely, telling
> user space that there was a PSCI call to turn off the system.

Agreed. Lets keep "user space emulation of PSCI for SMC/HVC" out of
discussions because we can implement PSCI SYSTEM_OFF and
SYSTEM_RESET without that.

>
> >From my point of view the most difficult part to figure out is how to
> support general handling of secure monitor services in user space.  I
> don't see any particular problems with expanding the exit reasons with
> things like "turn off VM" or "reset VM"?

Fine with me.

I'll keep separate exit reasons for HVC-based SYSTEM_OFF and
SYSTEM_RESET.

If everyone agrees with this then I'll change it accordingly in revised patch.

(Marc? Christoffer? PMM? ...)

--
Anup

>
> -Christoffer
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm
diff mbox

Patch

diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98..783566f 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,29 @@ 
 #ifndef __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * The in-kernel PSCI emulation code wants to use a copy of run->psci,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_psci {
+	u32		fn;
+	u64		args[7];
+};
+
+static inline void kvm_prepare_psci(struct kvm_run *run,
+				    struct kvm_exit_psci *psci)
+{
+	run->psci.fn = psci->fn;
+	memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
+	memset(&run->psci.ret, 0, sizeof(run->psci.ret));
+	run->exit_reason = KVM_EXIT_PSCI;
+}
+
+int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007..205cf0e 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -171,6 +171,8 @@  struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+#define KVM_PSCI_FN_SYSTEM_OFF		KVM_PSCI_FN(4)
+#define KVM_PSCI_FN_SYSTEM_RESET	KVM_PSCI_FN(5)
 
 #define KVM_PSCI_RET_SUCCESS		0
 #define KVM_PSCI_RET_NI			((unsigned long)-1)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc5adb9..5ffd9a3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -459,7 +459,7 @@  static void update_vttbr(struct kvm *kvm)
 	spin_unlock(&kvm_vmid_lock);
 }
 
-static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
@@ -483,7 +483,7 @@  static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 */
 	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
 		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
+		kvm_psci_call(vcpu, run);
 	}
 
 	return 0;
@@ -520,7 +520,7 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (unlikely(!kvm_vcpu_initialized(vcpu)))
 		return -ENOEXEC;
 
-	ret = kvm_vcpu_first_run_init(vcpu);
+	ret = kvm_vcpu_first_run_init(vcpu, vcpu->run);
 	if (ret)
 		return ret;
 
@@ -530,6 +530,12 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			return ret;
 	}
 
+	if (run->exit_reason == KVM_EXIT_PSCI) {
+		ret = kvm_handle_psci_return(vcpu, vcpu->run);
+		if (ret)
+			return ret;
+	}
+
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index df4c82d..1a12e6c 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -40,14 +40,20 @@  static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+	int ret;
+
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
 
-	if (kvm_psci_call(vcpu))
+	ret = kvm_psci_call(vcpu, run);
+	if (!ret)
+		return 1;
+	else if (ret == -EINVAL) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return 0;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 86a693a..72c23a7 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -71,6 +71,45 @@  static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	return KVM_PSCI_RET_SUCCESS;
 }
 
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct kvm_exit_psci psci;
+
+	psci.fn = KVM_PSCI_FN_SYSTEM_OFF;
+	memset(&psci.args, 0, sizeof(psci.args));
+	kvm_prepare_psci(run, &psci);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct kvm_exit_psci psci;
+
+	psci.fn = KVM_PSCI_FN_SYSTEM_RESET;
+	memset(&psci.args, 0, sizeof(psci.args));
+	kvm_prepare_psci(run, &psci);
+}
+
+/**
+ * kvm_handle_psci_return -- Handle PSCI after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run:  The VCPU run struct containing the psci data
+ *
+ * This should only be called after returning from userspace for
+ * PSCI emulation.
+ */
+int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/*
+	 * Currently, the PSCI functions passed to user space for emulation
+	 * are SYSTEM_OFF and SYSTEM_RESET. These PSCI functions are not
+	 * expected to return back after emulating in user space hence by
+	 * default we return -EINVAL to avoid user space from doing RUN ioctl
+	 * after handling KVM_EXIT_PSCI.
+	 */
+
+	return -EINVAL;
+}
+
 /**
  * kvm_psci_call - handle PSCI call if r0 value is in range
  * @vcpu: Pointer to the VCPU struct
@@ -81,8 +120,9 @@  static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  * function number specified in r0 is withing the PSCI range, and false
  * otherwise.
  */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+	int ret = 0;
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
@@ -98,11 +138,20 @@  bool kvm_psci_call(struct kvm_vcpu *vcpu)
 	case KVM_PSCI_FN_MIGRATE:
 		val = KVM_PSCI_RET_NI;
 		break;
-
+	case KVM_PSCI_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu, run);
+		val = KVM_PSCI_RET_SUCCESS;
+		ret = -EINTR;
+		break;
+	case KVM_PSCI_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu, run);
+		val = KVM_PSCI_RET_SUCCESS;
+		ret = -EINTR;
+		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return ret;
 }
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a48..db90649 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,29 @@ 
 #ifndef __ARM64_KVM_PSCI_H__
 #define __ARM64_KVM_PSCI_H__
 
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * The in-kernel PSCI emulation code wants to use a copy of run->psci,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_psci {
+	u32		fn;
+	u64		args[7];
+};
+
+static inline void kvm_prepare_psci(struct kvm_run *run,
+				    struct kvm_exit_psci *psci)
+{
+	run->psci.fn = psci->fn;
+	memcpy(&run->psci.args, &psci->args, sizeof(run->psci.args));
+	memset(&run->psci.ret, 0, sizeof(run->psci.ret));
+	run->exit_reason = KVM_EXIT_PSCI;
+}
+
+int kvm_handle_psci_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_psci_call(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index d9f026b..f678902 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -158,6 +158,8 @@  struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+#define KVM_PSCI_FN_SYSTEM_OFF		KVM_PSCI_FN(4)
+#define KVM_PSCI_FN_SYSTEM_RESET	KVM_PSCI_FN(5)
 
 #define KVM_PSCI_RET_SUCCESS		0
 #define KVM_PSCI_RET_NI			((unsigned long)-1)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 9beaca0..28e20bb 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,20 +30,32 @@  typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
+	int ret;
+
+	ret = kvm_psci_call(vcpu, run);
+	if (!ret)
+		return 1;
+	else if (ret == -EINVAL) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return 0;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
+	int ret;
+
+	ret = kvm_psci_call(vcpu, run);
+	if (!ret)
+		return 1;
+	else if (ret == -EINVAL) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return 0;
 }
 
 /**