[v3,2/3] arm64: kvm: inject SError with virtual syndrome

Message ID 1493530677-4919-2-git-send-email-gengdongjiu@huawei.com (mailing list archive)
State New, archived

Commit Message

Dongjiu Geng April 30, 2017, 5:37 a.m. UTC
When an SError happens, KVM notifies kvmtool to generate a GHES table
to record the error; kvmtool then injects the SError with a specified
virtual syndrome. When switching to the guest, a virtual SError will be
raised with this specified syndrome.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
---
 arch/arm64/include/asm/esr.h         |  2 ++
 arch/arm64/include/asm/kvm_emulate.h | 10 ++++++++++
 arch/arm64/include/asm/kvm_host.h    |  1 +
 arch/arm64/include/asm/sysreg.h      |  3 +++
 arch/arm64/kvm/handle_exit.c         | 25 +++++++++++++++++++------
 arch/arm64/kvm/hyp/switch.c          | 15 ++++++++++++++-
 include/uapi/linux/kvm.h             |  5 +++++
 7 files changed, 54 insertions(+), 7 deletions(-)

Comments

Christoffer Dall May 2, 2017, 8:03 a.m. UTC | #1
On Sun, Apr 30, 2017 at 01:37:56PM +0800, Dongjiu Geng wrote:
> When an SError happens, KVM notifies kvmtool to generate a GHES table
> to record the error; kvmtool then injects the SError with a specified

again, is this really specific to kvmtool?  Please try to explain this
mechanism in generic terms.

> virtual syndrome. When switching to the guest, a virtual SError will be
> raised with this specified syndrome.
> 
> Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
[...]
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index aede165..ded6211 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>  		isb();
>  	}
>  	write_sysreg(val, hcr_el2);
> +    /* If virtual System Error or Asynchronous Abort is pending. set

nit: I think you want a comma after pending, not a dot.

> +     * the virtual exception syndrome information
> +     */

nit: commenting style

> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
> +			(vcpu->arch.hcr_el2 & HCR_VSE))
> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
> +
>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>  	write_sysreg(1 << 15, hstr_el2);
>  	/*
> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>  	 * the crucial bit is "On taking a vSError interrupt,
>  	 * HCR_EL2.VSE is cleared to 0."
>  	 */
> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>  
> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
> +					& VSESR_ELx_IDS_ISS_MASK);
> +		}
> +	}
>  	__deactivate_traps_arch()();
>  	write_sysreg(0, hstr_el2);
>  	write_sysreg(0, pmuserenr_el0);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 27fe556..bb02909 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -360,6 +360,11 @@ struct kvm_run {
>  		struct {
>  			__u8 vector;
>  		} eoi;
> +		/* KVM_EXIT_INTR */
> +		struct {
> +			__u32 syndrome_info;
> +			__u64 address;
> +		} intr;

Definitely not.  KVM_EXIT_INTR is a generic exit code to tell userspace
that we exited because we needed to deliver a signal or something else
related to an asynchronous event.  This implies that the syndrome_info
etc. always has valid values on all architectures when exiting with
KVM_EXIT_INTR.

Either document the behavior as the syndrome_info has side-channel
information on every exit, or on some KVM_EXIT_INTR exits, as we explain
in the KVM_CAP_ARM_USER_IRQ ABI that was just added, or dedicate an
exit code.
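
For illustration, a dedicated exit code could look something like this
(the constant value and names below are hypothetical, not an existing ABI):

	#include <linux/types.h>

	/* Hypothetical sketch of a dedicated exit reason rather than
	 * overloading KVM_EXIT_INTR; a real ABI would need a properly
	 * allocated constant in include/uapi/linux/kvm.h. */
	#define KVM_EXIT_ARM_SEI	28	/* hypothetical value */

	struct kvm_arm_sei_exit {
		__u32 syndrome_info;	/* ESR_EL2 of the SError */
		__u64 address;		/* host VA of the page, if known */
	};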

>  		/* KVM_EXIT_HYPERV */
>  		struct kvm_hyperv_exit hyperv;
>  		/* Fix the size of the union. */
> -- 
> 2.10.1
> 

I'll look at the details of such patches once the ABI is clear and
well-documented.

Thanks,
-Christoffer
Dongjiu Geng May 2, 2017, 12:20 p.m. UTC | #2
Hello Christoffer.


On 2017/5/2 16:03, Christoffer Dall wrote:
> On Sun, Apr 30, 2017 at 01:37:56PM +0800, Dongjiu Geng wrote:
>> When an SError happens, KVM notifies kvmtool to generate a GHES table
>> to record the error; kvmtool then injects the SError with a specified
> 
> again, is this really specific to kvmtool?  Please try to explain this
> mechanism in generic terms.
  It is for both QEMU and other userspace applications. I will correct it.

> 
>> virtual syndrome. When switching to the guest, a virtual SError will be
>> raised with this specified syndrome.
[...]
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 27fe556..bb02909 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -360,6 +360,11 @@ struct kvm_run {
>>  		struct {
>>  			__u8 vector;
>>  		} eoi;
>> +		/* KVM_EXIT_INTR */
>> +		struct {
>> +			__u32 syndrome_info;
>> +			__u64 address;
>> +		} intr;
> 
> Definitely not.  KVM_EXIT_INTR is a generic exit code to tell userspace
> that we exited because we needed to deliver a signal or something else
> related to an asynchronous event.  This implies that the syndrome_info
> etc. always has valid values on all architectures when exiting with
> KVM_EXIT_INTR.
> 
> Either document the behavior as the syndrome_info has side-channel
> information on every exit, or on some KVM_EXIT_INTR exits, as we explain
> in the KVM_CAP_ARM_USER_IRQ ABI that was just added, or dedicate an
> exit code.
OK.

> I'll look at the details of such patches once the ABI is clear and
> well-documented.
  OK.

> 
> Thanks,
> -Christoffer
James Morse May 2, 2017, 3:37 p.m. UTC | #3
Hi Dongjiu Geng,

On 30/04/17 06:37, Dongjiu Geng wrote:
> When an SError happens, KVM notifies kvmtool to generate a GHES table
> to record the error; kvmtool then injects the SError with a specified
> virtual syndrome. When switching to the guest, a virtual SError will be
> raised with this specified syndrome.

GHES records in the HEST table have to be generated before the OS starts, as
these are read at boot. Did you mean generate CPER records?


It looks like this is based on the earlier SEI series, please work together and
post a combined series when there are changes. (It is also good to summarise the
changes in the cover letter.)

This patch is modifying the world-switch to save/restore VSESR. You should
explain that VSESR is the Virtual SError Syndrome; it becomes the ESR value when
HCR_EL2.VSE injects an SError. This register was added by the RAS Extensions and
needs patching in or guarding.


> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index aede165..ded6211 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>  		isb();
>  	}
>  	write_sysreg(val, hcr_el2);
> +    /* If virtual System Error or Asynchronous Abort is pending. set
> +     * the virtual exception syndrome information
> +     */
> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&

Is cpus_have_cap() safe to use at EL2?
This would be the first user, and it accesses cpu_hwcaps. You probably want
something like the static_branch_unlikely()s in the vgic code elsewhere in this
file.


> +			(vcpu->arch.hcr_el2 & HCR_VSE))
> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
> +

I think this would be clearer if you took this out to a helper method called
something like restore_vsesr() and did the if(cap|VSE) stuff there.
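
Something like this, perhaps (an untested sketch: the helper name is just a
suggestion, and it assumes the static-key-backed cpus_have_const_cap() is
safe to call from hyp):

	static void __hyp_text __restore_vsesr(struct kvm_vcpu *vcpu)
	{
		/* VSESR_EL2 only exists with the RAS Extensions */
		if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
			return;

		/* Only meaningful while a virtual SError is pending */
		if (vcpu->arch.hcr_el2 & HCR_VSE)
			write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
	}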

(Nit: comment style)


>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>  	write_sysreg(1 << 15, hstr_el2);
>  	/*
> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>  	 * the crucial bit is "On taking a vSError interrupt,
>  	 * HCR_EL2.VSE is cleared to 0."
>  	 */
> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>  
> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
> +					& VSESR_ELx_IDS_ISS_MASK);

There is no need for KVM to save the VSESR. It is copied to ESR_EL1 when
HCR_EL2.VSE delivers the SError, after this we don't care what the register
value is. When we switch to a guest we should set the value from KVM whenever
the VSE bit is set. We should never read back the value in hardware.

Why read ESR_EL2? This will give you a completely unrelated value. If EL2 takes
an IRQ or a page fault between pending the SError and delivering it, we
overwrite the value set by KVM or user-space with a stray EL2 value.


... I think you expect an SError to arrive at EL2 and have its ESR recorded in
vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
the guest, and this ESR is reused...

We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
of RAS events via another mechanism, or not at all.

KVM should not give the guest an ESR value of its choice. For SError the ESR
describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
must choose this.

I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
but that isn't quite what you have here.

The VSESR value should always come from user space. The only exceptions are
SErrors that we know weren't due to RAS: for these we should set the VSESR to
zero to keep the existing behaviour.
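
A minimal sketch of that non-RAS path, assuming the kvm_vcpu_set_vsesr()
helper from this patch (where exactly this hooks into the injection code is
up to the series):

	/* For an SError KVM pends itself that did not originate from RAS,
	 * zero the virtual syndrome to keep the existing behaviour. */
	static void kvm_pend_unknown_serror(struct kvm_vcpu *vcpu)
	{
		kvm_vcpu_set_vsesr(vcpu, 0);
		vcpu->arch.hcr_el2 |= HCR_VSE;
	}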


Thanks,

James
Dongjiu Geng May 5, 2017, 1:19 p.m. UTC | #4
Hi James,

   Thanks for your detailed suggestion.

On 2017/5/2 23:37, James Morse wrote:
> Hi Dongjiu Geng,
> 
> On 30/04/17 06:37, Dongjiu Geng wrote:
>> When an SError happens, KVM notifies kvmtool to generate a GHES table
>> to record the error; kvmtool then injects the SError with a specified
>> virtual syndrome. When switching to the guest, a virtual SError will be
>> raised with this specified syndrome.
> 
> GHES records in the HEST table have to be generated before the OS starts, as
> these are read at boot. Did you mean generate CPER records?
 You are quite right; it should generate CPER records.


> 
> 
> It looks like this is based on the earlier SEI series, please work together and
> post a combined series when there are changes. (It is also good to summarise the
> changes in the cover letter.)
Ok.

> 
> This patch is modifying the world-switch to save/restore VSESR. You should
> explain that VSESR is the Virtual SError Syndrome; it becomes the ESR value when
> HCR_EL2.VSE injects an SError. This register was added by the RAS Extensions and
> needs patching in or guarding.
Yes, you are right.

> 
> 
>> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> index aede165..ded6211 100644
>> --- a/arch/arm64/kvm/hyp/switch.c
>> +++ b/arch/arm64/kvm/hyp/switch.c
>> @@ -86,6 +86,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
>>  		isb();
>>  	}
>>  	write_sysreg(val, hcr_el2);
>> +    /* If virtual System Error or Asynchronous Abort is pending. set
>> +     * the virtual exception syndrome information
>> +     */
>> +	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
> 
> Is cpus_have_cap() safe to use at EL2?
> This would be the first user, and it accesses cpu_hwcaps. You probably want
> something like the static_branch_unlikely()s in the vgic code elsewhere in this
> file.
> 
> 
>> +			(vcpu->arch.hcr_el2 & HCR_VSE))
>> +		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
>> +
> 
> I think this would be clearer if you took this out to a helper method called
> something like restore_vsesr() and did the if(cap|VSE) stuff there.
  Good suggestion.

> 
> (Nit: comment style)
 OK.

> 
> 
>>  	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
>>  	write_sysreg(1 << 15, hstr_el2);
>>  	/*
>> @@ -139,9 +146,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
>>  	 * the crucial bit is "On taking a vSError interrupt,
>>  	 * HCR_EL2.VSE is cleared to 0."
>>  	 */
>> -	if (vcpu->arch.hcr_el2 & HCR_VSE)
>> +	if (vcpu->arch.hcr_el2 & HCR_VSE) {
>>  		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
>>  
>> +		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
>> +			/* set vsesr_el2[24:0] with esr_el2[24:0] */
>> +			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
>> +					& VSESR_ELx_IDS_ISS_MASK);
> 
> There is no need for KVM to save the VSESR. It is copied to ESR_EL1 when
> HCR_EL2.VSE delivers the SError, after this we don't care what the register
> value is. When we switch to a guest we should set the value from KVM whenever
> the VSE bit is set. We should never read back the value in hardware.
  I think you are right. Thanks for pointing that out.

> 
> Why read ESR_EL2? This will give you a completely unrelated value. If EL2 takes
> an IRQ or a page fault between pending the SError and delivering it, we
> overwrite the value set by KVM or user-space with a stray EL2 value.
> 
> 
> ... I think you expect an SError to arrive at EL2 and have its ESR recorded in
> vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
> the guest, and this ESR is reused...
> 
> We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
> never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
> of RAS events via another mechanism, or not at all.
> 
> KVM should not give the guest an ESR value of its choice. For SError the ESR
> describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
> must choose this.

Below is my previous solution:
For the SError, the CPU will first trap to EL3 firmware, which records the syndrome in ESR_EL3.
Before jumping to the EL2 hypervisor, it copies ESR_EL3 to ESR_EL2,
so in order to pass this syndrome to VSESR_EL2, the ESR_EL2 value is used to assign it.


If Qemu/kvmtool chooses the ESR value and the ESR only describes whether the error is corrected, correctable, or fatal,
is that information enough for the guest?


>
> I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
> but that isn't quite what you have here.

KVM provides an API to inject the SError; Qemu/kvmtool calls the API through an IOCTL. Would that be OK?


> 
> The VSESR value should always come from user space. The only exceptions are
> SErrors that we know weren't due to RAS: for these we should set the VSESR to
> zero to keep the existing behaviour.
> 
> 
> Thanks,
> 
> James
James Morse May 12, 2017, 5:24 p.m. UTC | #5
Hi gengdongjiu,

On 05/05/17 14:19, gengdongjiu wrote:
> On 2017/5/2 23:37, James Morse wrote:
> > ... I think you expect an SError to arrive at EL2 and have its ESR recorded in
> > vcpu->arch.fault.vsesr_el2. Some time later KVM decides to inject an SError into
> > the guest, and this ESR is reused...
> > 
> > We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError that
> > never started as a physical-SError. Qemu/kvmtool may choose to notify the guest
> > of RAS events via another mechanism, or not at all.
> > 
> > KVM should not give the guest an ESR value of its choice. For SError the ESR
> > describes whether the error is corrected, correctable or fatal. Qemu/kvmtool
> > must choose this.

> Below is my previous solution:
> For the SError, the CPU will first trap to EL3 firmware, which records the syndrome in ESR_EL3.
> Before jumping to the EL2 hypervisor, it copies ESR_EL3 to ESR_EL2,

(Copying the ESR value won't always be the right thing to do.)


> so in order to pass this syndrome to VSESR_EL2, the ESR_EL2 value is used to assign it.

> If Qemu/kvmtool chooses the ESR value and the ESR only describes whether the error is corrected, correctable, or fatal,
> is that information enough for the guest?

So the API should specify which of these three severities to use? I think this
is too specific. The API should be useful for anything the VSE/VSESR hardware
can do.

VSESR_EL2 is described in the RAS spec: 4.4.12 [0]; it's a 64-bit register. I
think we should let Qemu/kvmtool specify any 64-bit value here, but KVM should
reject values that try to set bits described as RES0.

This would let Qemu/kvmtool specify any SError ISS, either setting ESR_ELx.IDS
and some virtual-machine specific value, or encoding any severity in AET and
choosing the DFSC/EA bits appropriately.
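
A rough sketch of the rejection check; the mask reuses VSESR_ELx_IDS_ISS_MASK
from this patch, i.e. it assumes bits [63:25] are the RES0 bits (worth
double-checking against the spec):

	static int kvm_arm_set_vsesr(struct kvm_vcpu *vcpu, u64 value)
	{
		/* Reject attempts to set bits described as RES0 */
		if (value & ~(u64)VSESR_ELx_IDS_ISS_MASK)
			return -EINVAL;

		kvm_vcpu_set_vsesr(vcpu, value);
		return 0;
	}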


>> > I think we need an API that allows Qemu/kvmtool to inject SError into a guest,
>> > but that isn't quite what you have here.

> KVM provides an API to inject the SError; Qemu/kvmtool calls the API through an IOCTL. Would that be OK?

(just the one API call), yes.
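
For illustration only, such an API might look like the following (the ioctl
name, number and struct are hypothetical; no such ABI exists at this point):

	/* Hypothetical UAPI sketch, not a real KVM ioctl */
	struct kvm_arm_serror {
		__u64 vsesr;	/* VSESR_EL2 value; RES0 bits must be zero */
	};

	#define KVM_ARM_INJECT_SERROR	_IOW(KVMIO, 0xb0, struct kvm_arm_serror)

Userspace would then call ioctl(vcpu_fd, KVM_ARM_INJECT_SERROR, &serror) on
the vcpu that should take the virtual SError.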


Thanks,

James

[0]
https://static.docs.arm.com/ddi0587/a/RAS%20Extension-release%20candidate_march_29.pdf
gengdongjiu May 21, 2017, 9:08 a.m. UTC | #6
2017-05-13 1:24 GMT+08:00, James Morse <james.morse@arm.com>:
> Hi gengdongjiu,
>
> On 05/05/17 14:19, gengdongjiu wrote:
>> On 2017/5/2 23:37, James Morse wrote:
>> > ... I think you expect an SError to arrive at EL2 and have its ESR
>> > recorded in vcpu->arch.fault.vsesr_el2. Some time later KVM decides to
>> > inject an SError into the guest, and this ESR is reused...
>> >
>> > We shouldn't do this. Qemu/kvmtool may want to inject a virtual-SError
>> > that never started as a physical-SError. Qemu/kvmtool may choose to
>> > notify the guest of RAS events via another mechanism, or not at all.
>> >
>> > KVM should not give the guest an ESR value of its choice. For SError the
>> > ESR describes whether the error is corrected, correctable or fatal.
>> > Qemu/kvmtool must choose this.
>
>> Below is my previous solution:
>> For the SError, the CPU will first trap to EL3 firmware, which records the
>> syndrome in ESR_EL3.
>> Before jumping to the EL2 hypervisor, it copies ESR_EL3 to ESR_EL2,
>
> (Copying the ESR value won't always be the right thing to do.)
  James, thanks for your kind explanation; I understand your point.

>
>
>> so in order to pass this syndrome to VSESR_EL2, the ESR_EL2 value is used
>> to assign it.
>
>> If Qemu/kvmtool chooses the ESR value and the ESR only describes whether
>> the error is corrected, correctable, or fatal, is that information enough
>> for the guest?
>
> So the API should specify which of these three severities to use? I think
> this is too specific. The API should be useful for anything the VSE/VSESR
> hardware can do.
>
> VSESR_EL2 is described in the RAS spec: 4.4.12 [0]; it's a 64-bit register.
> I think we should let Qemu/kvmtool specify any 64-bit value here, but KVM
> should reject values that try to set bits described as RES0.
>
> This would let Qemu/kvmtool specify any SError ISS, either setting
> ESR_ELx.IDS and some virtual-machine specific value, or encoding any
> severity in AET and choosing the DFSC/EA bits appropriately.
   It sounds reasonable.

>
>
>>> > I think we need an API that allows Qemu/kvmtool to inject SError into a
>>> > guest, but that isn't quite what you have here.
>
>> KVM provides an API to inject the SError; Qemu/kvmtool calls the API
>> through an IOCTL. Would that be OK?
>
> (just the one API call), yes.
   OK, I have added it.

>
>
> Thanks,
>
> James
>
> [0]
> https://static.docs.arm.com/ddi0587/a/RAS%20Extension-release%20candidate_march_29.pdf

Patch

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 22f9c90..d009c99 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -127,6 +127,8 @@ 
 #define ESR_ELx_WFx_ISS_WFE	(UL(1) << 0)
 #define ESR_ELx_xVC_IMM_MASK	((1UL << 16) - 1)
 
+#define VSESR_ELx_IDS_ISS_MASK    ((1UL << 25) - 1)
+
 /* ESR value templates for specific events */
 
 /* BRK instruction trap from AArch64 state */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f5ea0ba..a3259a9 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -148,6 +148,16 @@  static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault.esr_el2;
 }
 
+static inline u32 kvm_vcpu_get_vsesr(const struct kvm_vcpu *vcpu)
+{
+		return vcpu->arch.fault.vsesr_el2;
+}
+
+static inline void kvm_vcpu_set_vsesr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+		vcpu->arch.fault.vsesr_el2 = val;
+}
+
 static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
 	u32 esr = kvm_vcpu_get_hsr(vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e7705e7..84ed239 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -86,6 +86,7 @@  struct kvm_vcpu_fault_info {
 	u32 esr_el2;		/* Hyp Syndrom Register */
 	u64 far_el2;		/* Hyp Fault Address Register */
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+	u32 vsesr_el2;		/* Virtual SError Exception Syndrome Register */
 };
 
 /*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 32964c7..b6afb7a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -125,6 +125,9 @@ 
 #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
 #define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
 
+#define VSESR_EL2			sys_reg(3, 4, 5, 2, 3)
+
+
 #define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM |	\
 				      (!!x)<<8 | 0x1f)
 #define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM |	\
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index c89d83a..3d024a9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -180,7 +180,11 @@  static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 
 static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	unsigned long hva, fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	struct kvm_memory_slot *memslot;
+	int hsr, ret = 1;
+	bool writable;
+	gfn_t gfn;
 
 	if (handle_guest_sei((unsigned long)fault_ipa,
 				kvm_vcpu_get_hsr(vcpu))) {
@@ -190,9 +194,20 @@  static int kvm_handle_guest_sei(struct kvm_vcpu *vcpu, struct kvm_run *run)
 				(unsigned long)kvm_vcpu_get_hsr(vcpu));
 
 		kvm_inject_vabt(vcpu);
+	} else {
+		hsr = kvm_vcpu_get_hsr(vcpu);
+
+		gfn = fault_ipa >> PAGE_SHIFT;
+		memslot = gfn_to_memslot(vcpu->kvm, gfn);
+		hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+
+		run->exit_reason = KVM_EXIT_INTR;
+		run->intr.syndrome_info = hsr;
+		run->intr.address = hva;
+		ret = 0;
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -218,8 +233,7 @@  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			*vcpu_pc(vcpu) -= adj;
 		}
 
-		kvm_handle_guest_sei(vcpu, run);
-		return 1;
+		return kvm_handle_guest_sei(vcpu, run);
 	}
 
 	exception_index = ARM_EXCEPTION_CODE(exception_index);
@@ -228,8 +242,7 @@  int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	case ARM_EXCEPTION_IRQ:
 		return 1;
 	case ARM_EXCEPTION_EL1_SERROR:
-		kvm_handle_guest_sei(vcpu, run);
-		return 1;
+		return kvm_handle_guest_sei(vcpu, run);
 	case ARM_EXCEPTION_TRAP:
 		/*
 		 * See ARM ARM B1.14.1: "Hyp traps on instructions
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index aede165..ded6211 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -86,6 +86,13 @@  static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 		isb();
 	}
 	write_sysreg(val, hcr_el2);
+    /* If virtual System Error or Asynchronous Abort is pending. set
+     * the virtual exception syndrome information
+     */
+	if (cpus_have_cap(ARM64_HAS_RAS_EXTN) &&
+			(vcpu->arch.hcr_el2 & HCR_VSE))
+		write_sysreg_s(vcpu->arch.fault.vsesr_el2, VSESR_EL2);
+
 	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
 	/*
@@ -139,9 +146,15 @@  static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 	 * the crucial bit is "On taking a vSError interrupt,
 	 * HCR_EL2.VSE is cleared to 0."
 	 */
-	if (vcpu->arch.hcr_el2 & HCR_VSE)
+	if (vcpu->arch.hcr_el2 & HCR_VSE) {
 		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
 
+		if (cpus_have_cap(ARM64_HAS_RAS_EXTN)) {
+			/* set vsesr_el2[24:0] with esr_el2[24:0] */
+			kvm_vcpu_set_vsesr(vcpu, read_sysreg_el2(esr)
+					& VSESR_ELx_IDS_ISS_MASK);
+		}
+	}
 	__deactivate_traps_arch()();
 	write_sysreg(0, hstr_el2);
 	write_sysreg(0, pmuserenr_el0);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 27fe556..bb02909 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -360,6 +360,11 @@  struct kvm_run {
 		struct {
 			__u8 vector;
 		} eoi;
+		/* KVM_EXIT_INTR */
+		struct {
+			__u32 syndrome_info;
+			__u64 address;
+		} intr;
 		/* KVM_EXIT_HYPERV */
 		struct kvm_hyperv_exit hyperv;
 		/* Fix the size of the union. */