diff mbox series

[v2,2/2] KVM: arm64: timers: Save restore CVAL of a ptimer across guest entry and exits

Message ID 20230904114218.590304-3-gankulkarni@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series Avoid spurious ptimer interrupts for non-zero cntpoff | expand

Commit Message

Ganapatrao Kulkarni Sept. 4, 2023, 11:42 a.m. UTC
As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
as zero. On VHE host, E2H and TGE are set, hence it is required
to save Guest's CVAL to memory and increment it by CNTPOFF_EL2 at
guest exit to avoid false physical timer interrupts and also
restore back the CVAL with saved value before the guest entry.

Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
---
 arch/arm64/kvm/arch_timer.c     | 60 ++++++++++++++++++++++++++++++---
 arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
 include/kvm/arm_arch_timer.h    |  1 +
 3 files changed, 69 insertions(+), 5 deletions(-)

Comments

Marc Zyngier Sept. 10, 2023, 11:45 a.m. UTC | #1
On Mon, 04 Sep 2023 12:42:18 +0100,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
> Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
> as zero. On VHE host, E2H and TGE are set, hence it is required
> to save Guest's CVAL to memory and increment it by CNTPOFF_EL2 at
> guest exit to avoid false physical timer interrupts and also
> restore back the CVAL with saved value before the guest entry.
> 
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> ---
>  arch/arm64/kvm/arch_timer.c     | 60 ++++++++++++++++++++++++++++++---
>  arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
>  include/kvm/arm_arch_timer.h    |  1 +
>  3 files changed, 69 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 98b0e8ac02ae..9fe3fa6ed98a 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -514,6 +514,59 @@ static void set_cntpoff(u64 cntpoff)
>  		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
>  }
>  
> +static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
> +{
> +	unsigned long flags;
> +	u64 cval;
> +
> +	local_irq_save(flags);
> +	cval = read_sysreg_el0(SYS_CNTP_CVAL);
> +	timer_set_cval(ctx, cval);
> +	cval += offset;
> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
> +	isb();
> +	local_irq_restore(flags);
> +}
> +
> +static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
> +{
> +	unsigned long flags;
> +	u64 cval;
> +
> +	local_irq_save(flags);
> +	cval = timer_get_cval(ctx);
> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
> +	isb();
> +	local_irq_restore(flags);
> +}
> +
> +void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
> +{
> +	struct timer_map map;
> +	struct arch_timer_cpu *timer;
> +	struct arch_timer_context *ctxp;
> +	u64 offset;
> +
> +	get_timer_map(vcpu, &map);
> +	ctxp = map.direct_ptimer;
> +
> +	if (unlikely(ctxp == NULL))
> +		return;
> +
> +	offset = timer_get_offset(ctxp);
> +	if (!offset)
> +		return;
> +
> +	timer = vcpu_timer(ctxp->vcpu);
> +	if (!timer->enabled || !ctxp->loaded)
> +		return;
> +
> +	if (save)
> +		ptimer_cval_save(ctxp, offset);
> +	else
> +		ptimer_cval_restore(ctxp, offset);
> +}
> +

I really don't want to see any of this code in the arch_timer
backend. There is nothing here that executes in the context of the
world-switch until this, and I think this is the wrong level of
abstraction.

You end-up doing things that make no sense in the expected execution
context (timer not loaded?, not enabled?, disabling interrupts?), and
this makes the whole thing pointlessly complex.

My take on this problem is still the same (vaguely amended compared to
the previous version). If this doesn't work for you, please explain
what is wrong with it. But it has the shape of what I really want to
see.

Thanks,

	M.

From 2516a1410c0d45a7d3ba0523847be339bfcb1e50 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 22 Aug 2023 13:18:10 +0100
Subject: [PATCH] KVM: arm64: timers: Correctly handle TGE flip with
 CNTPOFF_EL2

Contrary to common belief, HCR_EL2.TGE has a direct and immediate
effect on the way the EL0 physical counter is offset. Flipping
TGE from 1 to 0 while at EL2 immediately changes the way the counter
compared to the CVAL limit.

This means that we cannot directly save/restore the guest's view of
CVAL, but that we instead must treat it as if CNTPOFF didn't exist.
Only in the world switch, once we figure out that we do have CNTPOFF,
can we must the offset back and forth depending on the polarity of
TGE.

Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer")
Reported-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arch_timer.c     | 13 +++-------
 arch/arm64/kvm/hyp/vhe/switch.c | 45 +++++++++++++++++++++++++++++++++
 include/kvm/arm_arch_timer.h    |  7 +++++
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 6dcdae4d38cb..a1e24228aaaa 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
 	.get_input_level = kvm_arch_timer_get_input_level,
 };
 
-static bool has_cntpoff(void)
-{
-	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
 static int nr_timers(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu_has_nv(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
 	return timecounter->cc->read(timecounter->cc);
 }
 
-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
 {
 	if (vcpu_has_nv(vcpu)) {
 		if (is_hyp_ctxt(vcpu)) {
@@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
 		cval = read_sysreg_el0(SYS_CNTP_CVAL);
 
-		if (!has_cntpoff())
-			cval -= timer_get_offset(ctx);
+		cval -= timer_get_offset(ctx);
 
 		timer_set_cval(ctx, cval);
 
@@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 		cval = timer_get_cval(ctx);
 		offset = timer_get_offset(ctx);
 		set_cntpoff(offset);
-		if (!has_cntpoff())
-			cval += offset;
+		cval += offset;
 		write_sysreg_el0(cval, SYS_CNTP_CVAL);
 		isb();
 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 6537f58b1a8c..5dcbde57f466 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	___activate_traps(vcpu);
 
+	if (has_cntpoff()) {
+		struct timer_map map;
+
+		get_timer_map(vcpu, &map);
+
+		/*
+		 * We're entrering the guest. Reload the correct
+		 * values from memory now that TGE is clear.
+		 */
+		if (map.direct_ptimer == vcpu_ptimer(vcpu))
+			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+		if (map.direct_ptimer == vcpu_hptimer(vcpu))
+			val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+		if (map.direct_ptimer) {
+			write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+			isb();
+		}
+	}
+
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_ELx_TTA;
 	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -77,6 +97,31 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 
+	if (has_cntpoff()) {
+		struct timer_map map;
+		u64 val, offset;
+
+		get_timer_map(vcpu, &map);
+
+		/*
+		 * We're exiting the guest. Save the latest CVAL value
+		 * to memory and apply the offset now that TGE is set.
+		 */
+		val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+		if (map.direct_ptimer == vcpu_ptimer(vcpu))
+			__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+		if (map.direct_ptimer == vcpu_hptimer(vcpu))
+			__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+		offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+
+		if (map.direct_ptimer && offset) {
+			offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+			write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+			isb();
+		}
+	}
+
 	/*
 	 * ARM errata 1165522 and 1530923 require the actual execution of the
 	 * above before we can switch to the EL2/EL0 translation regime used by
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index bb3cb005873e..e748bc957d83 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -82,6 +82,8 @@ struct timer_map {
 	struct arch_timer_context *emul_ptimer;
 };
 
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
+
 struct arch_timer_cpu {
 	struct arch_timer_context timers[NR_KVM_TIMERS];
 
@@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
 void kvm_timer_cpu_up(void);
 void kvm_timer_cpu_down(void);
 
+static inline bool has_cntpoff(void)
+{
+	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
 #endif
Ganapatrao Kulkarni Sept. 15, 2023, 9:57 a.m. UTC | #2
Hi Marc,

On 10-09-2023 05:15 pm, Marc Zyngier wrote:
> On Mon, 04 Sep 2023 12:42:18 +0100,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
>> Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
>> as zero. On VHE host, E2H and TGE are set, hence it is required
>> to save Guest's CVAL to memory and increment it by CNTPOFF_EL2 at
>> guest exit to avoid false physical timer interrupts and also
>> restore back the CVAL with saved value before the guest entry.
>>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> ---
>>   arch/arm64/kvm/arch_timer.c     | 60 ++++++++++++++++++++++++++++++---
>>   arch/arm64/kvm/hyp/vhe/switch.c | 13 +++++++
>>   include/kvm/arm_arch_timer.h    |  1 +
>>   3 files changed, 69 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
>> index 98b0e8ac02ae..9fe3fa6ed98a 100644
>> --- a/arch/arm64/kvm/arch_timer.c
>> +++ b/arch/arm64/kvm/arch_timer.c
>> @@ -514,6 +514,59 @@ static void set_cntpoff(u64 cntpoff)
>>   		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
>>   }
>>   
>> +static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
>> +{
>> +	unsigned long flags;
>> +	u64 cval;
>> +
>> +	local_irq_save(flags);
>> +	cval = read_sysreg_el0(SYS_CNTP_CVAL);
>> +	timer_set_cval(ctx, cval);
>> +	cval += offset;
>> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
>> +	isb();
>> +	local_irq_restore(flags);
>> +}
>> +
>> +static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
>> +{
>> +	unsigned long flags;
>> +	u64 cval;
>> +
>> +	local_irq_save(flags);
>> +	cval = timer_get_cval(ctx);
>> +	write_sysreg_el0(cval, SYS_CNTP_CVAL);
>> +	isb();
>> +	local_irq_restore(flags);
>> +}
>> +
>> +void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
>> +{
>> +	struct timer_map map;
>> +	struct arch_timer_cpu *timer;
>> +	struct arch_timer_context *ctxp;
>> +	u64 offset;
>> +
>> +	get_timer_map(vcpu, &map);
>> +	ctxp = map.direct_ptimer;
>> +
>> +	if (unlikely(ctxp == NULL))
>> +		return;
>> +
>> +	offset = timer_get_offset(ctxp);
>> +	if (!offset)
>> +		return;
>> +
>> +	timer = vcpu_timer(ctxp->vcpu);
>> +	if (!timer->enabled || !ctxp->loaded)
>> +		return;
>> +
>> +	if (save)
>> +		ptimer_cval_save(ctxp, offset);
>> +	else
>> +		ptimer_cval_restore(ctxp, offset);
>> +}
>> +
> 
> I really don't want to see any of this code in the arch_timer
> backend. There is nothing here that executes in the context of the
> world-switch until this, and I think this is the wrong level of
> abstraction.
> 
> You end-up doing things that make no sense in the expected execution
> context (timer not loaded?, not enabled?, disabling interrupts?), and
> this makes the whole thing pointlessly complex.
> 
> My take on this problem is still the same (vaguely amended compared to
> the previous version). If this doesn't work for you, please explain
> what is wrong with it. But it has the shape of what I really want to
> see.
> 
> Thanks,
> 
> 	M.
> 
>  From 2516a1410c0d45a7d3ba0523847be339bfcb1e50 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier <maz@kernel.org>
> Date: Tue, 22 Aug 2023 13:18:10 +0100
> Subject: [PATCH] KVM: arm64: timers: Correctly handle TGE flip with
>   CNTPOFF_EL2
> 
> Contrary to common belief, HCR_EL2.TGE has a direct and immediate
> effect on the way the EL0 physical counter is offset. Flipping
> TGE from 1 to 0 while at EL2 immediately changes the way the counter
> compared to the CVAL limit.
> 
> This means that we cannot directly save/restore the guest's view of
> CVAL, but that we instead must treat it as if CNTPOFF didn't exist.
> Only in the world switch, once we figure out that we do have CNTPOFF,
> can we must the offset back and forth depending on the polarity of
> TGE.
> 
> Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer")
> Reported-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   arch/arm64/kvm/arch_timer.c     | 13 +++-------
>   arch/arm64/kvm/hyp/vhe/switch.c | 45 +++++++++++++++++++++++++++++++++
>   include/kvm/arm_arch_timer.h    |  7 +++++
>   3 files changed, 55 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 6dcdae4d38cb..a1e24228aaaa 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
>   	.get_input_level = kvm_arch_timer_get_input_level,
>   };
>   
> -static bool has_cntpoff(void)
> -{
> -	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> -}
> -
>   static int nr_timers(struct kvm_vcpu *vcpu)
>   {
>   	if (!vcpu_has_nv(vcpu))
> @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
>   	return timecounter->cc->read(timecounter->cc);
>   }
>   
> -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
>   {
>   	if (vcpu_has_nv(vcpu)) {
>   		if (is_hyp_ctxt(vcpu)) {
> @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
>   		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
>   		cval = read_sysreg_el0(SYS_CNTP_CVAL);
>   
> -		if (!has_cntpoff())
> -			cval -= timer_get_offset(ctx);
> +		cval -= timer_get_offset(ctx);
>   
>   		timer_set_cval(ctx, cval);
>   
> @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
>   		cval = timer_get_cval(ctx);
>   		offset = timer_get_offset(ctx);
>   		set_cntpoff(offset);
> -		if (!has_cntpoff())
> -			cval += offset;
> +		cval += offset;
>   		write_sysreg_el0(cval, SYS_CNTP_CVAL);
>   		isb();
>   		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
> index 6537f58b1a8c..5dcbde57f466 100644
> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>   
>   	___activate_traps(vcpu);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		/*
> +		 * We're entrering the guest. Reload the correct
> +		 * values from memory now that TGE is clear.
> +		 */
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
> +		if (map.direct_ptimer == vcpu_hptimer(vcpu))
> +			val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
> +
> +		if (map.direct_ptimer) {
> +			write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
> +			isb();
> +		}
> +	}
> +
>   	val = read_sysreg(cpacr_el1);
>   	val |= CPACR_ELx_TTA;
>   	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
> @@ -77,6 +97,31 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>   
>   	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>   
> +	if (has_cntpoff()) {
> +		struct timer_map map;
> +		u64 val, offset;
> +
> +		get_timer_map(vcpu, &map);
> +
> +		/*
> +		 * We're exiting the guest. Save the latest CVAL value
> +		 * to memory and apply the offset now that TGE is set.
> +		 */
> +		val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
> +		if (map.direct_ptimer == vcpu_ptimer(vcpu))
> +			__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
> +		if (map.direct_ptimer == vcpu_hptimer(vcpu))
> +			__vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
> +
> +		offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> +
> +		if (map.direct_ptimer && offset) {
> +			offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> +			write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
> +			isb();
> +		}
> +	}
> +
>   	/*
>   	 * ARM errata 1165522 and 1530923 require the actual execution of the
>   	 * above before we can switch to the EL2/EL0 translation regime used by
> diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
> index bb3cb005873e..e748bc957d83 100644
> --- a/include/kvm/arm_arch_timer.h
> +++ b/include/kvm/arm_arch_timer.h
> @@ -82,6 +82,8 @@ struct timer_map {
>   	struct arch_timer_context *emul_ptimer;
>   };
>   
> +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
> +
>   struct arch_timer_cpu {
>   	struct arch_timer_context timers[NR_KVM_TIMERS];
>   
> @@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
>   void kvm_timer_cpu_up(void);
>   void kvm_timer_cpu_down(void);
>   
> +static inline bool has_cntpoff(void)
> +{
> +	return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
> +}
> +
>   #endif

This patch did not work.
After adding changes as in below diff, it is started working.

diff --git a/arch/arm64/kvm/hyp/vhe/switch.c 
b/arch/arm64/kvm/hyp/vhe/switch.c
index b0b07658f77d..91d2cfb03e26 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                         val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);

                 if (map.direct_ptimer) {
-                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+                       write_sysreg_el0(val, SYS_CNTP_CVAL);
                         isb();
                 }
         }
@@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

         ___deactivate_traps(vcpu);

-       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
-
         if (has_cntpoff()) {
                 struct timer_map map;
                 u64 val, offset;
@@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
                  * We're exiting the guest. Save the latest CVAL value
                  * to memory and apply the offset now that TGE is set.
                  */
-               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+               val = read_sysreg_el0(SYS_CNTP_CVAL);
                 if (map.direct_ptimer == vcpu_ptimer(vcpu))
                         __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
                 if (map.direct_ptimer == vcpu_hptimer(vcpu))
@@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
                 offset = read_sysreg_s(SYS_CNTPOFF_EL2);

                 if (map.direct_ptimer && offset) {
-                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
-                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
                         isb();
                 }
         }

+       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
         /*
          * ARM errata 1165522 and 1530923 require the actual execution 
of the
          * above before we can switch to the EL2/EL0 translation regime 
used by
[root@eng06sys-r120 arm-platforms]#



Thanks,
Ganapat
Marc Zyngier Sept. 18, 2023, 11:29 a.m. UTC | #3
On Fri, 15 Sep 2023 10:57:46 +0100,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> This patch did not work.
> After adding changes as in below diff, it is started working.

Thanks for looking into this.

> 
> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
> b/arch/arm64/kvm/hyp/vhe/switch.c
> index b0b07658f77d..91d2cfb03e26 100644
> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>                         val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
> 
>                 if (map.direct_ptimer) {
> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);

Duh, of course. Silly me.

>                         isb();
>                 }
>         }
> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
> 
>         ___deactivate_traps(vcpu);
> 
> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> -
>         if (has_cntpoff()) {
>                 struct timer_map map;
>                 u64 val, offset;
> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>                  * We're exiting the guest. Save the latest CVAL value
>                  * to memory and apply the offset now that TGE is set.
>                  */
> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
>                 if (map.direct_ptimer == vcpu_ptimer(vcpu))
>                         __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
>                 if (map.direct_ptimer == vcpu_hptimer(vcpu))
> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>                 offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> 
>                 if (map.direct_ptimer && offset) {
> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> -                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
> +                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
>                         isb();
>                 }
>         }
> 
> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);

Why moving the HCR_EL2 update? I don't grok what it changes. Or is it
that you end-up with spurious interrupts because your GIC is slow to
retire interrupts that are transiently pending?

Thanks,

	M.
Ganapatrao Kulkarni Sept. 19, 2023, 6:15 a.m. UTC | #4
On 18-09-2023 04:59 pm, Marc Zyngier wrote:
> On Fri, 15 Sep 2023 10:57:46 +0100,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>> This patch did not work.
>> After adding changes as in below diff, it is started working.
> 
> Thanks for looking into this.
> 
>>
>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
>> b/arch/arm64/kvm/hyp/vhe/switch.c
>> index b0b07658f77d..91d2cfb03e26 100644
>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>>                          val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
>>
>>                  if (map.direct_ptimer) {
>> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
>> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);
> 
> Duh, of course. Silly me.
> 
>>                          isb();
>>                  }
>>          }
>> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>
>>          ___deactivate_traps(vcpu);
>>
>> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>> -
>>          if (has_cntpoff()) {
>>                  struct timer_map map;
>>                  u64 val, offset;
>> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>                   * We're exiting the guest. Save the latest CVAL value
>>                   * to memory and apply the offset now that TGE is set.
>>                   */
>> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
>> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
>>                  if (map.direct_ptimer == vcpu_ptimer(vcpu))
>>                          __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
>>                  if (map.direct_ptimer == vcpu_hptimer(vcpu))
>> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>                  offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>
>>                  if (map.direct_ptimer && offset) {
>> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>> -                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
>> +                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
>>                          isb();
>>                  }
>>          }
>>
>> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> 
> Why moving the HCR_EL2 update? I don't grok what it changes. Or is it

This the line of code which flips the TGE and making timer cval ready to 
handle the TGE flip is more safe way(avoids even corner case of false 
interrupt triggers) than changing after the flipping?

> that you end-up with spurious interrupts because your GIC is slow to
> retire interrupts that are transiently pending?

IIUC, If there are any transient interrupts or asserted already, anyway 
they will be handled when irq is unmasked.

> 
> Thanks,
> 
> 	M.
> 

Thanks,
Ganapat
Marc Zyngier Sept. 24, 2023, 9:48 a.m. UTC | #5
On Tue, 19 Sep 2023 07:15:44 +0100,
Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
> 
> 
> On 18-09-2023 04:59 pm, Marc Zyngier wrote:
> > On Fri, 15 Sep 2023 10:57:46 +0100,
> > Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> >> 
> >> This patch did not work.
> >> After adding changes as in below diff, it is started working.
> > 
> > Thanks for looking into this.
> > 
> >> 
> >> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
> >> b/arch/arm64/kvm/hyp/vhe/switch.c
> >> index b0b07658f77d..91d2cfb03e26 100644
> >> --- a/arch/arm64/kvm/hyp/vhe/switch.c
> >> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
> >> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
> >>                          val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
> >> 
> >>                  if (map.direct_ptimer) {
> >> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
> >> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);
> > 
> > Duh, of course. Silly me.
> > 
> >>                          isb();
> >>                  }
> >>          }
> >> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
> >> 
> >>          ___deactivate_traps(vcpu);
> >> 
> >> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> >> -
> >>          if (has_cntpoff()) {
> >>                  struct timer_map map;
> >>                  u64 val, offset;
> >> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
> >>                   * We're exiting the guest. Save the latest CVAL value
> >>                   * to memory and apply the offset now that TGE is set.
> >>                   */
> >> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
> >> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
> >>                  if (map.direct_ptimer == vcpu_ptimer(vcpu))
> >>                          __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
> >>                  if (map.direct_ptimer == vcpu_hptimer(vcpu))
> >> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
> >>                  offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> >> 
> >>                  if (map.direct_ptimer && offset) {
> >> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
> >> -                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
> >> +                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
> >>                          isb();
> >>                  }
> >>          }
> >> 
> >> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
> > 
> > Why moving the HCR_EL2 update? I don't grok what it changes. Or is it
> 
> This the line of code which flips the TGE and making timer cval ready
> to handle the TGE flip is more safe way(avoids even corner case of
> false interrupt triggers) than changing after the flipping?

That's pretty dubious. Do you actually see it firing on your HW?

> 
> > that you end-up with spurious interrupts because your GIC is slow to
> > retire interrupts that are transiently pending?
> 
> IIUC, If there are any transient interrupts or asserted already,
> anyway they will be handled when irq is unmasked.

That's the idea. But my question is whether you observe spurious
interrupts when not changing the ordering.

Thanks,

	M.
Ganapatrao Kulkarni Sept. 25, 2023, 5:35 a.m. UTC | #6
On 24-09-2023 03:18 pm, Marc Zyngier wrote:
> On Tue, 19 Sep 2023 07:15:44 +0100,
> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>
>>
>>
>> On 18-09-2023 04:59 pm, Marc Zyngier wrote:
>>> On Fri, 15 Sep 2023 10:57:46 +0100,
>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>
>>>> This patch did not work.
>>>> After adding changes as in below diff, it is started working.
>>>
>>> Thanks for looking into this.
>>>
>>>>
>>>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
>>>> b/arch/arm64/kvm/hyp/vhe/switch.c
>>>> index b0b07658f77d..91d2cfb03e26 100644
>>>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>>>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>>>> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>>>>                           val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
>>>>
>>>>                   if (map.direct_ptimer) {
>>>> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
>>>> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);
>>>
>>> Duh, of course. Silly me.
>>>
>>>>                           isb();
>>>>                   }
>>>>           }
>>>> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>>>
>>>>           ___deactivate_traps(vcpu);
>>>>
>>>> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>> -
>>>>           if (has_cntpoff()) {
>>>>                   struct timer_map map;
>>>>                   u64 val, offset;
>>>> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>>>                    * We're exiting the guest. Save the latest CVAL value
>>>>                    * to memory and apply the offset now that TGE is set.
>>>>                    */
>>>> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
>>>> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
>>>>                   if (map.direct_ptimer == vcpu_ptimer(vcpu))
>>>>                           __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
>>>>                   if (map.direct_ptimer == vcpu_hptimer(vcpu))
>>>> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
>>>>                   offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>>
>>>>                   if (map.direct_ptimer && offset) {
>>>> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>> -                       write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
>>>> +                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
>>>>                           isb();
>>>>                   }
>>>>           }
>>>>
>>>> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>
>>> Why moving the HCR_EL2 update? I don't grok what it changes. Or is it
>>
>> This the line of code which flips the TGE and making timer cval ready
>> to handle the TGE flip is more safe way(avoids even corner case of
>> false interrupt triggers) than changing after the flipping?
> 
> That's pretty dubious. Do you actually see it firing on your HW?
> 
>>
>>> that you end-up with spurious interrupts because your GIC is slow to
>>> retire interrupts that are transiently pending?
>>
>> IIUC, If there are any transient interrupts or asserted already,
>> anyway they will be handled when irq is unmasked.
> 
> That's the idea. But my question is whether you observe spurious
> interrupts when not changing the ordering.

I tried with keeping the ordering (i.e flip TGE then change cval) and i 
don't see any issue as such. IMO, it is better to have cval updated 
before TGE flip, anyway either way works for us.

> 
> Thanks,
> 
> 	M.
> 

Thanks,
Ganapat
Ganapatrao Kulkarni Sept. 25, 2023, 5:40 a.m. UTC | #7
On 25-09-2023 11:05 am, Ganapatrao Kulkarni wrote:
> 
> 
> On 24-09-2023 03:18 pm, Marc Zyngier wrote:
>> On Tue, 19 Sep 2023 07:15:44 +0100,
>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>
>>>
>>>
>>> On 18-09-2023 04:59 pm, Marc Zyngier wrote:
>>>> On Fri, 15 Sep 2023 10:57:46 +0100,
>>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>>
>>>>> This patch did not work.
>>>>> After adding changes as in below diff, it is started working.
>>>>
>>>> Thanks for looking into this.
>>>>
>>>>>
>>>>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
>>>>> b/arch/arm64/kvm/hyp/vhe/switch.c
>>>>> index b0b07658f77d..91d2cfb03e26 100644
>>>>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>>>>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>>>>> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu 
>>>>> *vcpu)
>>>>>                           val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
>>>>>
>>>>>                   if (map.direct_ptimer) {
>>>>> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
>>>>> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);
>>>>
>>>> Duh, of course. Silly me.
>>>>
>>>>>                           isb();
>>>>>                   }
>>>>>           }
>>>>> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu 
>>>>> *vcpu)
>>>>>
>>>>>           ___deactivate_traps(vcpu);
>>>>>
>>>>> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>>> -
>>>>>           if (has_cntpoff()) {
>>>>>                   struct timer_map map;
>>>>>                   u64 val, offset;
>>>>> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu 
>>>>> *vcpu)
>>>>>                    * We're exiting the guest. Save the latest CVAL 
>>>>> value
>>>>>                    * to memory and apply the offset now that TGE is 
>>>>> set.
>>>>>                    */
>>>>> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
>>>>> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
>>>>>                   if (map.direct_ptimer == vcpu_ptimer(vcpu))
>>>>>                           __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
>>>>>                   if (map.direct_ptimer == vcpu_hptimer(vcpu))
>>>>> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct 
>>>>> kvm_vcpu *vcpu)
>>>>>                   offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>>>
>>>>>                   if (map.direct_ptimer && offset) {
>>>>> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>>> -                       write_sysreg_s(val + offset, 
>>>>> SYS_CNTP_CVAL_EL0);
>>>>> +                       write_sysreg_el0(val + offset, SYS_CNTP_CVAL);
>>>>>                           isb();
>>>>>                   }
>>>>>           }
>>>>>
>>>>> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>>
>>>> Why moving the HCR_EL2 update? I don't grok what it changes. Or is it
>>>
>>> This the line of code which flips the TGE and making timer cval ready
>>> to handle the TGE flip is more safe way(avoids even corner case of
>>> false interrupt triggers) than changing after the flipping?
>>
>> That's pretty dubious. Do you actually see it firing on your HW?
>>
>>>
>>>> that you end-up with spurious interrupts because your GIC is slow to
>>>> retire interrupts that are transiently pending?
>>>
>>> IIUC, If there are any transient interrupts or asserted already,
>>> anyway they will be handled when irq is unmasked.
>>
>> That's the idea. But my question is whether you observe spurious
>> interrupts when not changing the ordering.
> 
> I tried with keeping the ordering (i.e flip TGE then change cval) and i 
> don't see any issue as such. IMO, it is better to have cval updated 
> before TGE flip, anyway either way works for us.
> 

Please feel free to add,
Tested-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>

>>
>> Thanks,
>>
>>     M.
>>
> 
> Thanks,
> Ganapat

Thanks,
Ganapat
Ganapatrao Kulkarni Oct. 10, 2023, 4:54 a.m. UTC | #8
Hi Marc,

On 25-09-2023 11:10 am, Ganapatrao Kulkarni wrote:
> 
> 
> On 25-09-2023 11:05 am, Ganapatrao Kulkarni wrote:
>>
>>
>> On 24-09-2023 03:18 pm, Marc Zyngier wrote:
>>> On Tue, 19 Sep 2023 07:15:44 +0100,
>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>
>>>>
>>>>
>>>> On 18-09-2023 04:59 pm, Marc Zyngier wrote:
>>>>> On Fri, 15 Sep 2023 10:57:46 +0100,
>>>>> Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>>>>>>
>>>>>> This patch did not work.
>>>>>> After adding changes as in below diff, it is started working.
>>>>>
>>>>> Thanks for looking into this.
>>>>>
>>>>>>
>>>>>> diff --git a/arch/arm64/kvm/hyp/vhe/switch.c
>>>>>> b/arch/arm64/kvm/hyp/vhe/switch.c
>>>>>> index b0b07658f77d..91d2cfb03e26 100644
>>>>>> --- a/arch/arm64/kvm/hyp/vhe/switch.c
>>>>>> +++ b/arch/arm64/kvm/hyp/vhe/switch.c
>>>>>> @@ -117,7 +117,7 @@ static void __activate_traps(struct kvm_vcpu 
>>>>>> *vcpu)
>>>>>>                           val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
>>>>>>
>>>>>>                   if (map.direct_ptimer) {
>>>>>> -                       write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
>>>>>> +                       write_sysreg_el0(val, SYS_CNTP_CVAL);
>>>>>
>>>>> Duh, of course. Silly me.
>>>>>
>>>>>>                           isb();
>>>>>>                   }
>>>>>>           }
>>>>>> @@ -161,8 +161,6 @@ static void __deactivate_traps(struct kvm_vcpu 
>>>>>> *vcpu)
>>>>>>
>>>>>>           ___deactivate_traps(vcpu);
>>>>>>
>>>>>> -       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>>>> -
>>>>>>           if (has_cntpoff()) {
>>>>>>                   struct timer_map map;
>>>>>>                   u64 val, offset;
>>>>>> @@ -173,7 +171,7 @@ static void __deactivate_traps(struct kvm_vcpu 
>>>>>> *vcpu)
>>>>>>                    * We're exiting the guest. Save the latest CVAL 
>>>>>> value
>>>>>>                    * to memory and apply the offset now that TGE 
>>>>>> is set.
>>>>>>                    */
>>>>>> -               val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
>>>>>> +               val = read_sysreg_el0(SYS_CNTP_CVAL);
>>>>>>                   if (map.direct_ptimer == vcpu_ptimer(vcpu))
>>>>>>                           __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
>>>>>>                   if (map.direct_ptimer == vcpu_hptimer(vcpu))
>>>>>> @@ -182,12 +180,13 @@ static void __deactivate_traps(struct 
>>>>>> kvm_vcpu *vcpu)
>>>>>>                   offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>>>>
>>>>>>                   if (map.direct_ptimer && offset) {
>>>>>> -                       offset = read_sysreg_s(SYS_CNTPOFF_EL2);
>>>>>> -                       write_sysreg_s(val + offset, 
>>>>>> SYS_CNTP_CVAL_EL0);
>>>>>> +                       write_sysreg_el0(val + offset, 
>>>>>> SYS_CNTP_CVAL);
>>>>>>                           isb();
>>>>>>                   }
>>>>>>           }
>>>>>>
>>>>>> +       write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
>>>>>
>>>>> Why moving the HCR_EL2 update? I don't grok what it changes. Or is it
>>>>
>>>> This the line of code which flips the TGE and making timer cval ready
>>>> to handle the TGE flip is more safe way(avoids even corner case of
>>>> false interrupt triggers) than changing after the flipping?
>>>
>>> That's pretty dubious. Do you actually see it firing on your HW?
>>>
>>>>
>>>>> that you end-up with spurious interrupts because your GIC is slow to
>>>>> retire interrupts that are transiently pending?
>>>>
>>>> IIUC, If there are any transient interrupts or asserted already,
>>>> anyway they will be handled when irq is unmasked.
>>>
>>> That's the idea. But my question is whether you observe spurious
>>> interrupts when not changing the ordering.
>>
>> I tried with keeping the ordering (i.e flip TGE then change cval) and 
>> i don't see any issue as such. IMO, it is better to have cval updated 
>> before TGE flip, anyway either way works for us.
>>
> 
> Please feel free to add,
> Tested-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> 

Are you planning to take this patch as part of NV-V11 or going as fix 
patch to next?

Thanks,
Ganapat
diff mbox series

Patch

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 98b0e8ac02ae..9fe3fa6ed98a 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -514,6 +514,59 @@  static void set_cntpoff(u64 cntpoff)
 		write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
 }
 
+static void ptimer_cval_save(struct arch_timer_context *ctx, u64 offset)
+{
+	unsigned long flags;
+	u64 cval;
+
+	local_irq_save(flags);
+	cval = read_sysreg_el0(SYS_CNTP_CVAL);
+	timer_set_cval(ctx, cval);
+	cval += offset;
+	write_sysreg_el0(cval, SYS_CNTP_CVAL);
+	isb();
+	local_irq_restore(flags);
+}
+
+static void ptimer_cval_restore(struct arch_timer_context *ctx, u64 offset)
+{
+	unsigned long flags;
+	u64 cval;
+
+	local_irq_save(flags);
+	cval = timer_get_cval(ctx);
+	write_sysreg_el0(cval, SYS_CNTP_CVAL);
+	isb();
+	local_irq_restore(flags);
+}
+
+void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save)
+{
+	struct timer_map map;
+	struct arch_timer_cpu *timer;
+	struct arch_timer_context *ctxp;
+	u64 offset;
+
+	get_timer_map(vcpu, &map);
+	ctxp = map.direct_ptimer;
+
+	if (unlikely(ctxp == NULL))
+		return;
+
+	offset = timer_get_offset(ctxp);
+	if (!offset)
+		return;
+
+	timer = vcpu_timer(ctxp->vcpu);
+	if (!timer->enabled || !ctxp->loaded)
+		return;
+
+	if (save)
+		ptimer_cval_save(ctxp, offset);
+	else
+		ptimer_cval_restore(ctxp, offset);
+}
+
 static void timer_save_state(struct arch_timer_context *ctx)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
@@ -562,9 +615,7 @@  static void timer_save_state(struct arch_timer_context *ctx)
 		timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
 		cval = read_sysreg_el0(SYS_CNTP_CVAL);
 
-		if (!has_cntpoff())
-			cval -= timer_get_offset(ctx);
-
+		cval -= timer_get_offset(ctx);
 		timer_set_cval(ctx, cval);
 
 		/* Disable the timer */
@@ -650,8 +701,7 @@  static void timer_restore_state(struct arch_timer_context *ctx)
 		cval = timer_get_cval(ctx);
 		offset = timer_get_offset(ctx);
 		set_cntpoff(offset);
-		if (!has_cntpoff())
-			cval += offset;
+		cval += offset;
 		write_sysreg_el0(cval, SYS_CNTP_CVAL);
 		isb();
 		write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 561cb53e19ce..097fcaf7b208 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -100,6 +100,10 @@  static void __activate_traps(struct kvm_vcpu *vcpu)
 		hcr |= vhcr_el2;
 	}
 
+	/* Restore CVAL */
+	if (has_cntpoff())
+		kvm_ptimer_cval_save_restore(vcpu, false);
+
 	___activate_traps(vcpu, hcr);
 
 	val = read_sysreg(cpacr_el1);
@@ -141,6 +145,15 @@  static void __deactivate_traps(struct kvm_vcpu *vcpu)
 
 	___deactivate_traps(vcpu);
 
+	/*
+	 * For VHE Host, HCR_EL2.{E2H, TGE} is {1, 1}, FEAT_ECV
+	 * is disabled and CNTPOFF_EL2 value is treated as zero.
+	 * Hence, need to save guest written CVAL in memory and
+	 * increment PTIMER's CVAL by CNTPOFF to avoid false interrupt.
+	 */
+	if (has_cntpoff())
+		kvm_ptimer_cval_save_restore(vcpu, true);
+
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 
 	/*
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ea77a569a907..ce3f4d9e7dd4 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -117,6 +117,7 @@  void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
 
 void kvm_timer_init_vhe(void);
+void kvm_ptimer_cval_save_restore(struct kvm_vcpu *vcpu, bool save);
 
 #define vcpu_timer(v)	(&(v)->arch.timer_cpu)
 #define vcpu_get_timer(v,t)	(&vcpu_timer(v)->timers[(t)])