diff mbox series

[v3,9/9] KVM: riscv: selftests: Add sstc timer test

Message ID 64e0637cd6f22dd7557ed44bd2242001e7830d1c.1694421911.git.haibo1.xu@intel.com (mailing list archive)
State Handled Elsewhere
Headers show
Series RISCV: Add kvm Sstc timer selftests | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 0bb80ecc33a8
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 5 and now 5
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 9 this patch: 9
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 9 this patch: 9
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 25 this patch: 25
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Unbalanced braces around else statement CHECK: braces {} should be used on all arms of this statement WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Xu, Haibo1 Sept. 14, 2023, 1:37 a.m. UTC
Add a KVM selftest to validate the Sstc timer functionality.
The test was ported from the arm64 arch timer test.

Signed-off-by: Haibo Xu <haibo1.xu@intel.com>
---
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/aarch64/arch_timer.c        |  12 +-
 tools/testing/selftests/kvm/arch_timer.c      |  10 +-
 .../selftests/kvm/include/riscv/arch_timer.h  |  80 +++++++++++++
 .../selftests/kvm/include/riscv/processor.h   |  10 ++
 .../selftests/kvm/include/timer_test.h        |   3 +-
 .../testing/selftests/kvm/riscv/arch_timer.c  | 107 ++++++++++++++++++
 7 files changed, 214 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/riscv/arch_timer.h
 create mode 100644 tools/testing/selftests/kvm/riscv/arch_timer.c

Comments

Andrew Jones Sept. 14, 2023, 9:36 a.m. UTC | #1
On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> Add a KVM selftests to validate the Sstc timer functionality.
> The test was ported from arm64 arch timer test.
> 
> Signed-off-by: Haibo Xu <haibo1.xu@intel.com>
> ---
>  tools/testing/selftests/kvm/Makefile          |   1 +
>  .../selftests/kvm/aarch64/arch_timer.c        |  12 +-
>  tools/testing/selftests/kvm/arch_timer.c      |  10 +-
>  .../selftests/kvm/include/riscv/arch_timer.h  |  80 +++++++++++++
>  .../selftests/kvm/include/riscv/processor.h   |  10 ++
>  .../selftests/kvm/include/timer_test.h        |   3 +-
>  .../testing/selftests/kvm/riscv/arch_timer.c  | 107 ++++++++++++++++++
>  7 files changed, 214 insertions(+), 9 deletions(-)
>  create mode 100644 tools/testing/selftests/kvm/include/riscv/arch_timer.h
>  create mode 100644 tools/testing/selftests/kvm/riscv/arch_timer.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index a5bc8e156047..73d393a5f337 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -180,6 +180,7 @@ TEST_GEN_PROGS_s390x += rseq_test
>  TEST_GEN_PROGS_s390x += set_memory_region_test
>  TEST_GEN_PROGS_s390x += kvm_binary_stats_test
>  
> +TEST_GEN_PROGS_riscv += arch_timer
>  TEST_GEN_PROGS_riscv += demand_paging_test
>  TEST_GEN_PROGS_riscv += dirty_log_test
>  TEST_GEN_PROGS_riscv += guest_print_test
> diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
> index 4688b258247c..178f37737d33 100644
> --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
> +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
> @@ -190,10 +190,14 @@ struct kvm_vm *test_vm_create(void)
>  	vm_init_vector_tables(vm);
>  	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
>  
> -	if (!test_args.offset.reserved) {
> -		if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
> -			vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
> -		else
> +	if (!test_args.reserved) {
> +		if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
> +			struct kvm_arm_counter_offset offset = {
> +				.counter_offset = test_args.counter_offset,
> +				.reserved = 0,
> +			};
> +			vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
> +		} else
>  			TEST_FAIL("no support for global offset\n");
>  	}
>  
> diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
> index ea3dd1a772b0..184a0f5f484d 100644
> --- a/tools/testing/selftests/kvm/arch_timer.c
> +++ b/tools/testing/selftests/kvm/arch_timer.c
> @@ -33,7 +33,7 @@ struct test_args test_args = {
>  	.nr_iter = NR_TEST_ITERS_DEF,
>  	.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
>  	.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
> -	.offset = { .reserved = 1 },
> +	.reserved = 1,
>  };
>  
>  struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
> @@ -72,6 +72,8 @@ static void *test_vcpu_run(void *arg)
>  		TEST_FAIL("Unexpected guest exit\n");
>  	}
>  
> +	pr_info("PASS(vCPU-%d).\n", vcpu_idx);
> +
>  	return NULL;
>  }
>  
> @@ -186,7 +188,7 @@ static void test_print_help(char *name)
>  		TIMER_TEST_PERIOD_MS_DEF);
>  	pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
>  		TIMER_TEST_MIGRATION_FREQ_MS);
> -	pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
> +	pr_info("\t-o: Counter offset (in counter cycles, default: 0)[aarch64-only]\n");
                                                                     ^
                                          need a space here, before "[aarch64-only]"

>  	pr_info("\t-h: print this help screen\n");
>  }
>  
> @@ -214,8 +216,8 @@ static bool parse_args(int argc, char *argv[])
>  			test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
>  			break;
>  		case 'o':
> -			test_args.offset.counter_offset = strtol(optarg, NULL, 0);
> -			test_args.offset.reserved = 0;
> +			test_args.counter_offset = strtol(optarg, NULL, 0);
> +			test_args.reserved = 0;
>  			break;
>  		case 'h':
>  		default:
> diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
> new file mode 100644
> index 000000000000..5c063df13b98
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
> @@ -0,0 +1,80 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * RISC-V Arch Timer(sstc) specific interface
> + *
> + * Copyright (c) 2023 Intel Corporation
> + */
> +
> +#ifndef SELFTEST_KVM_ARCH_TIMER_H
> +#define SELFTEST_KVM_ARCH_TIMER_H
> +
> +#include <asm/csr.h>
> +
> +static unsigned long timer_freq;
> +
> +#define msec_to_cycles(msec)	\
> +	((timer_freq) * (uint64_t)(msec) / 1000)
> +
> +#define usec_to_cycles(usec)	\
> +	((timer_freq) * (uint64_t)(usec) / 1000000)
> +
> +#define cycles_to_usec(cycles) \
> +	((uint64_t)(cycles) * 1000000 / (timer_freq))
> +
> +static inline uint64_t timer_get_cycles(void)
> +{
> +	return csr_read(CSR_TIME);
> +}
> +
> +static inline void timer_set_cmp(uint64_t cval)
> +{
> +	csr_write(CSR_STIMECMP, cval);
> +}
> +
> +static inline uint64_t timer_get_cmp(void)
> +{
> +	return csr_read(CSR_STIMECMP);
> +}
> +
> +static inline void timer_irq_enable(void)
> +{
> +	csr_set(CSR_SIE, IE_TIE);
> +}
> +
> +static inline void timer_irq_disable(void)
> +{
> +	csr_clear(CSR_SIE, IE_TIE);
> +}
> +
> +static inline void timer_set_next_cmp_ms(uint32_t msec)
> +{
> +	uint64_t now_ct = timer_get_cycles();
> +	uint64_t next_ct = now_ct + msec_to_cycles(msec);
> +
> +	timer_set_cmp(next_ct);
> +}
> +
> +static inline void cpu_relax(void)
> +{
> +#ifdef __riscv_zihintpause
> +	asm volatile("pause" ::: "memory");
> +#else
> +	/* Encoding of the pause instruction */
> +	asm volatile(".4byte 0x100000F" ::: "memory");
> +#endif
> +}

cpu_relax() should go to include/riscv/processor.h

> +
> +static inline void __delay(uint64_t cycles)
> +{
> +	uint64_t start = timer_get_cycles();
> +
> +	while ((timer_get_cycles() - start) < cycles)
> +		cpu_relax();
> +}
> +
> +static inline void udelay(unsigned long usec)
> +{
> +	__delay(usec_to_cycles(usec));
> +}
> +
> +#endif /* SELFTEST_KVM_ARCH_TIMER_H */
> diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
> index 7d5517648ea7..268c549f87cf 100644
> --- a/tools/testing/selftests/kvm/include/riscv/processor.h
> +++ b/tools/testing/selftests/kvm/include/riscv/processor.h
> @@ -158,4 +158,14 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
>  			unsigned long arg3, unsigned long arg4,
>  			unsigned long arg5);
>  
> +static inline void local_irq_enable(void)
> +{
> +	csr_set(CSR_SSTATUS, SR_SIE);
> +}
> +
> +static inline void local_irq_disable(void)
> +{
> +	csr_clear(CSR_SSTATUS, SR_SIE);
> +}
> +
>  #endif /* SELFTEST_KVM_PROCESSOR_H */
> diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
> index 04e8aff2dc22..00b455235b82 100644
> --- a/tools/testing/selftests/kvm/include/timer_test.h
> +++ b/tools/testing/selftests/kvm/include/timer_test.h
> @@ -22,7 +22,8 @@ struct test_args {
>  	int nr_iter;
>  	int timer_period_ms;
>  	int migration_freq_ms;
> -	struct kvm_arm_counter_offset offset;
> +	uint64_t counter_offset;
> +	uint64_t reserved;

These need a comment above them explaining that they are the members of
struct kvm_arm_counter_offset or they can be renamed with arm_ prefixes.

>  };
>  
>  /* Shared variables between host and guest */
> diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
> new file mode 100644
> index 000000000000..13bf184d1ff5
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
> + *
> + * The test validates the sstc timer IRQs using vstimecmp registers.
> + * It's ported from the aarch64 arch_timer test.
> + *
> + * Copyright (c) 2023, Intel Corporation.
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include "arch_timer.h"
> +#include "kvm_util.h"
> +#include "processor.h"
> +#include "timer_test.h"
> +
> +static int timer_irq = IRQ_S_TIMER;
> +
> +static void guest_irq_handler(struct ex_regs *regs)
> +{
> +	uint64_t xcnt, xcnt_diff_us, cmp;
> +	unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
> +	uint32_t cpu = guest_get_vcpuid();
> +	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
> +
> +	timer_irq_disable();
> +
> +	xcnt = timer_get_cycles();
> +	cmp = timer_get_cmp();
> +	xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
> +
> +	/* Make sure we are dealing with the correct timer IRQ */
> +	GUEST_ASSERT_EQ(intid, timer_irq);
> +
> +	__GUEST_ASSERT(xcnt >= cmp,
> +			"xcnt = 0x%llx, cmp = 0x%llx, xcnt_diff_us = 0x%llx",
> +			xcnt, cmp, xcnt_diff_us);
> +
> +	WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
> +}
> +
> +static void guest_run(struct test_vcpu_shared_data *shared_data)
> +{
> +	uint32_t irq_iter, config_iter;
> +
> +	shared_data->nr_iter = 0;
> +	shared_data->guest_stage = 0;
> +
> +	for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
> +		/* Setup the next interrupt */
> +		timer_set_next_cmp_ms(test_args.timer_period_ms);
> +		shared_data->xcnt = timer_get_cycles();
> +		timer_irq_enable();
> +
> +		/* Setup a timeout for the interrupt to arrive */
> +		udelay(msecs_to_usecs(test_args.timer_period_ms) +
> +			TIMER_TEST_ERR_MARGIN_US);
> +
> +		irq_iter = READ_ONCE(shared_data->nr_iter);
> +		GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
> +	}
> +}
> +
> +static void guest_code(void)
> +{
> +	uint32_t cpu = guest_get_vcpuid();
> +	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
> +
> +	timer_irq_disable();
> +	local_irq_enable();
> +
> +	guest_run(shared_data);
> +
> +	GUEST_DONE();
> +}
> +
> +struct kvm_vm *test_vm_create(void)
> +{
> +	struct kvm_vm *vm;
> +	int nr_vcpus = test_args.nr_vcpus;
> +
> +	vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
> +	__TEST_REQUIRE(__vcpu_has_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
> +				   "SSTC not available, skipping test\n");
> +
> +	vm_init_vector_tables(vm);
> +	vm_install_interrupt_handler(vm, guest_irq_handler);
> +
> +	for (int i = 0; i < nr_vcpus; i++)
> +		vcpu_init_vector_tables(vcpus[i]);
> +
> +	/* Initialize guest timer frequency. */
> +	vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
> +	sync_global_to_guest(vm, timer_freq);
> +	pr_debug("timer_freq: %lu\n", timer_freq);
> +
> +	/* Make all the test's cmdline args visible to the guest */
> +	sync_global_to_guest(vm, test_args);
> +
> +	return vm;
> +}
> +
> +void test_vm_cleanup(struct kvm_vm *vm)
> +{
> +	kvm_vm_free(vm);
> +}
> -- 
> 2.34.1
>

Besides the three minor comments, this looks good to me.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>

Thanks,
drew
Andrew Jones Sept. 14, 2023, 9:51 a.m. UTC | #2
On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> Add a KVM selftests to validate the Sstc timer functionality.
> The test was ported from arm64 arch timer test.

I just tried this test out. Running it over and over again on QEMU I see
it works sometimes, but it frequently fails with the
GUEST_ASSERT_EQ(config_iter + 1, irq_iter) assert and at least once I
also saw the __GUEST_ASSERT(xcnt >= cmp) assert.

Thanks,
drew
Conor Dooley Sept. 14, 2023, 9:52 a.m. UTC | #3
On Thu, Sep 14, 2023 at 11:36:01AM +0200, Andrew Jones wrote:
> > +static inline void cpu_relax(void)
> > +{
> > +#ifdef __riscv_zihintpause
> > +	asm volatile("pause" ::: "memory");
> > +#else
> > +	/* Encoding of the pause instruction */
> > +	asm volatile(".4byte 0x100000F" ::: "memory");
> > +#endif
> > +}
> 
> cpu_relax() should go to include/riscv/processor.h

Can the one from asm/vdso/processor.h be reused, or are there special
considerations preventing that?
Andrew Jones Sept. 14, 2023, 10:15 a.m. UTC | #4
On Thu, Sep 14, 2023 at 10:52:15AM +0100, Conor Dooley wrote:
> On Thu, Sep 14, 2023 at 11:36:01AM +0200, Andrew Jones wrote:
> > > +static inline void cpu_relax(void)
> > > +{
> > > +#ifdef __riscv_zihintpause
> > > +	asm volatile("pause" ::: "memory");
> > > +#else
> > > +	/* Encoding of the pause instruction */
> > > +	asm volatile(".4byte 0x100000F" ::: "memory");
> > > +#endif
> > > +}
> > 
> > cpu_relax() should go to include/riscv/processor.h
> 
> Can the one from asm/vdso/processor.h be reused, or are there special
> considerations preventing that?

We'd need to copy it into tools/arch/riscv/include/asm, but it could be
done. Hmm, now that I look at it, I see we're missing the barrier() call
in this kvm selftests version.

Thanks,
drew
Haibo Xu Sept. 15, 2023, 6:21 a.m. UTC | #5
On Thu, Sep 14, 2023 at 5:52 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> > Add a KVM selftests to validate the Sstc timer functionality.
> > The test was ported from arm64 arch timer test.
>
> I just tried this test out. Running it over and over again on QEMU I see
> it works sometimes, but it frequently fails with the
> GUEST_ASSERT_EQ(config_iter + 1, irq_iter) assert and at least once I
> also saw the __GUEST_ASSERT(xcnt >= cmp) assert.
>

Good catch!

I can also reproduce this issue, and it seems to be common to both
arm64 and riscv, because it also happens in an arm64 QEMU VM.

It seems like a synchronization issue between host and guest shared
variables. Will double check the test code.

> Thanks,
> drew
Haibo Xu Sept. 15, 2023, 6:23 a.m. UTC | #6
On Thu, Sep 14, 2023 at 6:15 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Sep 14, 2023 at 10:52:15AM +0100, Conor Dooley wrote:
> > On Thu, Sep 14, 2023 at 11:36:01AM +0200, Andrew Jones wrote:
> > > > +static inline void cpu_relax(void)
> > > > +{
> > > > +#ifdef __riscv_zihintpause
> > > > + asm volatile("pause" ::: "memory");
> > > > +#else
> > > > + /* Encoding of the pause instruction */
> > > > + asm volatile(".4byte 0x100000F" ::: "memory");
> > > > +#endif
> > > > +}
> > >
> > > cpu_relax() should go to include/riscv/processor.h
> >
> > Can the one from asm/vdso/processor.h be reused, or are there special
> > considerations preventing that?
>
> We'd need to copy it into tools/arch/riscv/include/asm, but it could be
> done. Hmm, now that I look at it, I see we're missing the barrier() call
> in this kvm selftests version.
>

Will reuse the one from asm/vdso/processor.h and copy it to
tools/arch/riscv/include/asm.

> Thanks,
> drew
Haibo Xu Dec. 4, 2023, 2:42 a.m. UTC | #7
On Fri, Sep 15, 2023 at 2:21 PM Haibo Xu <xiaobo55x@gmail.com> wrote:
>
> On Thu, Sep 14, 2023 at 5:52 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> > > Add a KVM selftests to validate the Sstc timer functionality.
> > > The test was ported from arm64 arch timer test.
> >
> > I just tried this test out. Running it over and over again on QEMU I see
> > it works sometimes, but it frequently fails with the
> > GUEST_ASSERT_EQ(config_iter + 1, irq_iter) assert and at least once I
> > also saw the __GUEST_ASSERT(xcnt >= cmp) assert.
> >
>
> Good catch!
>
> I can also reproduce this issue and it is a common problem for both
> arm64 and riscv because it also happens in a arm64 Qemu VM.
>
> It seems like a synchronization issue between host and guest shared
> variables. Will double check the test code.
>
> > Thanks,
> > drew

Hi Andrew,

After several rounds of regression testing, here are some findings:
1. The intermittent failure also happens on an ARM64 QEMU VM, even
with the initial arch_timer commit (4959d8650e9f4).
2. It didn't happen on ARM64 hardware (but a different failure occurred
during the stress test).
3. The failure is closely related to TIMER_TEST_ERR_MARGIN_US
(default 100): after increasing the macro to 300, the failure could
no longer be reproduced in a 1000-loop stress test in a RISC-V QEMU VM.

So my suggestion is to expose TIMER_TEST_ERR_MARGIN_US as an
arch_timer test command-line parameter, so that it can be tuned
for a specific test environment.

What's your opinion?

Regards,
Haibo
Andrew Jones Dec. 4, 2023, 11:32 a.m. UTC | #8
On Mon, Dec 04, 2023 at 10:42:24AM +0800, Haibo Xu wrote:
> On Fri, Sep 15, 2023 at 2:21 PM Haibo Xu <xiaobo55x@gmail.com> wrote:
> >
> > On Thu, Sep 14, 2023 at 5:52 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> > > > Add a KVM selftests to validate the Sstc timer functionality.
> > > > The test was ported from arm64 arch timer test.
> > >
> > > I just tried this test out. Running it over and over again on QEMU I see
> > > it works sometimes, but it frequently fails with the
> > > GUEST_ASSERT_EQ(config_iter + 1, irq_iter) assert and at least once I
> > > also saw the __GUEST_ASSERT(xcnt >= cmp) assert.
> > >
> >
> > Good catch!
> >
> > I can also reproduce this issue and it is a common problem for both
> > arm64 and riscv because it also happens in a arm64 Qemu VM.
> >
> > It seems like a synchronization issue between host and guest shared
> > variables. Will double check the test code.
> >
> > > Thanks,
> > > drew
> 
> Hi Andrew,
> 
> After several rounds of regression testing, some findings:
> 1. The intermittent failure also happened on ARM64 Qemu VM, and even
> in the initial arch_timer commit(4959d8650e9f4).
> 2. it didn't happen on a ARM64 HW(but a different failure occured
> during stress test)
> 3. The failure have a close relationship with
> TIMER_TEST_ERR_MARGIN_US(default 100), and after increasing
>      the macro to 300, the failure couldn't reproduced in 1000 loops
> stress test in RISC-V Qemu VM
> 
> So my suggestion is we can expose the TIMER_TEST_ERR_MARGIN_US
> parameter as an arch_timer test arg parameter
> and tune it based on a specific test environment.
> 
> What's your opinion?

The concept of "timeout for an interrupt to arrive" is always going to
leave us exposed to random failures. Your suggestion of making the
timeout user configurable is probably the best we can do. I would
suggest also adding more descriptive failure text and a hint about
trying to adjust the timeout.

Or, one thing we do in kvm-unit-tests is to reduce typical delays, while
still allowing expected delays to be longer, by looping over a shorter
delay and a non-fatal check, e.g.

 pass = false;
 for (i = 0; i < 10; i++) {
   udelay(100);
   if (check(...)) {
      pass = true;
      break;
   }
 }
 assert(pass);

We could try that approach here too.

Thanks,
drew
Haibo Xu Dec. 5, 2023, 7:58 a.m. UTC | #9
On Mon, Dec 4, 2023 at 7:32 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Mon, Dec 04, 2023 at 10:42:24AM +0800, Haibo Xu wrote:
> > On Fri, Sep 15, 2023 at 2:21 PM Haibo Xu <xiaobo55x@gmail.com> wrote:
> > >
> > > On Thu, Sep 14, 2023 at 5:52 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > >
> > > > On Thu, Sep 14, 2023 at 09:37:03AM +0800, Haibo Xu wrote:
> > > > > Add a KVM selftests to validate the Sstc timer functionality.
> > > > > The test was ported from arm64 arch timer test.
> > > >
> > > > I just tried this test out. Running it over and over again on QEMU I see
> > > > it works sometimes, but it frequently fails with the
> > > > GUEST_ASSERT_EQ(config_iter + 1, irq_iter) assert and at least once I
> > > > also saw the __GUEST_ASSERT(xcnt >= cmp) assert.
> > > >
> > >
> > > Good catch!
> > >
> > > I can also reproduce this issue and it is a common problem for both
> > > arm64 and riscv because it also happens in a arm64 Qemu VM.
> > >
> > > It seems like a synchronization issue between host and guest shared
> > > variables. Will double check the test code.
> > >
> > > > Thanks,
> > > > drew
> >
> > Hi Andrew,
> >
> > After several rounds of regression testing, some findings:
> > 1. The intermittent failure also happened on ARM64 Qemu VM, and even
> > in the initial arch_timer commit(4959d8650e9f4).
> > 2. it didn't happen on a ARM64 HW(but a different failure occured
> > during stress test)
> > 3. The failure have a close relationship with
> > TIMER_TEST_ERR_MARGIN_US(default 100), and after increasing
> >      the macro to 300, the failure couldn't reproduced in 1000 loops
> > stress test in RISC-V Qemu VM
> >
> > So my suggestion is we can expose the TIMER_TEST_ERR_MARGIN_US
> > parameter as an arch_timer test arg parameter
> > and tune it based on a specific test environment.
> >
> > What's your opinion?
>
> The concept of "timeout for an interrupt to arrive" is always going to
> leave us exposed to random failures. Your suggestion of making the
> timeout user configurable is probably the best we can do. I would
> suggest also adding more descriptive failure text and a hint about
> trying to adjust the timeout.
>
> Or, one thing we do in kvm-unit-tests, is to reduce typical delays while
> allowing expected delays to be longer by looping over a shorter delay and
> a non-fatal check, i.e.
>
>  pass = false;
>  for (i = 0; i < 10; i++) {
>    udelay(100);
>    if (check(...)) {
>       pass = true;
>       break;
>    }
>  }
>  assert(pass);
>
> We could try that approach here too.
>
> Thanks,
> drew

Thanks for the feedback, I will send out patch set v4 soon!
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a5bc8e156047..73d393a5f337 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -180,6 +180,7 @@  TEST_GEN_PROGS_s390x += rseq_test
 TEST_GEN_PROGS_s390x += set_memory_region_test
 TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
+TEST_GEN_PROGS_riscv += arch_timer
 TEST_GEN_PROGS_riscv += demand_paging_test
 TEST_GEN_PROGS_riscv += dirty_log_test
 TEST_GEN_PROGS_riscv += guest_print_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 4688b258247c..178f37737d33 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -190,10 +190,14 @@  struct kvm_vm *test_vm_create(void)
 	vm_init_vector_tables(vm);
 	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
 
-	if (!test_args.offset.reserved) {
-		if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
-			vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
-		else
+	if (!test_args.reserved) {
+		if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+			struct kvm_arm_counter_offset offset = {
+				.counter_offset = test_args.counter_offset,
+				.reserved = 0,
+			};
+			vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+		} else
 			TEST_FAIL("no support for global offset\n");
 	}
 
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index ea3dd1a772b0..184a0f5f484d 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -33,7 +33,7 @@  struct test_args test_args = {
 	.nr_iter = NR_TEST_ITERS_DEF,
 	.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
 	.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
-	.offset = { .reserved = 1 },
+	.reserved = 1,
 };
 
 struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
@@ -72,6 +72,8 @@  static void *test_vcpu_run(void *arg)
 		TEST_FAIL("Unexpected guest exit\n");
 	}
 
+	pr_info("PASS(vCPU-%d).\n", vcpu_idx);
+
 	return NULL;
 }
 
@@ -186,7 +188,7 @@  static void test_print_help(char *name)
 		TIMER_TEST_PERIOD_MS_DEF);
 	pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
 		TIMER_TEST_MIGRATION_FREQ_MS);
-	pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
+	pr_info("\t-o: Counter offset (in counter cycles, default: 0)[aarch64-only]\n");
 	pr_info("\t-h: print this help screen\n");
 }
 
@@ -214,8 +216,8 @@  static bool parse_args(int argc, char *argv[])
 			test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
 			break;
 		case 'o':
-			test_args.offset.counter_offset = strtol(optarg, NULL, 0);
-			test_args.offset.reserved = 0;
+			test_args.counter_offset = strtol(optarg, NULL, 0);
+			test_args.reserved = 0;
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
new file mode 100644
index 000000000000..5c063df13b98
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
@@ -0,0 +1,80 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V Arch Timer(sstc) specific interface
+ *
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include <asm/csr.h>
+
+static unsigned long timer_freq;
+
+#define msec_to_cycles(msec)	\
+	((timer_freq) * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)	\
+	((timer_freq) * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+	((uint64_t)(cycles) * 1000000 / (timer_freq))
+
+static inline uint64_t timer_get_cycles(void)
+{
+	return csr_read(CSR_TIME);
+}
+
+static inline void timer_set_cmp(uint64_t cval)
+{
+	csr_write(CSR_STIMECMP, cval);
+}
+
+static inline uint64_t timer_get_cmp(void)
+{
+	return csr_read(CSR_STIMECMP);
+}
+
+static inline void timer_irq_enable(void)
+{
+	csr_set(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_irq_disable(void)
+{
+	csr_clear(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_set_next_cmp_ms(uint32_t msec)
+{
+	uint64_t now_ct = timer_get_cycles();
+	uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+	timer_set_cmp(next_ct);
+}
+
+static inline void cpu_relax(void)
+{
+#ifdef __riscv_zihintpause
+	asm volatile("pause" ::: "memory");
+#else
+	/* Encoding of the pause instruction */
+	asm volatile(".4byte 0x100000F" ::: "memory");
+#endif
+}
+
+static inline void __delay(uint64_t cycles)
+{
+	uint64_t start = timer_get_cycles();
+
+	while ((timer_get_cycles() - start) < cycles)
+		cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+	__delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 7d5517648ea7..268c549f87cf 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -158,4 +158,14 @@  struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 			unsigned long arg3, unsigned long arg4,
 			unsigned long arg5);
 
+static inline void local_irq_enable(void)
+{
+	csr_set(CSR_SSTATUS, SR_SIE);
+}
+
+static inline void local_irq_disable(void)
+{
+	csr_clear(CSR_SSTATUS, SR_SIE);
+}
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
index 04e8aff2dc22..00b455235b82 100644
--- a/tools/testing/selftests/kvm/include/timer_test.h
+++ b/tools/testing/selftests/kvm/include/timer_test.h
@@ -22,7 +22,8 @@  struct test_args {
 	int nr_iter;
 	int timer_period_ms;
 	int migration_freq_ms;
-	struct kvm_arm_counter_offset offset;
+	uint64_t counter_offset;
+	uint64_t reserved;
 };
 
 /* Shared variables between host and guest */
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
new file mode 100644
index 000000000000..13bf184d1ff5
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -0,0 +1,107 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
+ *
+ * The test validates the sstc timer IRQs using vstimecmp registers.
+ * It's ported from the aarch64 arch_timer test.
+ *
+ * Copyright (c) 2023, Intel Corporation.
+ */
+
+#define _GNU_SOURCE
+
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+
+static int timer_irq = IRQ_S_TIMER;
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+	uint64_t xcnt, xcnt_diff_us, cmp;
+	unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
+	uint32_t cpu = guest_get_vcpuid();
+	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+	timer_irq_disable();
+
+	xcnt = timer_get_cycles();
+	cmp = timer_get_cmp();
+	xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+	/* Make sure we are dealing with the correct timer IRQ */
+	GUEST_ASSERT_EQ(intid, timer_irq);
+
+	__GUEST_ASSERT(xcnt >= cmp,
+			"xcnt = 0x%llx, cmp = 0x%llx, xcnt_diff_us = 0x%llx",
+			xcnt, cmp, xcnt_diff_us);
+
+	WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_run(struct test_vcpu_shared_data *shared_data)
+{
+	uint32_t irq_iter, config_iter;
+
+	shared_data->nr_iter = 0;
+	shared_data->guest_stage = 0;
+
+	for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+		/* Setup the next interrupt */
+		timer_set_next_cmp_ms(test_args.timer_period_ms);
+		shared_data->xcnt = timer_get_cycles();
+		timer_irq_enable();
+
+		/* Setup a timeout for the interrupt to arrive */
+		udelay(msecs_to_usecs(test_args.timer_period_ms) +
+			TIMER_TEST_ERR_MARGIN_US);
+
+		irq_iter = READ_ONCE(shared_data->nr_iter);
+		GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
+	}
+}
+
+static void guest_code(void)
+{
+	uint32_t cpu = guest_get_vcpuid();
+	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+	timer_irq_disable();
+	local_irq_enable();
+
+	guest_run(shared_data);
+
+	GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+	struct kvm_vm *vm;
+	int nr_vcpus = test_args.nr_vcpus;
+
+	vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+	__TEST_REQUIRE(__vcpu_has_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
+				   "SSTC not available, skipping test\n");
+
+	vm_init_vector_tables(vm);
+	vm_install_interrupt_handler(vm, guest_irq_handler);
+
+	for (int i = 0; i < nr_vcpus; i++)
+		vcpu_init_vector_tables(vcpus[i]);
+
+	/* Initialize guest timer frequency. */
+	vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+	sync_global_to_guest(vm, timer_freq);
+	pr_debug("timer_freq: %lu\n", timer_freq);
+
+	/* Make all the test's cmdline args visible to the guest */
+	sync_global_to_guest(vm, test_args);
+
+	return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+	kvm_vm_free(vm);
+}