@@ -65,6 +65,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
new file mode 100644
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tsc_scaling_sync.c
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates.
+ *
+ * TSC scaling / sync test
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ 2987654UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+ uint64_t start_tsc, local_tsc, tmp;
+
+ GUEST_SYNC(0);
+
+ start_tsc = rdtsc();
+ do {
+ tmp = READ_ONCE(tsc_sync);
+ local_tsc = rdtsc();
+ WRITE_ONCE(tsc_sync, local_tsc);
+ if (unlikely(local_tsc < tmp))
+ GUEST_SYNC_ARGS(1, local_tsc, tmp, 0, 0);
+ //GUEST_ASSERT(local_tsc <= tmp);
+ } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+ GUEST_DONE();
+}
+
+
+enum {
+ ALL_SERIALIZED,
+ FIRST_SERIALIZED,
+ NONE_SERIALIZED
+} serialize_mode = FIRST_SERIALIZED;
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+ unsigned long cpu = (unsigned long)_cpu_nr;
+ unsigned long failures = 0;
+ static bool first_cpu_scaled;
+ bool this_cpu_scaled = false;
+
+ pthread_spin_lock(&create_lock);
+
+ vm_vcpu_add_default(vm, cpu, guest_code);
+
+ if (serialize_mode == ALL_SERIALIZED ||
+ (serialize_mode == FIRST_SERIALIZED && !first_cpu_scaled)) {
+ vcpu_ioctl(vm, cpu, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+ vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ this_cpu_scaled = first_cpu_scaled = true;
+ }
+
+ pthread_spin_unlock(&create_lock);
+
+ if (!this_cpu_scaled) {
+ vcpu_ioctl(vm, cpu, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+ vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ }
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, cpu);
+ struct ucall uc;
+
+ vcpu_run(vm, cpu);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, cpu, &uc)) {
+ case UCALL_DONE:
+ goto out;
+
+ case UCALL_SYNC:
+ switch(uc.args[1]) {
+ case 0:
+ //printf("set TSC\n");
+ // vcpu_set_msr(vm, cpu, MSR_IA32_TSC, 0);
+ break;
+
+ case 1:
+ printf("Guest %ld sync %lx %lx %ld\n", cpu, uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ failures++;
+ break;
+ }
+ break;
+
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+ out:
+ return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+ if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
+ print_skip("KVM_CAP_TSC_CONTROL not available");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default_with_vcpus(0, DEFAULT_STACK_PGS * NR_TEST_VCPUS, 0, guest_code, NULL);
+
+ pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+ pthread_t cpu_threads[NR_TEST_VCPUS];
+ unsigned long cpu;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+ pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+ unsigned long failures = 0;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+ void *this_cpu_failures;
+ pthread_join(cpu_threads[cpu], &this_cpu_failures);
+ failures += (unsigned long)this_cpu_failures;
+ }
+
+ TEST_ASSERT(!failures, "TSC sync failed");
+ pthread_spin_destroy(&create_lock);
+ kvm_vm_free(vm);
+ return 0;
+}
Here's a fun test case. Create a VM, then spawn a bunch of threads each of which creates its own vCPU and then sets the TSC frequency and value. The guest vCPUs then run a stupidly naïve loop checking for TSC monotonicity. It dies horribly. Even if you use its "nice" mode where it actually serializes everything and the startup effectively isn't threaded at all. I can fix the serialized and "first CPU is scaled before the others are created" modes, by making new vCPUs get created with the same frequency of the last TSC sync. That is, in kvm_arch_vcpu_create() I make it do: kvm_set_tsc_khz(READ_ONCE(vcpu->kvm->arch.last_tsc_khz) ? : max_tsc_khz) instead of just using max_tsc_khz unconditionally. The free-for-all mode of all threads just running freely and creating their vCPU, then setting its frequency and its TSC value, remains hosed. We end up with kvm->arch.last_tsc_khz *alternating* between the default startup frequency and the user-configured one. I wonder if the answer here might be for new vCPUs not to get kvm->arch.last_tsc_khz from the last sync, but for them to inherit the last *explicitly* set TSC frequency, which we'd have to store separately? There are still potential race conditions between the initial frequency being set in kvm_arch_vcpu_create(), and the first TSC sync which happens later in kvm_arch_vcpu_postcreate(). It's possible that another vCPU has explicitly set its frequency in between those two being called for a newly-created vCPU, and the new one would still not sync correctly because of the apparent mismatch. A KVM-wide setting for the default frequency, instead of inferring it from an explicit setting on a vCPU, might address that... ? Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/x86_64/tsc_scaling_sync.c | 143 ++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c