Message ID | 20240920154422.2890096-1-vkuznets@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: selftests: x86: Avoid using SSE/AVX instructions | expand |
On Fri, Sep 20, 2024, Vitaly Kuznetsov wrote: > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > index 48d32c5aa3eb..3f1b24ed7245 100644 > --- a/tools/testing/selftests/kvm/Makefile > +++ b/tools/testing/selftests/kvm/Makefile > @@ -238,6 +238,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ > -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ > -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ > -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ > + -march=x86-64-v2 \ I would rather go straight to playing nice with AVX. Not because I care about being able to use AVX, but because pretty much every instance where KVM selftests punts setup to individual tests eventually leads to gross copy+paste code. The diff ends up being bigger than I was hoping, but that's largely because tests are already manually enabling stuff in XCR0 (see above copy+paste complaint). I can post the below later this week (probably as multiple patches). 
Note, -march=x86-64-v3 is there just to make it easy to test, I won't actually include that in the patches :-) --- tools/testing/selftests/kvm/Makefile | 1 + .../selftests/kvm/include/x86_64/processor.h | 5 ++++ .../selftests/kvm/lib/x86_64/processor.c | 24 +++++++++++++++++++ tools/testing/selftests/kvm/x86_64/amx_test.c | 23 ++++-------------- .../testing/selftests/kvm/x86_64/cpuid_test.c | 6 ++++- .../selftests/kvm/x86_64/sev_smoke_test.c | 11 --------- .../testing/selftests/kvm/x86_64/state_test.c | 5 ---- .../selftests/kvm/x86_64/xcr0_cpuid_test.c | 11 ++++++--- 8 files changed, 47 insertions(+), 39 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 960cf6a77198..7ef4b3cc403d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -238,6 +238,7 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ + -march=x86-64-v3 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e247f99e0473..645200e95f89 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1049,6 +1049,11 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); } +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 974bcd2df6d7..636b29ba8985 100644 --- 
a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -506,6 +506,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -519,6 +521,20 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -675,6 +691,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, &regs); @@ -686,6 +703,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. 
+ */ + vcpu_get_cpuid(vcpu); return vcpu; } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 903940c54d2d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -86,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -122,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 8c579ce714e9..e79a6577254f 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -37,7 +37,11 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); - GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); + __GUEST_ASSERT(ecx == guest_cpuid->entries[i].ecx, + "CPUID.0x%x.0x%x.ECX 0%x != 0x%x", + 
guest_cpuid->entries[i].function, + guest_cpuid->entries[i].index, + ecx, guest_cpuid->entries[i].ecx); GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); } diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 2e9197eb1652..59a5a2227944 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -70,12 +70,6 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - struct kvm_sregs sregs; - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = XFEATURE_MASK_X87_AVX, - }; vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, @@ -84,11 +78,6 @@ static void test_sync_vmsa(uint32_t policy) vcpu_args_set(vcpu, 1, gva); - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_xcrs_set(vcpu, &xcrs); asm("fninit\n" "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" "fldl %3\n" diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 1c756db329e5..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -145,11 +145,6 @@ static void __attribute__((__flatten__)) guest_code(void *arg) memset(buffer, 0xcc, sizeof(buffer)); - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xsetbv(0, xgetbv(0) | supported_xcr0); - /* * Modify state for all supported xfeatures to take them out of * their "init" state, i.e. to make them show up in XSTATE_BV. 
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 95ce192d0753..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -48,16 +48,16 @@ do { \ static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -79,6 +79,11 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got vector '0x%x'", + vector); + vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, "Expected success on XSETBV(0x%lx), got vector '0x%x'", base-commit: 3f8df6285271d9d8f17d733433e5213a63b83a0b --
From: Vitaly Kuznetsov <vkuznets@redhat.com> > Sent: 20 September 2024 16:44 > > Some distros switched gcc to '-march=x86-64-v3' by default and while it's > hard to find a CPU which doesn't support it today, I didn't think that any of the Atom-based CPUs supported AVX. I'm pretty sure the ones we use, which are still in production as server motherboards, don't support it. Doesn't -v3 also require support for the VEX encoding, which excludes a lot of perfectly reasonable CPUs? David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
David Laight <David.Laight@ACULAB.COM> writes: > From: Vitaly Kuznetsov <vkuznets@redhat.com> >> Sent: 20 September 2024 16:44 >> >> Some distros switched gcc to '-march=x86-64-v3' by default and while it's >> hard to find a CPU which doesn't support it today, > > I didn't think that any of the Atom based cpu supported AVX. > I'm pretty sure one we use that are still in production as > server motherboards don't support it. > > Doesn't -v3 also require support for the VEX encoding. > Which removes a lot of perfectly reasonable cpu? > Well, distros making such a decision are obviously not very interested in running on these CPUs then :-) In this particular case, the distro was CentOS Stream 10 and they are very explicit about the decision: https://developers.redhat.com/articles/2024/01/02/exploring-x86-64-v3-red-hat-enterprise-linux-10#new_cpu_capabilities_in_x86_64_v3 and 'gcc -v' tells me Configured with: ../configure ... --with-arch_64=x86-64-v3 ... ... gcc version 14.2.1 20240801 (Red Hat 14.2.1-1) (GCC)
On Fri, Sep 20, 2024 at 05:44:22PM +0200, Vitaly Kuznetsov wrote: > Some distros switched gcc to '-march=x86-64-v3' by default and while it's > hard to find a CPU which doesn't support it today, many KVM selftests fail > with This patch, which is queued in -next as 9a400068a1586bc4 targeted as a fix, breaks the build on non-x86 architectures: aarch64-linux-gnu-gcc -D_GNU_SOURCE= -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset -fno-builtin-strnlen -fno-stack-protector -fno-PIE -I/build/stage/linux/tools/testing/selftests/../../../tools/include -I/build/stage/linux/tools/testing/selftests/../../../tools/arch/arm64/include -I/build/stage/linux/tools/testing/selftests/../../../usr/include/ -Iinclude -Iaarch64 -Iinclude/aarch64 -I ../rseq -I.. -march=x86-64-v2 -isystem /build/stage/build-work/usr/include -I/build/stage/linux/tools/testing/selftests/../../../tools/arch/arm64/include/generated/ -c aarch64/aarch32_id_regs.c -o /build/stage/build-work/kselftest/kvm/aarch64/aarch32_id_regs.o cc1: error: unknown value ‘x86-64-v2’ for ‘-march’ This is because: > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > index 48d32c5aa3eb..3f1b24ed7245 100644 > --- a/tools/testing/selftests/kvm/Makefile > +++ b/tools/testing/selftests/kvm/Makefile > @@ -238,6 +238,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ > -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ > -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ > -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ > + -march=x86-64-v2 \ > $(KHDR_INCLUDES) > ifeq ($(ARCH),s390) > CFLAGS += -march=z10 unconditionally sets an architecture specific flag which is obviously not going to work on anything except x86. 
This should be set under an architecture check like the similar S/390 flag that can be seen in the context for the diff.
On Mon, Oct 21, 2024 at 07:32:17PM +0100, Mark Brown wrote: > On Fri, Sep 20, 2024 at 05:44:22PM +0200, Vitaly Kuznetsov wrote: > > > Some distros switched gcc to '-march=x86-64-v3' by default and while it's > > hard to find a CPU which doesn't support it today, many KVM selftests fail > > with > > This patch, which is queued in -next as 9a400068a1586bc4 targeted as a > fix, breaks the build on non-x86 architectures: This patch is now in Linus' tree, having been applied on Sunday and as a result appeared in -next today. > > aarch64-linux-gnu-gcc -D_GNU_SOURCE= -Wall -Wstrict-prototypes -Wuninitialized > -O2 -g -std=gnu99 -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT > -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset -fno-builtin-strnlen -fno-stack-protector -fno-PIE -I/build/stage/linux/tools/testing/selftests/../../../tools/include -I/build/stage/linux/tools/testing/selftests/../../../tools/arch/arm64/include -I/build/stage/linux/tools/testing/selftests/../../../usr/include/ -Iinclude -Iaarch64 -Iinclude/aarch64 -I ../rseq -I.. -march=x86-64-v2 -isystem /build/stage/build-work/usr/include -I/build/stage/linux/tools/testing/selftests/../../../tools/arch/arm64/include/generated/ -c aarch64/aarch32_id_regs.c -o /build/stage/build-work/kselftest/kvm/aarch64/aarch32_id_regs.o > cc1: error: unknown value ‘x86-64-v2’ for ‘-march’ > > This is because: > > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile > > index 48d32c5aa3eb..3f1b24ed7245 100644 > > --- a/tools/testing/selftests/kvm/Makefile > > +++ b/tools/testing/selftests/kvm/Makefile > > @@ -238,6 +238,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ > > -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ > > -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ > > -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. 
$(EXTRA_CFLAGS) \ > > + -march=x86-64-v2 \ > > $(KHDR_INCLUDES) > > ifeq ($(ARCH),s390) > > CFLAGS += -march=z10 > > unconditionally sets an architecture specific flag which is obviously > not going to work on anything except x86. This should be set under an > architecture check like the similar S/390 flag that can be seen in the > context for the diff.
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 78878b3a2725..704668adb3bd 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -82,8 +82,17 @@ static void test_hcall(struct hcall_data *hc) static void guest_main(void) { + uint64_t cr4, xcr0; int i; + cr4 = get_cr4(); + cr4 |= X86_CR4_OSXSAVE; + set_cr4(cr4); + + xcr0 = xgetbv(0); + xcr0 |= XFEATURE_MASK_SSE | XFEATURE_MASK_YMM; + xsetbv(0x0, xcr0); + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { test_msr(&msrs_to_test[i]); } but this needs to be made conditional depending on the compilation target and added to all selftests. Slap a band-aid on the problem by forcing '-march=x86-64-v2' in Makefile. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- tools/testing/selftests/kvm/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..3f1b24ed7245 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -238,6 +238,7 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ + -march=x86-64-v2 \ $(KHDR_INCLUDES) ifeq ($(ARCH),s390) CFLAGS += -march=z10