diff mbox series

[kvm-unit-tests] x86/access: Use HVMOP_flush_tlbs hypercall instead of invlpg() for Xen

Message ID 20230728071857.29991-1-metikaya@amazon.co.uk (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests] x86/access: Use HVMOP_flush_tlbs hypercall instead of invlpg() for Xen | expand

Commit Message

Kaya, Metin July 28, 2023, 7:18 a.m. UTC
QEMU has ability of running guests under Xen starting from v8.0 [1].

And a patch is submitted to Linux/KVM to add hvm_op/HVMOP_flush_tlbs
hypercall support to KVM [2].

Hence, prefer HVMOP_flush_tlbs hypercall over invlpg instruction in Xen
environment *if* KVM has hvm_op/HVMOP_flush_tlbs hypercall implemented.

Also fix trivial formatting warnings for casting in invlpg() calls.

[1] https://qemu-project.gitlab.io/qemu/system/i386/xen.html
[2] https://lore.kernel.org/all/20230418101306.98263-1-metikaya@amazon.co.uk

Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
---
 x86/access.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 89 insertions(+), 3 deletions(-)

Comments

Sean Christopherson Aug. 4, 2023, 12:16 a.m. UTC | #1
*sigh*

First off, thanks for the test, much appreciated.

However, the purpose of adding test code is to be able to actually run the damn
test.  Yeah, I got there in the end by hacking the QEMU command line generated
by KUT, but holy moly I should not have had to do that.  E.g. swapping out
"-machine accel=kvm" for "-accel kvm" just to be able to enable xen-version=0x4000a
was not a detail I wanted to find out on my own.

And the real kicker: after upgrading QEMU and figuring out the right command line,
the test fails.  Specifically, it hangs in the focused check_smep_andnot_wp() test.

My host is a HSW.  I haven't tried another host.  AFAICT, the test unexpectedly
ends up at RIP 0x0 and loops indefinitely on a page fault (the test has a nop
page fault handler so it doesn't die).

E.g. with EPT disabling so that KVM intercepts #PF:

stable-4247    [007] .....   456.192867: kvm_page_fault: vcpu 0 rip 0x0 address 0x0000000000000000 error_code 0x10
stable-4247    [007] .....   456.192868: kvm_inj_exception: #PF (0x11)
stable-4247    [007] d....   456.192868: kvm_entry: vcpu 0, rip 0x0


On Fri, Jul 28, 2023, Metin Kaya wrote:
> @@ -250,12 +251,90 @@ static void set_cr0_wp(int wp)
>  	}
>  }
>  
> +uint8_t *hypercall_page;
> +
> +#define __HYPERVISOR_hvm_op	34
> +#define HVMOP_flush_tlbs	5
> +
> +static inline int do_hvm_op_flush_tlbs(void)
> +{
> +	long res = 0, _a1 = (long)(HVMOP_flush_tlbs), _a2 = (long)(NULL);
> +
> +	asm volatile ("call *%[offset]"
> +#if defined(__x86_64__)
> +		      : "=a" (res), "+D" (_a1), "+S" (_a2)
> +#else
> +		      : "=a" (res), "+b" (_a1), "+c" (_a2)
> +#endif
> +		      : [offset] "r" (hypercall_page + (__HYPERVISOR_hvm_op * 32))
> +		      : "memory");

Can this be easily extracted to a generic-ish xen_hypercall() helper?  I.e. make
the inline assembly reusable.

> +
> +	if (res)
> +		printf("hvm_op/HVMOP_flush_tlbs failed: %ld.", res);
> +
> +	return (int)res;
> +}
> +
> +#define XEN_CPUID_FIRST_LEAF    0x40000000
> +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
> +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
> +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
> +
> +static void init_hypercalls(void)
> +{
> +	struct cpuid c;
> +	u32 base;
> +	bool found = false;
> +
> +	for (base = XEN_CPUID_FIRST_LEAF; base < XEN_CPUID_FIRST_LEAF + 0x10000;
> +			base += 0x100) {
> +		c = cpuid(base);
> +		if ((c.b == XEN_CPUID_SIGNATURE_EBX) &&
> +		    (c.c == XEN_CPUID_SIGNATURE_ECX) &&
> +		    (c.d == XEN_CPUID_SIGNATURE_EDX) &&
> +		    ((c.a - base) >= 2)) {
> +			found = true;
> +			break;
> +		}
> +	}
> +	if (!found) {
> +		printf("Using native invlpg instruction\n");
> +		return;
> +	}
> +
> +	hypercall_page = alloc_pages_flags(0, AREA_ANY | FLAG_DONTZERO);
> +	if (!hypercall_page)
> +		report_abort("failed to allocate hypercall page");
> +
> +	memset(hypercall_page, 0xc3, PAGE_SIZE);
> +
> +	c = cpuid(base + 2);
> +	wrmsr(c.b, (u64)hypercall_page);
> +	barrier();
> +
> +	if (hypercall_page[0] == 0xc3)
> +		report_abort("Hypercall page not initialised correctly\n");
> +
> +	/*
> +	 * Fall back to invlpg instruction if HVMOP_flush_tlbs hypercall is
> +	 * unsupported.
> +	 */
> +	if (do_hvm_op_flush_tlbs()) {
> +		printf("Using native invlpg instruction\n");
> +		free_page(hypercall_page);
> +		hypercall_page = NULL;
> +		return;
> +	}
> +
> +	printf("Using Xen HVMOP_flush_tlbs hypercall\n");
> +}

All of the Xen stuff belongs in "library" code, e.g. add xen.{c,h} and prefix
the public functions with xen_.  And drop the HVMOP_flush_tlbs probing from
xen_init_hypercalls(), i.e. make it a generic helper.

And I think it also makes sense to have a separate testcase for the Xen hypercall,
e.g. so that we test both cases when Xen is present, eliminate flakiness from Xen
setup failing, make it easy to run or skip the Xen test, make it easy to see which
flavor is running (checking the log is annoying), etc.

That way you can have x86/access_test.c's main() pivot on a command line param
and skip the test if do_hvm_op_flush_tlbs() fails (though I would call it something
like xen_flush_tlbs()).

>  static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt)
>  {
>  	*ptep &= ~PT_USER_MASK;
>  
>  	/* Flush to avoid spurious #PF */
> -	invlpg((void*)virt);
> +	hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void *)virt);

And rather than hypercall_page, end up with "bool use_xen_pv_tlb_flush" or so.
diff mbox series

Patch

diff --git a/x86/access.c b/x86/access.c
index 70d81bf02d9d..0a858f807dde 100644
--- a/x86/access.c
+++ b/x86/access.c
@@ -4,6 +4,7 @@ 
 #include "asm/page.h"
 #include "x86/vm.h"
 #include "access.h"
+#include "alloc_page.h"
 
 #define true 1
 #define false 0
@@ -250,12 +251,90 @@  static void set_cr0_wp(int wp)
 	}
 }
 
+uint8_t *hypercall_page;
+
+#define __HYPERVISOR_hvm_op	34
+#define HVMOP_flush_tlbs	5
+
+static inline int do_hvm_op_flush_tlbs(void)
+{
+	long res = 0, _a1 = (long)(HVMOP_flush_tlbs), _a2 = (long)(NULL);
+
+	asm volatile ("call *%[offset]"
+#if defined(__x86_64__)
+		      : "=a" (res), "+D" (_a1), "+S" (_a2)
+#else
+		      : "=a" (res), "+b" (_a1), "+c" (_a2)
+#endif
+		      : [offset] "r" (hypercall_page + (__HYPERVISOR_hvm_op * 32))
+		      : "memory");
+
+	if (res)
+		printf("hvm_op/HVMOP_flush_tlbs failed: %ld.", res);
+
+	return (int)res;
+}
+
+#define XEN_CPUID_FIRST_LEAF    0x40000000
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+static void init_hypercalls(void)
+{
+	struct cpuid c;
+	u32 base;
+	bool found = false;
+
+	for (base = XEN_CPUID_FIRST_LEAF; base < XEN_CPUID_FIRST_LEAF + 0x10000;
+			base += 0x100) {
+		c = cpuid(base);
+		if ((c.b == XEN_CPUID_SIGNATURE_EBX) &&
+		    (c.c == XEN_CPUID_SIGNATURE_ECX) &&
+		    (c.d == XEN_CPUID_SIGNATURE_EDX) &&
+		    ((c.a - base) >= 2)) {
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		printf("Using native invlpg instruction\n");
+		return;
+	}
+
+	hypercall_page = alloc_pages_flags(0, AREA_ANY | FLAG_DONTZERO);
+	if (!hypercall_page)
+		report_abort("failed to allocate hypercall page");
+
+	memset(hypercall_page, 0xc3, PAGE_SIZE);
+
+	c = cpuid(base + 2);
+	wrmsr(c.b, (u64)hypercall_page);
+	barrier();
+
+	if (hypercall_page[0] == 0xc3)
+		report_abort("Hypercall page not initialised correctly\n");
+
+	/*
+	 * Fall back to invlpg instruction if HVMOP_flush_tlbs hypercall is
+	 * unsupported.
+	 */
+	if (do_hvm_op_flush_tlbs()) {
+		printf("Using native invlpg instruction\n");
+		free_page(hypercall_page);
+		hypercall_page = NULL;
+		return;
+	}
+
+	printf("Using Xen HVMOP_flush_tlbs hypercall\n");
+}
+
 static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 {
 	*ptep &= ~PT_USER_MASK;
 
 	/* Flush to avoid spurious #PF */
-	invlpg((void*)virt);
+	hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void *)virt);
 }
 
 static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt)
@@ -263,7 +342,7 @@  static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 	*ptep |= PT_USER_MASK;
 
 	/* Flush to avoid spurious #PF */
-	invlpg((void*)virt);
+	hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void *)virt);
 }
 
 static unsigned set_cr4_smep(ac_test_t *at, int smep)
@@ -583,7 +662,7 @@  fault:
 static void __ac_set_expected_status(ac_test_t *at, bool flush)
 {
 	if (flush)
-		invlpg(at->virt);
+		hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void *)at->virt);
 
 	if (at->ptep)
 		at->expected_pte = *at->ptep;
@@ -1243,6 +1322,10 @@  void ac_test_run(int pt_levels, bool force_emulation)
 	printf("run\n");
 	tests = successes = 0;
 
+	setup_vm();
+
+	init_hypercalls();
+
 	shadow_cr0 = read_cr0();
 	shadow_cr4 = read_cr4();
 	shadow_cr3 = read_cr3();
@@ -1318,6 +1401,9 @@  void ac_test_run(int pt_levels, bool force_emulation)
 		successes += ac_test_cases[i](&pt_env);
 	}
 
+	if (hypercall_page)
+		free_page(hypercall_page);
+
 	printf("\n%d tests, %d failures\n", tests, tests - successes);
 
 	report(successes == tests, "%d-level paging tests%s", pt_levels,