diff mbox series

[v3] KVM: x86/xen: Implement hvm_op/HVMOP_flush_tlbs hypercall

Message ID 20230418101306.98263-1-metikaya@amazon.co.uk (mailing list archive)
State New, archived
Headers show
Series [v3] KVM: x86/xen: Implement hvm_op/HVMOP_flush_tlbs hypercall | expand

Commit Message

Kaya, Metin April 18, 2023, 10:13 a.m. UTC
Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
ioctl() to precisely flush guest TLBs, and punting to userspace would
likely negate the performance benefits of avoiding a TLB shootdown in
the guest.

Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>

---
v3:
  - Addressed comments for v2.
  - Verified with XTF/invlpg test case.

v2:
  - Removed an irrelevant URL from commit message.
---
 arch/x86/kvm/xen.c                 | 15 +++++++++++++++
 include/xen/interface/hvm/hvm_op.h |  3 +++
 2 files changed, 18 insertions(+)

Comments

Paul Durrant April 18, 2023, 10:48 a.m. UTC | #1
On 18/04/2023 11:13, Metin Kaya wrote:
> Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
> allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
> ioctl() to precisely flush guest TLBs, and punting to userspace would
> likely negate the performance benefits of avoiding a TLB shootdown in
> the guest.
> 
> Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
> 
> ---
> v3:
>    - Addressed comments for v2.
>    - Verified with XTF/invlpg test case.
> 
> v2:
>    - Removed an irrelevant URL from commit message.
> ---
>   arch/x86/kvm/xen.c                 | 15 +++++++++++++++
>   include/xen/interface/hvm/hvm_op.h |  3 +++
>   2 files changed, 18 insertions(+)
> 
> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index 40edf4d1974c..a63c48e8d8fa 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -21,6 +21,7 @@
>   #include <xen/interface/vcpu.h>
>   #include <xen/interface/version.h>
>   #include <xen/interface/event_channel.h>
> +#include <xen/interface/hvm/hvm_op.h>
>   #include <xen/interface/sched.h>
>   
>   #include <asm/xen/cpuid.h>
> @@ -1330,6 +1331,17 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
>   	return false;
>   }
>   
> +static bool kvm_xen_hcall_hvm_op(struct kvm_vcpu *vcpu, int cmd, u64 arg, u64 *r)
> +{
> +	if (cmd == HVMOP_flush_tlbs && !arg) {
> +		kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_TLB_FLUSH_GUEST);
> +		*r = 0;
> +		return true;
> +	}
> +
> +	return false;
> +}

This code structure means that arg != NULL will result in the guest 
seeing ENOSYS rather than EINVAL.

   Paul

> +
>   struct compat_vcpu_set_singleshot_timer {
>       uint64_t timeout_abs_ns;
>       uint32_t flags;
> @@ -1501,6 +1513,9 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
>   			timeout |= params[1] << 32;
>   		handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
>   		break;
> +	case __HYPERVISOR_hvm_op:
> +		handled = kvm_xen_hcall_hvm_op(vcpu, params[0], params[1], &r);
> +		break;
>   	}
>   	default:
>   		break;
> diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
> index 03134bf3cec1..240d8149bc04 100644
> --- a/include/xen/interface/hvm/hvm_op.h
> +++ b/include/xen/interface/hvm/hvm_op.h
> @@ -16,6 +16,9 @@ struct xen_hvm_param {
>   };
>   DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
>   
> +/* Flushes guest TLBs for all vCPUs: @arg must be 0. */
> +#define HVMOP_flush_tlbs            5
> +
>   /* Hint from PV drivers for pagetable destruction. */
>   #define HVMOP_pagetable_dying       9
>   struct xen_hvm_pagetable_dying {
Kaya, Metin April 18, 2023, 11:04 a.m. UTC | #2
On 18/04/2023 11:48, Paul Durrant wrote:
>On 18/04/2023 11:13, Metin Kaya wrote:
>> Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
>> allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
>> ioctl() to precisely flush guest TLBs, and punting to userspace would
>> likely negate the performance benefits of avoiding a TLB shootdown in
>> the guest.
>>
>> Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
>>
>> ---
>> v3:
>>    - Addressed comments for v2.
>>    - Verified with XTF/invlpg test case.
>>
>> v2:
>>    - Removed an irrelevant URL from commit message.
>> ---
>>   arch/x86/kvm/xen.c                 | 15 +++++++++++++++
>>   include/xen/interface/hvm/hvm_op.h |  3 +++
>>   2 files changed, 18 insertions(+)
>>
>> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index
>> 40edf4d1974c..a63c48e8d8fa 100644
>> --- a/arch/x86/kvm/xen.c
>> +++ b/arch/x86/kvm/xen.c
>> @@ -21,6 +21,7 @@
>>   #include <xen/interface/vcpu.h>
>>   #include <xen/interface/version.h>
>>   #include <xen/interface/event_channel.h>
>> +#include <xen/interface/hvm/hvm_op.h>
>>   #include <xen/interface/sched.h>
>>
>>   #include <asm/xen/cpuid.h>
>> @@ -1330,6 +1331,17 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
>>       return false;
>>   }
>>
>> +static bool kvm_xen_hcall_hvm_op(struct kvm_vcpu *vcpu, int cmd, u64
>> +arg, u64 *r) {
>> +     if (cmd == HVMOP_flush_tlbs && !arg) {
>> +             kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_TLB_FLUSH_GUEST);
>> +             *r = 0;
>> +             return true;
>> +     }
>> +
>> +     return false;
>> +}
>
>This code structure means that arg != NULL will result in the guest seeing ENOSYS rather than EINVAL.
>
>   Paul

Yes, because of this comment in David's email:
"I don't even know that we care about in-kernel acceleration for the
-EINVAL case. We could just return false for that, and let userspace
(report and) handle it."

>
>> +
>>   struct compat_vcpu_set_singleshot_timer {
>>       uint64_t timeout_abs_ns;
>>       uint32_t flags;
>> @@ -1501,6 +1513,9 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
>>                       timeout |= params[1] << 32;
>>               handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
>>               break;
>> +     case __HYPERVISOR_hvm_op:
>> +             handled = kvm_xen_hcall_hvm_op(vcpu, params[0], params[1], &r);
>> +             break;
>>       }
>>       default:
>>               break;
>> diff --git a/include/xen/interface/hvm/hvm_op.h
>> b/include/xen/interface/hvm/hvm_op.h
>> index 03134bf3cec1..240d8149bc04 100644
>> --- a/include/xen/interface/hvm/hvm_op.h
>> +++ b/include/xen/interface/hvm/hvm_op.h
>> @@ -16,6 +16,9 @@ struct xen_hvm_param {
>>   };
>>   DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
>>
>> +/* Flushes guest TLBs for all vCPUs: @arg must be 0. */
>> +#define HVMOP_flush_tlbs            5
>> +
>>   /* Hint from PV drivers for pagetable destruction. */
>>   #define HVMOP_pagetable_dying       9
>>   struct xen_hvm_pagetable_dying {
>
David Woodhouse April 18, 2023, 11:05 a.m. UTC | #3
On Tue, 2023-04-18 at 11:48 +0100, Paul Durrant wrote:
> On 18/04/2023 11:13, Metin Kaya wrote:
> > Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
> > allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
> > ioctl() to precisely flush guest TLBs, and punting to userspace would
> > likely negate the performance benefits of avoiding a TLB shootdown in
> > the guest.
> > 
> > Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>

Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>

Although as noted on the internal review and by Sean, it would be good
to have a test case that verifies that the TLBs are actually flushed.

> > 
> > ---
> > v3:
> >    - Addressed comments for v2.
> >    - Verified with XTF/invlpg test case.
> > 
> > v2:
> >    - Removed an irrelevant URL from commit message.
> > ---
> >   arch/x86/kvm/xen.c                 | 15 +++++++++++++++
> >   include/xen/interface/hvm/hvm_op.h |  3 +++
> >   2 files changed, 18 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> > index 40edf4d1974c..a63c48e8d8fa 100644
> > --- a/arch/x86/kvm/xen.c
> > +++ b/arch/x86/kvm/xen.c
> > @@ -21,6 +21,7 @@
> >   #include <xen/interface/vcpu.h>
> >   #include <xen/interface/version.h>
> >   #include <xen/interface/event_channel.h>
> > +#include <xen/interface/hvm/hvm_op.h>
> >   #include <xen/interface/sched.h>
> > 
> >   #include <asm/xen/cpuid.h>
> > @@ -1330,6 +1331,17 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
> >       return false;
> >   }
> > 
> > +static bool kvm_xen_hcall_hvm_op(struct kvm_vcpu *vcpu, int cmd, u64 arg, u64 *r)
> > +{
> > +     if (cmd == HVMOP_flush_tlbs && !arg) {
> > +             kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_TLB_FLUSH_GUEST);
> > +             *r = 0;
> > +             return true;
> > +     }
> > +
> > +     return false;
> > +}
> 
> This code structure means that arg != NULL will result in the guest
> seeing ENOSYS rather than EINVAL.

In kvm_xen_hypercall() the default for 'r' is -ENOSYS but because
'handled' never gets set to true, we don't hand that back to the guest.
Instead we get to handle_in_userspace: and do the KVM_EXIT_XEN exit.

So arg != NULL will cause the standard hypercall exit to userspace just
as it does today.
Paul Durrant April 18, 2023, 11:13 a.m. UTC | #4
On 18/04/2023 12:04, Kaya, Metin wrote:
> On 18/04/2023 11:48, Paul Durrant wrote:
>> On 18/04/2023 11:13, Metin Kaya wrote:
>>> Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
>>> allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
>>> ioctl() to precisely flush guest TLBs, and punting to userspace would
>>> likely negate the performance benefits of avoiding a TLB shootdown in
>>> the guest.
>>>
>>> Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
>>>
>>> ---
>>> v3:
>>>     - Addressed comments for v2.
>>>     - Verified with XTF/invlpg test case.
>>>
>>> v2:
>>>     - Removed an irrelevant URL from commit message.
>>> ---
>>>    arch/x86/kvm/xen.c                 | 15 +++++++++++++++
>>>    include/xen/interface/hvm/hvm_op.h |  3 +++
>>>    2 files changed, 18 insertions(+)
>>>
>>> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index
>>> 40edf4d1974c..a63c48e8d8fa 100644
>>> --- a/arch/x86/kvm/xen.c
>>> +++ b/arch/x86/kvm/xen.c
>>> @@ -21,6 +21,7 @@
>>>    #include <xen/interface/vcpu.h>
>>>    #include <xen/interface/version.h>
>>>    #include <xen/interface/event_channel.h>
>>> +#include <xen/interface/hvm/hvm_op.h>
>>>    #include <xen/interface/sched.h>
>>>
>>>    #include <asm/xen/cpuid.h>
>>> @@ -1330,6 +1331,17 @@ static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
>>>        return false;
>>>    }
>>>
>>> +static bool kvm_xen_hcall_hvm_op(struct kvm_vcpu *vcpu, int cmd, u64
>>> +arg, u64 *r) {
>>> +     if (cmd == HVMOP_flush_tlbs && !arg) {
>>> +             kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_TLB_FLUSH_GUEST);
>>> +             *r = 0;
>>> +             return true;
>>> +     }
>>> +
>>> +     return false;
>>> +}
>>
>> This code structure means that arg != NULL will result in the guest seeing ENOSYS rather than EINVAL.
>>
>>    Paul
> 
> Yes, because of this comment in David's email:
> "I don't even know that we care about in-kernel acceleration for the
> -EINVAL case. We could just return false for that, and let userspace
> (report and) handle it."
> 

Ok, fair enough.

Reviewed-by: Paul Durrant <paul@xen.org>
Sean Christopherson May 26, 2023, 8:32 p.m. UTC | #5
On Tue, Apr 18, 2023, Metin Kaya wrote:
> Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
> allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
> ioctl() to precisely flush guest TLBs, and punting to userspace would
> likely negate the performance benefits of avoiding a TLB shootdown in
> the guest.
> 
> Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
> 
> ---
> v3:
>   - Addressed comments for v2.
>   - Verified with XTF/invlpg test case.
> 
> v2:
>   - Removed an irrelevant URL from commit message.
> ---
>  arch/x86/kvm/xen.c                 | 15 +++++++++++++++
>  include/xen/interface/hvm/hvm_op.h |  3 +++
>  2 files changed, 18 insertions(+)

I still don't see a testcase.

  : This doesn't even compile.  I'll give you one guess as to how much confidence I
  : have that this was properly tested.
  : 
  : Aha!  And QEMU appears to have Xen emulation support.  That means KVM-Unit-Tests
  : is an option.  Specifically, extend the "access" test to use this hypercall instead
  : of INVLPG.  That'll verify that the flush is actually being performed as expteced.

Let me be more explicit this time: I am not applying this without a test.  I don't
care how trivial a patch may seem, I'm done taking patches without test coverage
unless there's a *really* good reason for me to do so.
David Woodhouse July 25, 2023, 12:56 p.m. UTC | #6
On Fri, 2023-05-26 at 13:32 -0700, Sean Christopherson wrote:
>   : Aha!  And QEMU appears to have Xen emulation support.  That means KVM-Unit-Tests
>   : is an option.  Specifically, extend the "access" test to use this hypercall instead
>   : of INVLPG.  That'll verify that the flush is actually being performed as expteced.

That works. Metin has a better version that actually sets up the
hypercall page properly and uses it, but that one bails out when Xen
support isn't present, and doesn't show the failure mode quite so
clearly. This is the simple version:

From: Metin Kaya <metikaya@amazon.co.uk>
Subject: [PATCH] x86/access: Use hvm_op/flush_tlbs hypercall instead of invlpg instruction

Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
---
 x86/access.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/x86/access.c b/x86/access.c
index 83c8221..6fa08c5 100644
--- a/x86/access.c
+++ b/x86/access.c
@@ -253,12 +253,31 @@ static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 	*ptep &= ~PT_USER_MASK;
 }
 
+#define KVM_HYPERCALL_INTEL ".byte 0x0f,0x01,0xc1"
+#define __HYPERVISOR_hvm_op 34
+#define HVMOP_flush_tlbs    5
+
+static inline long hvm_op_flush_tlbs(void)
+{
+	long ret;
+	uint64_t nr = __HYPERVISOR_hvm_op;
+	uint64_t op = HVMOP_flush_tlbs;
+	uint64_t arg = 0;
+
+	asm volatile(KVM_HYPERCALL_INTEL
+		     : "=a"(ret)
+		     : "a"(nr), "D" (op), "S" (arg)
+		     : "memory");
+
+	return ret;
+}
+
 static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 {
 	*ptep |= PT_USER_MASK;
 
 	/* Flush to avoid spurious #PF */
-	invlpg((void*)virt);
+	hvm_op_flush_tlbs();
 }
 
 static unsigned set_cr4_smep(ac_test_t *at, int smep)
@@ -577,7 +596,7 @@ fault:
 
 static void ac_set_expected_status(ac_test_t *at)
 {
-	invlpg(at->virt);
+	hvm_op_flush_tlbs();
 
 	if (at->ptep)
 		at->expected_pte = *at->ptep;
Sean Christopherson July 26, 2023, 8:07 p.m. UTC | #7
On Tue, Jul 25, 2023, David Woodhouse wrote:
> On Fri, 2023-05-26 at 13:32 -0700, Sean Christopherson wrote:
> >   : Aha!  And QEMU appears to have Xen emulation support.  That means KVM-Unit-Tests
> >   : is an option.  Specifically, extend the "access" test to use this hypercall instead
> >   : of INVLPG.  That'll verify that the flush is actually being performed as expteced.
> 
> That works. Metin has a better version that actually sets up the
> hypercall page properly and uses it, but that one bails out when Xen
> support isn't present, and doesn't show the failure mode quite so
> clearly. This is the simple version:

IIUC, y'all have already written both tests, so why not post both?  I certainly
won't object to more tests if they provide different coverage.

> > Let me be more explicit this time: I am not applying this without a test.  I don't
> > care how trivial a patch may seem, I'm done taking patches without test coverage
> > unless there's a *really* good reason for me to do so.
> 
> Understood.
> 
> So, we know it *works*, but the above is a one-off and not a
> *regression* test.
> 
> It would definitely be nice to have regression tests that cover
> absolutely everything, but adding Xen guest support to the generic KVM-
> Unit-Tests might be considered overkill, because this *particular*
> thing is fairly unlikely to regress? It really is just calling
> kvm_flush_remote_tlbs(), and if that goes wrong we're probably likely
> to notice it anyway.
> 
> What do you think?

I'm a-ok with just having basic test coverage.  We don't have anywhere near 100%
(or even 10%...) coverage of KVM's TLB management, so it would be ridiculous to
require that for Xen PV.

I'd definitely love to change that, and raise the bar for test coverage in general,
but that'll take time (and effort).
David Woodhouse July 27, 2023, 12:04 p.m. UTC | #8
On Wed, 2023-07-26 at 13:07 -0700, Sean Christopherson wrote:
> On Tue, Jul 25, 2023, David Woodhouse wrote:
> > On Fri, 2023-05-26 at 13:32 -0700, Sean Christopherson wrote:
> > >   : Aha!  And QEMU appears to have Xen emulation support.  That means KVM-Unit-Tests
> > >   : is an option.  Specifically, extend the "access" test to use this hypercall instead
> > >   : of INVLPG.  That'll verify that the flush is actually being performed as expteced.
> > 
> > That works. Metin has a better version that actually sets up the
> > hypercall page properly and uses it, but that one bails out when Xen
> > support isn't present, and doesn't show the failure mode quite so
> > clearly. This is the simple version:
> 
> IIUC, y'all have already written both tests, so why not post both?  I certainly
> won't object to more tests if they provide different coverage.

Yeah, it just needed cleaning up.

This is what we have; Metin will submit it for real after a little more
polishing. It modifies the existing access test so that *if* it's run
in a Xen environment, and *if* the HVMOP_flush_tlbs call returns
success instead of -ENOSYS, it'll use that instead of invlpg.

In itself, that doesn't give us an automatic regression tests, because
you still need to run it manually — as before,
 qemu-system-x86_64 -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev --accel kvm,xen-version=0x4000a,kernel-irqchip=split  -kernel ~/access_test.flat

If we really want to, we can look at making it run that way when qemu
and the host kernel support Xen guests...?

diff --git a/x86/access.c b/x86/access.c
index 83c8221..8c6e44a 100644
--- a/x86/access.c
+++ b/x86/access.c
@@ -4,6 +4,7 @@
 #include "asm/page.h"
 #include "x86/vm.h"
 #include "access.h"
+#include "alloc_page.h"
 
 #define true 1
 #define false 0
@@ -253,12 +254,90 @@ static void clear_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 	*ptep &= ~PT_USER_MASK;
 }
 
+uint8_t *hypercall_page;
+
+#define __HYPERVISOR_hvm_op	34
+#define HVMOP_flush_tlbs	5
+
+static inline int do_hvm_op_flush_tlbs(void)
+{
+	long res = 0, _a1 = (long)(HVMOP_flush_tlbs), _a2 = (long)(NULL);
+
+	asm volatile ("call *%[offset]"
+#if defined(__x86_64__)
+		      : "=a" (res), "+D" (_a1), "+S" (_a2)
+#else
+		      : "=a" (res), "+b" (_a1), "+c" (_a2)
+#endif
+		      : [offset] "r" (hypercall_page + (__HYPERVISOR_hvm_op * 32))
+		      : "memory");
+
+	if (res)
+		printf("hvm_op/HVMOP_flush_tlbs failed: %ld.", res);
+
+	return (int)res;
+}
+
+#define XEN_CPUID_FIRST_LEAF    0x40000000
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+static void init_hypercalls(void)
+{
+	struct cpuid c;
+	u32 base;
+	bool found = false;
+
+	for (base = XEN_CPUID_FIRST_LEAF; base < XEN_CPUID_FIRST_LEAF + 0x10000;
+			base += 0x100) {
+		c = cpuid(base);
+		if ((c.b == XEN_CPUID_SIGNATURE_EBX) &&
+		    (c.c == XEN_CPUID_SIGNATURE_ECX) &&
+		    (c.d == XEN_CPUID_SIGNATURE_EDX) &&
+		    ((c.a - base) >= 2)) {
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		printf("Using native invlpg instruction\n");
+		return;
+	}
+
+	hypercall_page = alloc_pages_flags(0, AREA_ANY | FLAG_DONTZERO);
+	if (!hypercall_page)
+		report_abort("failed to allocate hypercall page");
+
+	memset(hypercall_page, 0xc3, PAGE_SIZE);
+
+	c = cpuid(base + 2);
+	wrmsr(c.b, (u64)hypercall_page);
+	barrier();
+
+	if (hypercall_page[0] == 0xc3)
+		report_abort("Hypercall page not initialised correctly\n");
+
+	/*
+	 * Fall back to invlpg instruction if HVMOP_flush_tlbs hypercall is
+	 * unsuported.
+	 */
+	if (do_hvm_op_flush_tlbs()) {
+		printf("Using native invlpg instruction\n");
+		free_page(hypercall_page);
+		hypercall_page = NULL;
+		return;
+	}
+
+	printf("Using Xen HVMOP_flush_tlbs hypercall\n");
+}
+
 static void set_user_mask(pt_element_t *ptep, int level, unsigned long virt)
 {
 	*ptep |= PT_USER_MASK;
 
 	/* Flush to avoid spurious #PF */
-	invlpg((void*)virt);
+	hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void*)virt);
 }
 
 static unsigned set_cr4_smep(ac_test_t *at, int smep)
@@ -577,7 +656,7 @@ fault:
 
 static void ac_set_expected_status(ac_test_t *at)
 {
-	invlpg(at->virt);
+	hypercall_page ? do_hvm_op_flush_tlbs() : invlpg((void*)at->virt);
 
 	if (at->ptep)
 		at->expected_pte = *at->ptep;
@@ -1162,6 +1241,10 @@ int ac_test_run(int pt_levels)
 	printf("run\n");
 	tests = successes = 0;
 
+	setup_vm();
+
+	init_hypercalls();
+
 	shadow_cr0 = read_cr0();
 	shadow_cr4 = read_cr4();
 	shadow_cr3 = read_cr3();
@@ -1234,6 +1317,9 @@ int ac_test_run(int pt_levels)
 		successes += ac_test_cases[i](&pt_env);
 	}
 
+	if (hypercall_page)
+		free_page(hypercall_page);
+
 	printf("\n%d tests, %d failures\n", tests, tests - successes);
 
 	return successes == tests;
Kaya, Metin July 28, 2023, 7:31 a.m. UTC | #9
On Thu, Jul 27, 2023 at 1:04 PM, David Woodhouse wrote:
> On Wed, 2023-07-26 at 13:07 -0700, Sean Christopherson wrote:
> > On Tue, Jul 25, 2023, David Woodhouse wrote:
> > > On Fri, 2023-05-26 at 13:32 -0700, Sean Christopherson wrote:
> > > >   : Aha!  And QEMU appears to have Xen emulation support.  That means KVM-Unit-Tests
> > > >   : is an option.  Specifically, extend the "access" test to use this hypercall instead
> > > >   : of INVLPG.  That'll verify that the flush is actually being performed as expteced.
> > >
> > > That works. Metin has a better version that actually sets up the
> > > hypercall page properly and uses it, but that one bails out when Xen
> > > support isn't present, and doesn't show the failure mode quite so
> > > clearly. This is the simple version:
> >
> > IIUC, y'all have already written both tests, so why not post both?  I certainly
> > won't object to more tests if they provide different coverage.
>
> Yeah, it just needed cleaning up.
>
> This is what we have; Metin will submit it for real after a little more
> polishing. It modifies the existing access test so that *if* it's run
> in a Xen environment, and *if* the HVMOP_flush_tlbs call returns
> success instead of -ENOSYS, it'll use that instead of invlpg.

Patch is posted to kvm-unit-tests: https://marc.info/?l=kvm&m=169052867024191&w=2

> In itself, that doesn't give us an automatic regression tests, because
> you still need to run it manually — as before,
>  qemu-system-x86_64 -device isa-debug-exit,iobase=0xf4,iosize=0x4 -vnc none -serial stdio -device pci-testdev --accel kvm,xen-version=0x4000a,kernel-irqchip=split  -kernel ~/access_test.flat
>
> If we really want to, we can look at making it run that way when qemu
> and the host kernel support Xen guests...?
Sean Christopherson Aug. 4, 2023, 12:22 a.m. UTC | #10
On Tue, Apr 18, 2023, Metin Kaya wrote:
> Implement in-KVM support for Xen's HVMOP_flush_tlbs hypercall, which
> allows the guest to flush all vCPU's TLBs. KVM doesn't provide an
> ioctl() to precisely flush guest TLBs, and punting to userspace would
> likely negate the performance benefits of avoiding a TLB shootdown in
> the guest.
> 
> Signed-off-by: Metin Kaya <metikaya@amazon.co.uk>
> 
> ---
> v3:
>   - Addressed comments for v2.
>   - Verified with XTF/invlpg test case.
> 
> v2:
>   - Removed an irrelevant URL from commit message.
> ---

I had applied this and even generated the "thank you", but then I actually ran
the KUT access test[*].  It may very well be a pre-existing bug, but that doesn't
change the fact that this patch breaks a test.

I apologize for being snippy, especially if it turns out this is unique to HSW
(or worse, my system), and your systems don't exhibit issues.  Getting the test
to run took way too long and I'm bit grumpy.

Anyways, whatever is going wrong needs to be sorted out before this can be applied.

[*] https://lore.kernel.org/all/ZMxDRg6gWsVLeYFL@google.com
diff mbox series

Patch

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 40edf4d1974c..a63c48e8d8fa 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -21,6 +21,7 @@ 
 #include <xen/interface/vcpu.h>
 #include <xen/interface/version.h>
 #include <xen/interface/event_channel.h>
+#include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/sched.h>
 
 #include <asm/xen/cpuid.h>
@@ -1330,6 +1331,17 @@  static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode,
 	return false;
 }
 
+static bool kvm_xen_hcall_hvm_op(struct kvm_vcpu *vcpu, int cmd, u64 arg, u64 *r)
+{
+	if (cmd == HVMOP_flush_tlbs && !arg) {
+		kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_TLB_FLUSH_GUEST);
+		*r = 0;
+		return true;
+	}
+
+	return false;
+}
+
 struct compat_vcpu_set_singleshot_timer {
     uint64_t timeout_abs_ns;
     uint32_t flags;
@@ -1501,6 +1513,9 @@  int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 			timeout |= params[1] << 32;
 		handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r);
 		break;
+	case __HYPERVISOR_hvm_op:
+		handled = kvm_xen_hcall_hvm_op(vcpu, params[0], params[1], &r);
+		break;
 	}
 	default:
 		break;
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index 03134bf3cec1..240d8149bc04 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -16,6 +16,9 @@  struct xen_hvm_param {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
 
+/* Flushes guest TLBs for all vCPUs: @arg must be 0. */
+#define HVMOP_flush_tlbs            5
+
 /* Hint from PV drivers for pagetable destruction. */
 #define HVMOP_pagetable_dying       9
 struct xen_hvm_pagetable_dying {