diff mbox series

[v2,4/9] i386: hvf: Implement CPU kick

Message ID 20200630102824.77604-5-r.bolshakov@yadro.com (mailing list archive)
State New, archived
Headers show
Series Improve synchronization between QEMU and HVF | expand

Commit Message

Roman Bolshakov June 30, 2020, 10:28 a.m. UTC
HVF doesn't have a CPU kick and without it it's not possible to perform
an action on CPU thread until a VMEXIT happens. The kick is also needed
for timely interrupt delivery.

Existing implementation of CPU kick sends SIG_IPI (aka SIGUSR1) to vCPU
thread, but it's different from what hv_vcpu_interrupt does. The latter
one results in invocation of mp_cpus_kick() in XNU kernel [1].

mp_cpus_kick() sends an IPI through the host LAPIC to the HVF vCPU.
And the kick interrupt leads to a VM exit because the "external-interrupt
exiting" VM-execution control is enabled for HVF. The VMX-preemption timer
is used (if available) to avoid kick loss if the kick is delivered
outside of hv_vcpu_run().

While at it, change the type of hvf_fd to match hv_vcpuid_t to avoid
compilation warnings.

1. https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/i386/mp.c

Cc: Cameron Esfahani <dirty@apple.com>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
---
 cpus.c                 | 13 +++++++++----
 include/hw/core/cpu.h  |  2 +-
 include/sysemu/hvf.h   |  1 +
 target/i386/cpu.h      |  1 +
 target/i386/hvf/hvf.c  | 42 +++++++++++++++++++++++++++---------------
 target/i386/hvf/vmcs.h |  1 +
 6 files changed, 40 insertions(+), 20 deletions(-)

Comments

Paolo Bonzini June 30, 2020, 12:33 p.m. UTC | #1
On 30/06/20 12:28, Roman Bolshakov wrote:
> @@ -966,6 +964,20 @@ int hvf_vcpu_exec(CPUState *cpu)
>      return ret;
>  }
>  
> +void hvf_vcpu_kick(CPUState *cpu)
> +{
> +    X86CPU *x86_cpu = X86_CPU(cpu);
> +    CPUX86State *env = &x86_cpu->env;
> +    hv_return_t err;
> +
> +    atomic_set(&env->hvf_deadline, 0);
> +    err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
> +    if (err) {
> +        fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
> +        exit(1);
> +    }

Can a signal interrupt hv_vcpu_run?  If so you actually don't need
hv_vcpu_interrupt at all.  You can also require the preemption timer (all
processors that support HVF have it), but never set it by default.  The
deadline can be left at 0 all the time; instead, you toggle the bit in
the pin-based controls.  In the signal handler you do:

	if (atomic_xchg(&env->hvf_in_guest, false)) {
		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
			| VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
	}

In the main loop you do:

	atomic_set(&env->hvf_in_guest, true);
	smp_mb();
	hv_vcpu_run(...);
	atomic_set(&env->hvf_in_guest, false);

and in the preemption timer vmexit handler:
	
		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
			& ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);

I'll leave out this patch in the meanwhile.

Paolo

> +}
> +
>  bool hvf_allowed;
>  
>  static int hvf_accel_init(MachineState *ms)
> diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h
> index 42de7ebc3a..6615365023 100644
> --- a/target/i386/hvf/vmcs.h
> +++ b/target/i386/hvf/vmcs.h
> @@ -349,6 +349,7 @@
>  #define VMCS_PIN_BASED_CTLS_EXTINT            (1 << 0)
>  #define VMCS_PIN_BASED_CTLS_NMI               (1 << 3)
>  #define VMCS_PIN_BASED_CTLS_VNMI              (1 << 5)
> +#define VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER (1 << 6)
>  
>  #define VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING (1 << 2)
>  #define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)
>
Roman Bolshakov June 30, 2020, 3:50 p.m. UTC | #2
On Tue, Jun 30, 2020 at 02:33:42PM +0200, Paolo Bonzini wrote:
> On 30/06/20 12:28, Roman Bolshakov wrote:
> > @@ -966,6 +964,20 @@ int hvf_vcpu_exec(CPUState *cpu)
> >      return ret;
> >  }
> >  
> > +void hvf_vcpu_kick(CPUState *cpu)
> > +{
> > +    X86CPU *x86_cpu = X86_CPU(cpu);
> > +    CPUX86State *env = &x86_cpu->env;
> > +    hv_return_t err;
> > +
> > +    atomic_set(&env->hvf_deadline, 0);
> > +    err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
> > +    if (err) {
> > +        fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
> > +        exit(1);
> > +    }
> 
> Can a signal interrupt hv_vcpu_run?  If so you actually don't need
> hv_vcpu_interrupt at all.

Existing signal masking and SIG_IPI didn't work IIRC when I tried to add
a primitive version of gdbstub support.

> You can also require the preemption time, all
> processor that support HVF have it, but never set it by default.  The
> deadline can be left at 0 all the time; instead, you toggle the bit in
> the pin-based controls.  In the signal handler you do:
> 
> 	if (atomic_xchg(&env->hvf_in_guest, false)) {
> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> 			| VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> 	}
> 
> In the main loop you do:
> 
> 	atomic_set(&env->hvf_guest_mode, true);
> 	smp_mb();
> 	hv_vcpu_run(...);
> 	atomic_set(&env->hvf_guest_mode, false);
> 
> and in the preemption timer vmexit handler:
> 	
> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> 			& ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> 

Ok, I'll look into that. Thanks for the advice!

-Roman
Paolo Bonzini June 30, 2020, 4:04 p.m. UTC | #3
On 30/06/20 17:50, Roman Bolshakov wrote:
> On Tue, Jun 30, 2020 at 02:33:42PM +0200, Paolo Bonzini wrote:
>> On 30/06/20 12:28, Roman Bolshakov wrote:
>>> @@ -966,6 +964,20 @@ int hvf_vcpu_exec(CPUState *cpu)
>>>      return ret;
>>>  }
>>>  
>>> +void hvf_vcpu_kick(CPUState *cpu)
>>> +{
>>> +    X86CPU *x86_cpu = X86_CPU(cpu);
>>> +    CPUX86State *env = &x86_cpu->env;
>>> +    hv_return_t err;
>>> +
>>> +    atomic_set(&env->hvf_deadline, 0);
>>> +    err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
>>> +    if (err) {
>>> +        fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
>>> +        exit(1);
>>> +    }
>>
>> Can a signal interrupt hv_vcpu_run?  If so you actually don't need
>> hv_vcpu_interrupt at all.
> 
> Existing signal masking and SIG_IPI didn't work IIRC when I tried to add
> a primitive version of gdbstub support.

You can try pthread_kill followed by hv_vcpu_interrupt if it doesn't.
The signal would be delivered after return to userspace.

Paolo

>> You can also require the preemption time, all
>> processor that support HVF have it, but never set it by default.  The
>> deadline can be left at 0 all the time; instead, you toggle the bit in
>> the pin-based controls.  In the signal handler you do:
>>
>> 	if (atomic_xchg(&env->hvf_in_guest, false)) {
>> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
>> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
>> 			| VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
>> 	}
>>
>> In the main loop you do:
>>
>> 	atomic_set(&env->hvf_guest_mode, true);
>> 	smp_mb();
>> 	hv_vcpu_run(...);
>> 	atomic_set(&env->hvf_guest_mode, false);
>>
>> and in the preemption timer vmexit handler:
>> 	
>> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
>> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
>> 			& ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
>>
> 
> Ok, I'll look into that. Thanks for the advices!
> 
> -Roman
>
Roman Bolshakov July 1, 2020, 6:36 p.m. UTC | #4
On Tue, Jun 30, 2020 at 06:04:23PM +0200, Paolo Bonzini wrote:
> On 30/06/20 17:50, Roman Bolshakov wrote:
> > On Tue, Jun 30, 2020 at 02:33:42PM +0200, Paolo Bonzini wrote:
> >> Can a signal interrupt hv_vcpu_run?  If so you actually don't need
> >> hv_vcpu_interrupt at all.
> > 
> > Existing signal masking and SIG_IPI didn't work IIRC when I tried to add
> > a primitive version of gdbstub support.
> 
> You can try pthread_kill followed by hv_vcpu_interrupt if it doesn't.
> The signal would be delivered after return to userspace.
> 

I looked at the signal setup for HVF again. I was wrong with regard to
SIG_IPI. It isn't delivered to the vCPU because the signal is masked; this
fixes it:

diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index d81f569aed..7bf05bca21 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -479,6 +479,7 @@ int hvf_init_vcpu(CPUState *cpu)

     pthread_sigmask(SIG_BLOCK, NULL, &set);
     sigdelset(&set, SIG_IPI);
+    pthread_sigmask(SIG_SETMASK, &set, NULL);

     init_emu();
     init_decoder();

But the signal is delivered only after a VM exit, so perhaps a sequence of
pthread_kill() and hv_vcpu_interrupt() is really needed.

So, there are two race windows at the kernel-to-user border in v2: just
before checking the deadline and VM entry, and just after VM exit and
re-arming of the preemption timer. Those are two places where kicks could
be lost. The approach you proposed seems to address them.

Thanks,
Roman

> >> You can also require the preemption time, all
> >> processor that support HVF have it, but never set it by default.  The
> >> deadline can be left at 0 all the time; instead, you toggle the bit in
> >> the pin-based controls.  In the signal handler you do:
> >>
> >> 	if (atomic_xchg(&env->hvf_in_guest, false)) {
> >> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> >> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> >> 			| VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> >> 	}
> >>
> >> In the main loop you do:
> >>
> >> 	atomic_set(&env->hvf_guest_mode, true);
> >> 	smp_mb();
> >> 	hv_vcpu_run(...);
> >> 	atomic_set(&env->hvf_guest_mode, false);
> >>
> >> and in the preemption timer vmexit handler:
> >> 	
> >> 		wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> >> 		      rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> >> 			& ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> >>
> >
Paolo Bonzini July 1, 2020, 6:50 p.m. UTC | #5
Thanks, sounds good! Of course the best solution would be in HVF itself,
similar to KVM and WHPX, but at least it's possible to work around it.

Paolo

Il mer 1 lug 2020, 20:37 Roman Bolshakov <r.bolshakov@yadro.com> ha scritto:

> On Tue, Jun 30, 2020 at 06:04:23PM +0200, Paolo Bonzini wrote:
> > On 30/06/20 17:50, Roman Bolshakov wrote:
> > > On Tue, Jun 30, 2020 at 02:33:42PM +0200, Paolo Bonzini wrote:
> > >> Can a signal interrupt hv_vcpu_run?  If so you actually don't need
> > >> hv_vcpu_interrupt at all.
> > >
> > > Existing signal masking and SIG_IPI didn't work IIRC when I tried to
> add
> > > a primitive version of gdbstub support.
> >
> > You can try pthread_kill followed by hv_vcpu_interrupt if it doesn't.
> > The signal would be delivered after return to userspace.
> >
>
> I looked at the signal setup for HVF again. I was wrong with regards to
> SIG_IPI. It isn't delivered to vCPU because the signal is masked, this
> fixes it:
>
> diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
> index d81f569aed..7bf05bca21 100644
> --- a/target/i386/hvf/hvf.c
> +++ b/target/i386/hvf/hvf.c
> @@ -479,6 +479,7 @@ int hvf_init_vcpu(CPUState *cpu)
>
>      pthread_sigmask(SIG_BLOCK, NULL, &set);
>      sigdelset(&set, SIG_IPI);
> +    pthread_sigmask(SIG_SETMASK, &set, NULL);
>
>      init_emu();
>      init_decoder();
>
> But the signal is delivered only after vmxexit, perhaps a sequence of
> pthread_kill() and hv_vcpu_interrupt() is really needed.
>
> So, there are two race windows on kernel-to-user border in v2: just
> before checking the deadline and vmenter and just after vmxexit and
> re-arm of preemption timer, that's two places where kicks could be lost.
> The approach you proposed seems to address them.
>
> Thanks,
> Roman
>
> > >> You can also require the preemption time, all
> > >> processor that support HVF have it, but never set it by default.  The
> > >> deadline can be left at 0 all the time; instead, you toggle the bit in
> > >> the pin-based controls.  In the signal handler you do:
> > >>
> > >>    if (atomic_xchg(&env->hvf_in_guest, false)) {
> > >>            wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> > >>                  rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> > >>                    | VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> > >>    }
> > >>
> > >> In the main loop you do:
> > >>
> > >>    atomic_set(&env->hvf_guest_mode, true);
> > >>    smp_mb();
> > >>    hv_vcpu_run(...);
> > >>    atomic_set(&env->hvf_guest_mode, false);
> > >>
> > >> and in the preemption timer vmexit handler:
> > >>
> > >>            wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
> > >>                  rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
> > >>                    & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
> > >>
> > >
>
>
diff mbox series

Patch

diff --git a/cpus.c b/cpus.c
index d94456ed29..6be42ff734 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1792,10 +1792,15 @@  static void qemu_cpu_kick_thread(CPUState *cpu)
         return;
     }
     cpu->thread_kicked = true;
-    err = pthread_kill(cpu->thread->thread, SIG_IPI);
-    if (err && err != ESRCH) {
-        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
-        exit(1);
+
+    if (hvf_enabled()) {
+        hvf_vcpu_kick(cpu);
+    } else {
+        err = pthread_kill(cpu->thread->thread, SIG_IPI);
+        if (err && err != ESRCH) {
+            fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+            exit(1);
+        }
     }
 #else /* _WIN32 */
     if (!qemu_cpu_is_self(cpu)) {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index b3f4b79318..288a2bd57e 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -438,7 +438,7 @@  struct CPUState {
 
     struct hax_vcpu_state *hax_vcpu;
 
-    int hvf_fd;
+    unsigned hvf_fd;
 
     /* track IOMMUs whose translations we've cached in the TCG TLB */
     GArray *iommu_notifiers;
diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index 1d40a8ec01..aaa00cbf05 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -25,6 +25,7 @@  extern bool hvf_allowed;
 
 int hvf_init_vcpu(CPUState *);
 int hvf_vcpu_exec(CPUState *);
+void hvf_vcpu_kick(CPUState *);
 void hvf_cpu_synchronize_state(CPUState *);
 void hvf_cpu_synchronize_post_reset(CPUState *);
 void hvf_cpu_synchronize_post_init(CPUState *);
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7d77efd9e4..4ae6038f22 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1602,6 +1602,7 @@  typedef struct CPUX86State {
     struct kvm_nested_state *nested_state;
 #endif
 #if defined(CONFIG_HVF)
+    uint64_t hvf_deadline;
     HVFX86LazyFlags hvf_lflags;
     void *hvf_mmio_buf;
 #endif
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index efe9802962..317304aa1d 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -72,6 +72,9 @@ 
 #include "sysemu/accel.h"
 #include "target/i386/cpu.h"
 
+/* Maximum value of VMX-preemption timer */
+#define HVF_MAX_DEADLINE UINT32_MAX
+
 HVFState *hvf_state;
 
 static void assert_hvf_ok(hv_return_t ret)
@@ -552,10 +555,6 @@  void hvf_vcpu_destroy(CPUState *cpu)
     assert_hvf_ok(ret);
 }
 
-static void dummy_signal(int sig)
-{
-}
-
 int hvf_init_vcpu(CPUState *cpu)
 {
 
@@ -563,21 +562,11 @@  int hvf_init_vcpu(CPUState *cpu)
     CPUX86State *env = &x86cpu->env;
     int r;
 
-    /* init cpu signals */
-    sigset_t set;
-    struct sigaction sigact;
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = dummy_signal;
-    sigaction(SIG_IPI, &sigact, NULL);
-
-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
-
     init_emu();
     init_decoder();
 
     hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
+    env->hvf_deadline = HVF_MAX_DEADLINE;
     env->hvf_mmio_buf = g_new(char, 4096);
 
     r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
@@ -606,6 +595,7 @@  int hvf_init_vcpu(CPUState *cpu)
           cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
           VMCS_PIN_BASED_CTLS_EXTINT |
           VMCS_PIN_BASED_CTLS_NMI |
+          VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER |
           VMCS_PIN_BASED_CTLS_VNMI));
     wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
           cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
@@ -725,7 +715,14 @@  int hvf_vcpu_exec(CPUState *cpu)
             return EXCP_HLT;
         }
 
+        /* Use VMX-preemption timer trick only if available */
+        if (rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS) &
+            VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER) {
+            wvmcs(cpu->hvf_fd, VMCS_PREEMPTION_TIMER_VALUE,
+                  atomic_read(&env->hvf_deadline));
+        }
         hv_return_t r  = hv_vcpu_run(cpu->hvf_fd);
+        atomic_set(&env->hvf_deadline, HVF_MAX_DEADLINE);
         assert_hvf_ok(r);
 
         /* handle VMEXIT */
@@ -869,6 +866,7 @@  int hvf_vcpu_exec(CPUState *cpu)
             ret = EXCP_INTERRUPT;
             break;
         case EXIT_REASON_EXT_INTR:
+        case EXIT_REASON_VMX_PREEMPT:
             /* force exit and allow io handling */
             ret = EXCP_INTERRUPT;
             break;
@@ -966,6 +964,20 @@  int hvf_vcpu_exec(CPUState *cpu)
     return ret;
 }
 
+void hvf_vcpu_kick(CPUState *cpu)
+{
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+    hv_return_t err;
+
+    atomic_set(&env->hvf_deadline, 0);
+    err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
+    if (err) {
+        fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
+        exit(1);
+    }
+}
+
 bool hvf_allowed;
 
 static int hvf_accel_init(MachineState *ms)
diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h
index 42de7ebc3a..6615365023 100644
--- a/target/i386/hvf/vmcs.h
+++ b/target/i386/hvf/vmcs.h
@@ -349,6 +349,7 @@ 
 #define VMCS_PIN_BASED_CTLS_EXTINT            (1 << 0)
 #define VMCS_PIN_BASED_CTLS_NMI               (1 << 3)
 #define VMCS_PIN_BASED_CTLS_VNMI              (1 << 5)
+#define VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER (1 << 6)
 
 #define VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING (1 << 2)
 #define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)