@@ -454,7 +454,7 @@ struct CPUState {
struct hax_vcpu_state *hax_vcpu;
- int hvf_fd;
+ unsigned hvf_fd;
/* track IOMMUs whose translations we've cached in the TCG TLB */
GArray *iommu_notifiers;
@@ -25,6 +25,7 @@ extern bool hvf_allowed;
int hvf_init_vcpu(CPUState *);
int hvf_vcpu_exec(CPUState *);
+void hvf_vcpu_kick(CPUState *);
void hvf_cpu_synchronize_state(CPUState *);
void hvf_cpu_synchronize_post_reset(CPUState *);
void hvf_cpu_synchronize_post_init(CPUState *);
@@ -1708,10 +1708,15 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
return;
}
cpu->thread_kicked = true;
- err = pthread_kill(cpu->thread->thread, SIG_IPI);
- if (err && err != ESRCH) {
- fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
- exit(1);
+
+ if (hvf_enabled()) {
+ hvf_vcpu_kick(cpu);
+ } else {
+ err = pthread_kill(cpu->thread->thread, SIG_IPI);
+ if (err && err != ESRCH) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+ exit(1);
+ }
}
#else /* _WIN32 */
if (!qemu_cpu_is_self(cpu)) {
@@ -1608,6 +1608,7 @@ typedef struct CPUX86State {
struct kvm_nested_state *nested_state;
#endif
#if defined(CONFIG_HVF)
+ bool hvf_in_guest;
HVFX86LazyFlags hvf_lflags;
void *hvf_mmio_buf;
#endif
@@ -458,8 +458,27 @@ void hvf_vcpu_destroy(CPUState *cpu)
assert_hvf_ok(ret);
}
-static void dummy_signal(int sig)
+static void hvf_handle_ipi(int sig)
{
+ if (!current_cpu) {
+ return;
+ }
+
+ /*
+ * skip object type check to avoid a deadlock in
+ * qemu_log/vfprintf/flockfile.
+ */
+ X86CPU *x86_cpu = (X86CPU *)current_cpu;
+ CPUX86State *env = &x86_cpu->env;
+
+ atomic_set(&current_cpu->exit_request, true);
+ /* Write cpu->exit_request before reading env->hvf_in_guest */
+ smp_mb();
+ if (atomic_read(&env->hvf_in_guest)) {
+ wvmcs(current_cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+ rvmcs(current_cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+ | VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+ }
}
int hvf_init_vcpu(CPUState *cpu)
@@ -474,16 +493,18 @@ int hvf_init_vcpu(CPUState *cpu)
struct sigaction sigact;
memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = dummy_signal;
+ sigact.sa_handler = hvf_handle_ipi;
sigaction(SIG_IPI, &sigact, NULL);
pthread_sigmask(SIG_BLOCK, NULL, &set);
sigdelset(&set, SIG_IPI);
+ pthread_sigmask(SIG_SETMASK, &set, NULL);
init_emu();
init_decoder();
hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1);
+ env->hvf_in_guest = false;
env->hvf_mmio_buf = g_new(char, 4096);
r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT);
@@ -529,6 +550,7 @@ int hvf_init_vcpu(CPUState *cpu)
wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */
wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
+ wvmcs(cpu->hvf_fd, VMCS_PREEMPTION_TIMER_VALUE, 0);
x86cpu = X86_CPU(cpu);
x86cpu->env.xsave_buf = qemu_memalign(4096, 4096);
@@ -631,7 +653,20 @@ int hvf_vcpu_exec(CPUState *cpu)
return EXCP_HLT;
}
+ atomic_set(&env->hvf_in_guest, true);
+ /* Read cpu->exit_request after writing env->hvf_in_guest */
+ smp_mb();
+ if (atomic_read(&cpu->exit_request)) {
+ atomic_set(&env->hvf_in_guest, false);
+ qemu_mutex_lock_iothread();
+ wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+ rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+ & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+ atomic_set(&cpu->exit_request, false);
+ return EXCP_INTERRUPT;
+ }
hv_return_t r = hv_vcpu_run(cpu->hvf_fd);
+ atomic_store_release(&env->hvf_in_guest, false);
assert_hvf_ok(r);
/* handle VMEXIT */
@@ -774,7 +809,12 @@ int hvf_vcpu_exec(CPUState *cpu)
vmx_clear_nmi_window_exiting(cpu);
ret = EXCP_INTERRUPT;
break;
+ case EXIT_REASON_VMX_PREEMPT:
case EXIT_REASON_EXT_INTR:
+ wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+ rvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS)
+ & ~VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER);
+ atomic_set(&cpu->exit_request, false);
/* force exit and allow io handling */
ret = EXCP_INTERRUPT;
break;
@@ -872,6 +912,22 @@ int hvf_vcpu_exec(CPUState *cpu)
return ret;
}
+void hvf_vcpu_kick(CPUState *cpu)
+{
+ hv_return_t err;
+
+ err = pthread_kill(cpu->thread->thread, SIG_IPI);
+ if (err) {
+ fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
+ exit(1);
+ }
+ err = hv_vcpu_interrupt(&cpu->hvf_fd, 1);
+ if (err) {
+ fprintf(stderr, "qemu:%s error %#x\n", __func__, err);
+ exit(1);
+ }
+}
+
bool hvf_allowed;
static int hvf_accel_init(MachineState *ms)
@@ -349,6 +349,7 @@
#define VMCS_PIN_BASED_CTLS_EXTINT (1 << 0)
#define VMCS_PIN_BASED_CTLS_NMI (1 << 3)
#define VMCS_PIN_BASED_CTLS_VNMI (1 << 5)
+#define VMCS_PIN_BASED_CTLS_VMX_PREEMPT_TIMER (1 << 6)
#define VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING (1 << 2)
#define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3)
HVF doesn't have a CPU kick, and without one it's not possible to perform an action on a vCPU thread until a VM-exit happens. The kick is also needed for timely interrupt delivery.

The existing implementation of the CPU kick sends SIG_IPI (aka SIGUSR1) to the vCPU thread, but that's different from what hv_vcpu_interrupt() does. The latter results in an invocation of mp_cpus_kick() in the XNU kernel [1]. mp_cpus_kick() sends an IPI through the host LAPIC to the HVF vCPU, and the kick interrupt leads to a VM-exit because the "external-interrupt exiting" VM-execution control is enabled for HVF.

hv_vcpu_interrupt() has no effect if it's delivered while the vCPU is outside of the guest, so to avoid kick loss it's complemented with a SIG_IPI handler and a zero-value VMX-preemption timer. If the kick happens outside of hv_vcpu_run(), the signal handler re-queues the kick by setting exit_request. exit_request is cleared when the request is satisfied, i.e. when the vCPU thread returns with EXCP_INTERRUPT.

So we get the following scenarios, time/location-wise, for the kick:

1) The vCPU thread is well before hv_vcpu_run(): exit_request is set.
   As soon as the vCPU thread approaches hv_vcpu_run(), the exit
   request is satisfied.

2) The vCPU thread is about to enter the guest: the VMX-preemption
   timer is enabled to expedite an immediate VM-exit. The timer is
   then cleared in the VM-exit handler and the vCPU thread exits.

3) The guest is running: hv_vcpu_run() is interrupted by
   hv_vcpu_interrupt() and the vCPU thread quits.

4) The vCPU thread has just made a VM-exit: exit_request is recorded
   and the VMX-preemption timer is enabled, but the exit request won't
   be satisfied until the next iteration of the vCPU thread. No kick
   is lost.

5) The vCPU thread is well past hv_vcpu_run(): exit_request is
   recorded and the VMX-preemption timer is not enabled. The exit
   request is satisfied on the next iteration of the vCPU thread, as
   in 4). The kick is not lost.

6) If an external interrupt arrives, we satisfy the exit request and
   clear the VMX-preemption timer, i.e. kicks are coalesced with
   interrupts.

While at it, correct the type of hvf_fd to match hv_vcpuid_t to avoid compilation warnings.

1. https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/i386/mp.c

Cc: Cameron Esfahani <dirty@apple.com>
Signed-off-by: Roman Bolshakov <r.bolshakov@yadro.com>
---
Hi,

This version is just for review. There's an issue where interrupts (x86_gsi_interrupt) don't reach HVF, i.e. no interrupts are loaded into the VMCS interruption-info field. I've been triaging the issue for a while, and the dtrace patches are a byproduct of that effort. Once I figure out where interrupts are lost/suppressed (which won't happen until the end of next week due to vacation), I'll send v5 with the kick patch and the interrupt delivery fix.

Thanks,
Roman

Changes since v3:
 - Replaced TLS pthread_key_t with current_cpu (Paolo)
 - Eliminated the race condition by signalling through atomic
   variables (Paolo)

Changes since v2:
 - Reworked the workaround to minimize the kick-loss race: use signals
   to interrupt the vCPU thread outside of hv_vcpu_run(), and turn the
   VMX-preemption timer on/off while the timer value is always zero.
   v3 also assumes that the VMX-preemption timer control is always
   available on hardware that supports HVF.
Changes since v1:
 - Reduced the kick-loss race (Paolo) and removed SIG_IPI blocking

 include/hw/core/cpu.h  |  2 +-
 include/sysemu/hvf.h   |  1 +
 softmmu/cpus.c         | 13 ++++++---
 target/i386/cpu.h      |  1 +
 target/i386/hvf/hvf.c  | 60 ++++++++++++++++++++++++++++++++++++++++--
 target/i386/hvf/vmcs.h |  1 +
 6 files changed, 71 insertions(+), 7 deletions(-)
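As a review aid, here is a minimal standalone sketch of the
exit_request/hvf_in_guest handshake described above. It is
illustrative only, not QEMU code: C11 atomics and a seq_cst fence
stand in for QEMU's atomic_set()/atomic_read() and smp_mb(), puts()
stands in for arming the zero-value VMX-preemption timer, and
hv_vcpu_run() is reduced to a comment, so only the ordering of the
two flags is modelled.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool exit_request;   /* stands in for cpu->exit_request */
static atomic_bool in_guest;       /* stands in for env->hvf_in_guest */

/* Kicker side, mirroring hvf_handle_ipi(). */
static void kick(void)
{
    atomic_store(&exit_request, true);
    /* Write exit_request before reading in_guest (pairs with vcpu side). */
    atomic_thread_fence(memory_order_seq_cst);
    if (atomic_load(&in_guest)) {
        /* The real handler arms the zero VMX-preemption timer here. */
        puts("kick: vCPU in guest, force an immediate VM-exit");
    } else {
        puts("kick: vCPU outside guest, exit_request re-queues the kick");
    }
}

/* vCPU side, mirroring the check added before hv_vcpu_run(). */
static bool vcpu_iteration(void)
{
    atomic_store(&in_guest, true);
    /* Read exit_request after writing in_guest (pairs with kick()). */
    atomic_thread_fence(memory_order_seq_cst);
    if (atomic_load(&exit_request)) {
        atomic_store(&in_guest, false);
        atomic_store(&exit_request, false);
        return false;              /* EXCP_INTERRUPT: the kick is satisfied */
    }
    /* hv_vcpu_run() would enter the guest here. */
    atomic_store(&in_guest, false);
    return true;
}

int main(void)
{
    kick();                        /* scenario 1: kick before guest entry */
    if (!vcpu_iteration()) {
        puts("vcpu: exit request satisfied before entering the guest");
    }
    return 0;
}

The important property is the Dekker-style pairing: the kicker writes
exit_request before reading in_guest, while the vCPU thread writes
in_guest before reading exit_request, so at least one side always
observes the other and a kick can't be lost.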