Message ID | 20210519202253.76782-15-agraf@csgraf.de (mailing list archive)
---|---
State | New, archived
Series | hvf: Implement Apple Silicon Support
On Wed, May 19, 2021 at 10:22:48PM +0200, Alexander Graf wrote:
> From: Peter Collingbourne <pcc@google.com>
>
> Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> up on IPI.
>
> In this implementation IPI is blocked on the CPU thread at startup and
> pselect() is used to atomically unblock the signal and begin sleeping.
> The signal is sent unconditionally so there's no need to worry about
> races between actually sleeping and the "we think we're sleeping"
> state. It may lead to an extra wakeup but that's better than missing
> it entirely.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
>
> ---
>
> v6 -> v7:
>
> - Move WFI into function
> - Improve comment wording
> ---
>  accel/hvf/hvf-accel-ops.c |  5 ++-
>  include/sysemu/hvf_int.h  |  1 +
>  target/arm/hvf/hvf.c      | 68 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 71 insertions(+), 3 deletions(-)

Reviewed-by: Sergio Lopez <slp@redhat.com>
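The mechanism being acked here hinges on pselect() swapping the signal mask and starting to wait in a single atomic step: an IPI sent just before the sleep stays pending while SIG_IPI is blocked, and is delivered the moment pselect() unblocks it, so it can shorten the sleep but never be lost. Below is a minimal, self-contained sketch of that pattern, with SIGUSR1 standing in for QEMU's SIG_IPI; all names are invented for illustration, none come from the patch.

```c
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <unistd.h>

/* Empty handler: its only job is to make the signal interrupt pselect(). */
static void dummy_signal(int sig)
{
    (void)sig;
}

static void *vcpu_thread(void *arg)
{
    sigset_t block_mask, unblock_ipi_mask;
    struct sigaction sigact;

    (void)arg;
    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIGUSR1, &sigact, NULL);

    /* Keep the "IPI" blocked while we are doing other work... */
    sigemptyset(&block_mask);
    sigaddset(&block_mask, SIGUSR1);
    pthread_sigmask(SIG_BLOCK, &block_mask, NULL);

    /* ...and precompute a mask that has it unblocked. */
    pthread_sigmask(SIG_BLOCK, NULL, &unblock_ipi_mask);
    sigdelset(&unblock_ipi_mask, SIGUSR1);

    /*
     * pselect() installs unblock_ipi_mask and begins waiting atomically.
     * A SIGUSR1 sent at any earlier point is left pending and delivered
     * right here, so the wakeup cannot fall into a gap between "decide
     * to sleep" and "actually asleep".
     */
    pselect(0, NULL, NULL, NULL, NULL /* sleep indefinitely */,
            &unblock_ipi_mask);
    printf("vcpu thread: woken by IPI\n");
    return NULL;
}

int main(void)
{
    pthread_t thread;

    pthread_create(&thread, NULL, vcpu_thread, NULL);
    sleep(1);                        /* let the thread reach pselect() */
    pthread_kill(thread, SIGUSR1);   /* send the "IPI" */
    pthread_join(thread, NULL);
    return 0;
}
```

The only cost of sending the signal unconditionally is an occasional spurious early return from pselect(), which is the "extra wakeup" trade-off the commit message describes.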
On Wed, 19 May 2021 at 21:23, Alexander Graf <agraf@csgraf.de> wrote:
>
> From: Peter Collingbourne <pcc@google.com>
>
> Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> up on IPI.
>
> In this implementation IPI is blocked on the CPU thread at startup and
> pselect() is used to atomically unblock the signal and begin sleeping.
> The signal is sent unconditionally so there's no need to worry about
> races between actually sleeping and the "we think we're sleeping"
> state. It may lead to an extra wakeup but that's better than missing
> it entirely.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
>
> ---
> +static void hvf_wfi(CPUState *cpu)
> +{
> +    ARMCPU *arm_cpu = ARM_CPU(cpu);
> +    hv_return_t r;
> +    uint64_t ctl;
> +
> +    if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) {
> +        /* Interrupt pending, no need to wait */
> +        return;
> +    }
> +
> +    r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> +                            &ctl);
> +    assert_hvf_ok(r);
> +
> +    if (!(ctl & 1) || (ctl & 2)) {
> +        /* Timer disabled or masked, just wait for an IPI. */
> +        hvf_wait_for_ipi(cpu, NULL);
> +        return;
> +    }
> +
> +    uint64_t cval;
> +    r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> +                            &cval);
> +    assert_hvf_ok(r);
> +
> +    int64_t ticks_to_sleep = cval - mach_absolute_time();

This looks odd. The CNTV_CVAL is the compare value against the CNTVCT
(virtual count), which should start at 0 when the VM starts, pause when
the VM is paused, and so on. But here we are comparing it against what
looks like a host absolute timecount...

> +    if (ticks_to_sleep < 0) {
> +        return;
> +    }
> +
> +    uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
> +    uint64_t nanos =
> +        (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
> +        1000000000 / arm_cpu->gt_cntfrq_hz;

Should this be calling gt_cntfrq_period_ns()? (If not, please use the
NANOSECONDS_PER_SECOND constant instead of a raw 1000000000.)

> +
> +    /*
> +     * Don't sleep for less than the time a context switch would take,
> +     * so that we can satisfy fast timer requests on the same CPU.
> +     * Measurements on M1 show the sweet spot to be ~2ms.
> +     */
> +    if (!seconds && nanos < 2000000) {

"2 * SCALE_MS" is a bit easier to read I think.

> +        return;
> +    }
> +
> +    struct timespec ts = { seconds, nanos };
> +    hvf_wait_for_ipi(cpu, &ts);
> +}

thanks
-- PMM
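For context on Peter's second point: the patch converts a tick count into a struct timespec by dividing by the counter frequency and rescaling the sub-second remainder to nanoseconds. Here is a hedged sketch of that arithmetic with a named constant in place of the raw 1000000000; ticks_to_timespec is a hypothetical helper, not a QEMU or patch function.

```c
#include <stdint.h>
#include <time.h>

#define NANOSECONDS_PER_SECOND 1000000000LL

/*
 * Split a tick count at freq_hz ticks per second into the seconds and
 * nanoseconds fields of a struct timespec. Assumes ticks >= 0, as the
 * patch guarantees by bailing out when ticks_to_sleep < 0.
 */
static struct timespec ticks_to_timespec(int64_t ticks, uint64_t freq_hz)
{
    struct timespec ts;

    ts.tv_sec = ticks / freq_hz;
    /* Remaining sub-second ticks, rescaled to nanoseconds. */
    ts.tv_nsec = (ticks % freq_hz) * NANOSECONDS_PER_SECOND / freq_hz;
    return ts;
}
```

With the 24 MHz counter that Apple Silicon reports, the sub-second remainder stays below 24 million, so the intermediate multiplication fits comfortably in 64 bits.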
```diff
diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c
index 48e402ef57..63ec8a6f25 100644
--- a/accel/hvf/hvf-accel-ops.c
+++ b/accel/hvf/hvf-accel-ops.c
@@ -369,15 +369,14 @@ static int hvf_init_vcpu(CPUState *cpu)
     cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
 
     /* init cpu signals */
-    sigset_t set;
     struct sigaction sigact;
 
     memset(&sigact, 0, sizeof(sigact));
     sigact.sa_handler = dummy_signal;
     sigaction(SIG_IPI, &sigact, NULL);
 
-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
+    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
     r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index e52d67ed5c..6d4eef8065 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -51,6 +51,7 @@ struct hvf_vcpu_state {
     uint64_t fd;
     void *exit;
     bool vtimer_masked;
+    sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 3934c05979..67002efd36 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf <agraf@csgraf.de>
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -17,6 +18,8 @@
 #include "sysemu/hvf_int.h"
 #include "sysemu/hw_accel.h"
 
+#include <mach/mach_time.h>
+
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
 #include "qemu/main-loop.h"
@@ -457,6 +460,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+    cpus_kick_thread(cpu);
     hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
@@ -536,6 +540,67 @@ static int hvf_inject_interrupts(CPUState *cpu)
     return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+    /*
+     * Use pselect to sleep so that other threads can IPI us while we're
+     * sleeping.
+     */
+    qatomic_mb_set(&cpu->thread_kicked, false);
+    qemu_mutex_unlock_iothread();
+    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+    qemu_mutex_lock_iothread();
+}
+
+static void hvf_wfi(CPUState *cpu)
+{
+    ARMCPU *arm_cpu = ARM_CPU(cpu);
+    hv_return_t r;
+    uint64_t ctl;
+
+    if (cpu->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ)) {
+        /* Interrupt pending, no need to wait */
+        return;
+    }
+
+    r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+                            &ctl);
+    assert_hvf_ok(r);
+
+    if (!(ctl & 1) || (ctl & 2)) {
+        /* Timer disabled or masked, just wait for an IPI. */
+        hvf_wait_for_ipi(cpu, NULL);
+        return;
+    }
+
+    uint64_t cval;
+    r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+                            &cval);
+    assert_hvf_ok(r);
+
+    int64_t ticks_to_sleep = cval - mach_absolute_time();
+    if (ticks_to_sleep < 0) {
+        return;
+    }
+
+    uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+    uint64_t nanos =
+        (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+        1000000000 / arm_cpu->gt_cntfrq_hz;
+
+    /*
+     * Don't sleep for less than the time a context switch would take,
+     * so that we can satisfy fast timer requests on the same CPU.
+     * Measurements on M1 show the sweet spot to be ~2ms.
+     */
+    if (!seconds && nanos < 2000000) {
+        return;
+    }
+
+    struct timespec ts = { seconds, nanos };
+    hvf_wait_for_ipi(cpu, &ts);
+}
+
 static void hvf_sync_vtimer(CPUState *cpu)
 {
     ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -670,6 +735,9 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
         case EC_WFX_TRAP:
             advance_pc = true;
+            if (!(syndrome & WFX_IS_WFE)) {
+                hvf_wfi(cpu);
+            }
             break;
         case EC_AA64_HVC:
             cpu_synchronize_state(cpu);
```
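As a footnote to the diff, the `!(ctl & 1) || (ctl & 2)` test in hvf_wfi() reads the architected CNTV_CTL_EL0 bits: bit 0 is ENABLE and bit 1 is IMASK, so the vCPU falls back to an IPI-only wait whenever the virtual timer is disabled or its interrupt is masked. Spelled out with named constants (the macro names and helper below are illustrative, not QEMU definitions):

```c
#include <stdbool.h>
#include <stdint.h>

/* CNTV_CTL_EL0 bit assignments, per the Arm architecture. */
#define CNTV_CTL_ENABLE  (1 << 0)   /* 1: timer enabled */
#define CNTV_CTL_IMASK   (1 << 1)   /* 1: timer interrupt masked */
#define CNTV_CTL_ISTATUS (1 << 2)   /* 1: condition met (read-only) */

/* Equivalent to the patch's "!(ctl & 1) || (ctl & 2)" check. */
static inline bool vtimer_cannot_fire(uint64_t ctl)
{
    return !(ctl & CNTV_CTL_ENABLE) || (ctl & CNTV_CTL_IMASK);
}
```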