Message ID: 20210120224444.71840-10-agraf@csgraf.de
State: New, archived
Series: hvf: Implement Apple Silicon Support
On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>
> From: Peter Collingbourne <pcc@google.com>
>
> Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> up on IPI.
>
> In this implementation IPI is blocked on the CPU thread at startup and
> pselect() is used to atomically unblock the signal and begin sleeping.
> The signal is sent unconditionally so there's no need to worry about
> races between actually sleeping and the "we think we're sleeping"
> state. It may lead to an extra wakeup but that's better than missing
> it entirely.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
> ---
>  accel/hvf/hvf-cpus.c     |  5 ++--
>  include/sysemu/hvf_int.h |  1 +
>  target/arm/hvf/hvf.c     | 56 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 59 insertions(+), 3 deletions(-)
>
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index 6d70ee742e..abef6a58f7 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
>      cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
>
>      /* init cpu signals */
> -    sigset_t set;
>      struct sigaction sigact;
>
>      memset(&sigact, 0, sizeof(sigact));
>      sigact.sa_handler = dummy_signal;
>      sigaction(SIG_IPI, &sigact, NULL);
>
> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> -    sigdelset(&set, SIG_IPI);
> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>
>  #ifdef __aarch64__
>      r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> index c2ac6c8f97..7a397fe85a 100644
> --- a/include/sysemu/hvf_int.h
> +++ b/include/sysemu/hvf_int.h
> @@ -51,6 +51,7 @@ extern HVFState *hvf_state;
>  struct hvf_vcpu_state {
>      uint64_t fd;
>      void *exit;
> +    sigset_t unblock_ipi_mask;
>  };
>
>  void assert_hvf_ok(hv_return_t ret);
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 8f18efe856..f0850ab14a 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -2,6 +2,7 @@
>   * QEMU Hypervisor.framework support for Apple Silicon
>
>   * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> + * Copyright 2020 Google LLC
>   *
>   * This work is licensed under the terms of the GNU GPL, version 2 or later.
>   * See the COPYING file in the top-level directory.
> @@ -17,6 +18,8 @@
>  #include "sysemu/hvf_int.h"
>  #include "sysemu/hw_accel.h"
>
> +#include <mach/mach_time.h>
> +
>  #include "exec/address-spaces.h"
>  #include "hw/irq.h"
>  #include "qemu/main-loop.h"
> @@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>
>  void hvf_kick_vcpu_thread(CPUState *cpu)
>  {
> +    cpus_kick_thread(cpu);
>      hv_vcpus_exit(&cpu->hvf->fd, 1);
>  }
>
> @@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>      return 0;
>  }
>
> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> +{
> +    /*
> +     * Use pselect to sleep so that other threads can IPI us while we're
> +     * sleeping.
> +     */
> +    qatomic_mb_set(&cpu->thread_kicked, false);
> +    qemu_mutex_unlock_iothread();
> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> +    qemu_mutex_lock_iothread();
> +}

It seems a bit odd that this is specific to Arm hvf.
Doesn't x86 hvf need "pause until interrupt" functionality ?

> +
>  int hvf_vcpu_exec(CPUState *cpu)
>  {
>      ARMCPU *arm_cpu = ARM_CPU(cpu);
> @@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
>          }
>          case EC_WFX_TRAP:
>              advance_pc = true;
> +            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> +                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> +
> +                uint64_t ctl;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> +                                        &ctl);
> +                assert_hvf_ok(r);
> +
> +                if (!(ctl & 1) || (ctl & 2)) {
> +                    /* Timer disabled or masked, just wait for an IPI. */
> +                    hvf_wait_for_ipi(cpu, NULL);
> +                    break;
> +                }
> +
> +                uint64_t cval;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> +                                        &cval);
> +                assert_hvf_ok(r);
> +
> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> +                if (ticks_to_sleep < 0) {
> +                    break;
> +                }
> +
> +                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
> +                uint64_t nanos =
> +                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
> +                    1000000000 / arm_cpu->gt_cntfrq_hz;
> +
> +                /*
> +                 * Don't sleep for less than 2ms. This is believed to improve
> +                 * latency of message passing workloads.
> +                 */

Believed by who ?

> +                if (!seconds && nanos < 2000000) {
> +                    break;
> +                }
> +
> +                struct timespec ts = { seconds, nanos };
> +                hvf_wait_for_ipi(cpu, &ts);
> +            }

Why doesn't the timer timeout manifest as an IPI ? (Put another way,
why is the timer interrupt special?)

thanks
-- PMM
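The atomic unblock-and-sleep pattern the commit message describes is worth seeing in isolation. Below is a minimal, self-contained sketch (an editor's illustration, not code from the patch: SIGUSR1 stands in for QEMU's SIG_IPI, and a plain pthread stands in for whoever calls qemu_cpu_kick()). The key property is that the signal is blocked except during pselect(), so a kick can never slip into the gap between "decided to sleep" and "asleep":

/*
 * Sketch of the race-free sleep pattern used by hvf_wait_for_ipi().
 * Assumptions: POSIX system; SIGUSR1 plays the role of SIG_IPI.
 * Build with: cc sleep_demo.c -o sleep_demo -lpthread
 */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <time.h>
#include <unistd.h>

static pthread_t sleeper;

static void dummy_signal(int sig)
{
    /* Exists only so SIGUSR1 interrupts pselect() instead of killing us. */
    (void)sig;
}

static void *kicker(void *arg)
{
    (void)arg;
    usleep(100 * 1000);               /* wait 100ms, then... */
    pthread_kill(sleeper, SIGUSR1);   /* ...kick unconditionally */
    return NULL;
}

int main(void)
{
    struct sigaction sigact;
    sigset_t block_mask, unblock_ipi_mask;
    struct timespec ts = { .tv_sec = 10, .tv_nsec = 0 };  /* far deadline */
    pthread_t t;

    sleeper = pthread_self();

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIGUSR1, &sigact, NULL);

    /* Keep the signal blocked during normal operation... */
    sigemptyset(&block_mask);
    sigaddset(&block_mask, SIGUSR1);
    pthread_sigmask(SIG_BLOCK, &block_mask, NULL);

    /* ...and precompute a mask with it unblocked, as hvf_init_vcpu() does. */
    pthread_sigmask(SIG_BLOCK, NULL, &unblock_ipi_mask);
    sigdelset(&unblock_ipi_mask, SIGUSR1);

    pthread_create(&t, NULL, kicker, NULL);

    /*
     * If the kick lands before this call, the signal stays pending and
     * pselect() returns immediately; if it lands during the sleep, it
     * interrupts pselect(). Either way the wakeup cannot be lost.
     */
    pselect(0, NULL, NULL, NULL, &ts, &unblock_ipi_mask);
    printf("woke after ~100ms, not after the 10s deadline\n");

    pthread_join(t, NULL);
    return 0;
}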
On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
> >
> > From: Peter Collingbourne <pcc@google.com>
> >
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > In this implementation IPI is blocked on the CPU thread at startup and
> > pselect() is used to atomically unblock the signal and begin sleeping.
> > The signal is sent unconditionally so there's no need to worry about
> > races between actually sleeping and the "we think we're sleeping"
> > state. It may lead to an extra wakeup but that's better than missing
> > it entirely.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> > Signed-off-by: Alexander Graf <agraf@csgraf.de>
> > Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
> > ---
> >  accel/hvf/hvf-cpus.c     |  5 ++--
> >  include/sysemu/hvf_int.h |  1 +
> >  target/arm/hvf/hvf.c     | 56 ++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 59 insertions(+), 3 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 6d70ee742e..abef6a58f7 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
> >      cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >
> >      /* init cpu signals */
> > -    sigset_t set;
> >      struct sigaction sigact;
> >
> >      memset(&sigact, 0, sizeof(sigact));
> >      sigact.sa_handler = dummy_signal;
> >      sigaction(SIG_IPI, &sigact, NULL);
> >
> > -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -    sigdelset(&set, SIG_IPI);
> > +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >
> >  #ifdef __aarch64__
> >      r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c2ac6c8f97..7a397fe85a 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -51,6 +51,7 @@ extern HVFState *hvf_state;
> >  struct hvf_vcpu_state {
> >      uint64_t fd;
> >      void *exit;
> > +    sigset_t unblock_ipi_mask;
> >  };
> >
> >  void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8f18efe856..f0850ab14a 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >   * QEMU Hypervisor.framework support for Apple Silicon
> >
> >   * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> > + * Copyright 2020 Google LLC
> >   *
> >   * This work is licensed under the terms of the GNU GPL, version 2 or later.
> >   * See the COPYING file in the top-level directory.
> > @@ -17,6 +18,8 @@
> >  #include "sysemu/hvf_int.h"
> >  #include "sysemu/hw_accel.h"
> >
> > +#include <mach/mach_time.h>
> > +
> >  #include "exec/address-spaces.h"
> >  #include "hw/irq.h"
> >  #include "qemu/main-loop.h"
> > @@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >  void hvf_kick_vcpu_thread(CPUState *cpu)
> >  {
> > +    cpus_kick_thread(cpu);
> >      hv_vcpus_exit(&cpu->hvf->fd, 1);
> >  }
> >
> > @@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >      return 0;
> >  }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +    /*
> > +     * Use pselect to sleep so that other threads can IPI us while we're
> > +     * sleeping.
> > +     */
> > +    qatomic_mb_set(&cpu->thread_kicked, false);
> > +    qemu_mutex_unlock_iothread();
> > +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> > +    qemu_mutex_lock_iothread();
> > +}
>
> It seems a bit odd that this is specific to Arm hvf.
> Doesn't x86 hvf need "pause until interrupt" functionality ?

I'm not very familiar with x86 HVF (and I don't have an x86 Mac to test
with), but my reading of the x86 HVF code is that there appear to be no
exits that put the system to sleep (not even MWAIT which I think is the
x86 equivalent of WFI -- the code just appears to busy loop). I think
that implies that either we actually busy loop on x86 (seems unlikely
to me since I guess someone would have noticed by now) or MWAIT does
not actually result in a VM exit, and HVF itself goes to sleep inside
hv_vcpu_run(), unlike ARM HVF where WFI results in an exit (and
immediate re-entering would otherwise busy loop).

> > +
> >  int hvf_vcpu_exec(CPUState *cpu)
> >  {
> >      ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
> >          }
> >          case EC_WFX_TRAP:
> >              advance_pc = true;
> > +            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> > +                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > +
> > +                uint64_t ctl;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> > +                                        &ctl);
> > +                assert_hvf_ok(r);
> > +
> > +                if (!(ctl & 1) || (ctl & 2)) {
> > +                    /* Timer disabled or masked, just wait for an IPI. */
> > +                    hvf_wait_for_ipi(cpu, NULL);
> > +                    break;
> > +                }
> > +
> > +                uint64_t cval;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> > +                                        &cval);
> > +                assert_hvf_ok(r);
> > +
> > +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> > +                if (ticks_to_sleep < 0) {
> > +                    break;
> > +                }
> > +
> > +                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
> > +                uint64_t nanos =
> > +                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
> > +                    1000000000 / arm_cpu->gt_cntfrq_hz;
> > +
> > +                /*
> > +                 * Don't sleep for less than 2ms. This is believed to improve
> > +                 * latency of message passing workloads.
> > +                 */
>
> Believed by who ?

Alexander asked me to add this [1], so I'll defer to him. As I
mentioned on the thread I personally would prefer not to have this
logic without data specifically collected on the M1, but I don't have a
strong opinion.

> > +                if (!seconds && nanos < 2000000) {
> > +                    break;
> > +                }
> > +
> > +                struct timespec ts = { seconds, nanos };
> > +                hvf_wait_for_ipi(cpu, &ts);
> > +            }
>
> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
> why is the timer interrupt special?)

Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
if they become due while in hv_vcpu_run(). But at this point we are not
in hv_vcpu_run() (due to the aforementioned difference in wait behavior
between x86 and ARM) so we need to "manually" wait for the timer to
become due, re-enter the guest, let it exit with
HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.

Peter

[1] https://lists.gnu.org/archive/html/qemu-arm/2020-12/msg00056.html
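As an aside for readers following the timing discussion: the conversion from counter ticks to a timespec that the patch performs is plain integer arithmetic against the guest's counter frequency. Here is a standalone sketch with worked numbers (an editor's illustration; the 24 MHz frequency is an assumption, chosen because it matches what Apple Silicon machines report in CNTFRQ_EL0):

/* Sketch of the cval-to-timespec arithmetic in the WFI handler above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Assumed counter frequency; arm_cpu->gt_cntfrq_hz in the patch. */
    uint64_t cntfrq_hz = 24000000;      /* 24 MHz */
    /* Pretend the vtimer fires 60,000,000 ticks from now
     * (cval - mach_absolute_time() in the patch). */
    int64_t ticks_to_sleep = 60000000;

    /* Same arithmetic as the patch: whole seconds, then leftover ticks. */
    uint64_t seconds = ticks_to_sleep / cntfrq_hz;
    uint64_t nanos = (ticks_to_sleep - cntfrq_hz * seconds) *
                     1000000000 / cntfrq_hz;

    /* 60e6 ticks / 24 MHz = 2.5s, so this prints { 2, 500000000 }. */
    printf("timespec = { %llu, %llu }\n",
           (unsigned long long)seconds, (unsigned long long)nanos);
    return 0;
}

Note that the leftover-ticks term is always smaller than cntfrq_hz, so the multiplication by one billion stays comfortably within uint64_t range for realistic counter frequencies.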
On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>
> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
> > > +                if (!seconds && nanos < 2000000) {
> > > +                    break;
> > > +                }
> > > +
> > > +                struct timespec ts = { seconds, nanos };
> > > +                hvf_wait_for_ipi(cpu, &ts);
> > > +            }
> >
> > Why doesn't the timer timeout manifest as an IPI ? (Put another way,
> > why is the timer interrupt special?)
>
> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
> if they become due while in hv_vcpu_run(). But at this point we are
> not in hv_vcpu_run() (due to the aforementioned difference in wait
> behavior between x86 and ARM) so we need to "manually" wait for the
> timer to become due, re-enter the guest, let it exit with
> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.

But WFI doesn't just wait for a timer interrupt, it waits for any
interrupt. So it doesn't seem right that the timer interrupt in
particular is being handled specially here.

thanks
-- PMM
On 10.02.21 23:17, Peter Maydell wrote:
> On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>>>> +                if (!seconds && nanos < 2000000) {
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                struct timespec ts = { seconds, nanos };
>>>> +                hvf_wait_for_ipi(cpu, &ts);
>>>> +            }
>>> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
>>> why is the timer interrupt special?)
>> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
>> if they become due while in hv_vcpu_run(). But at this point we are
>> not in hv_vcpu_run() (due to the aforementioned difference in wait
>> behavior between x86 and ARM) so we need to "manually" wait for the
>> timer to become due, re-enter the guest, let it exit with
>> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.
> But WFI doesn't just wait for a timer interrupt, it waits for
> any interrupt. So it doesn't seem right that the timer interrupt
> in particular is being handled specially here.

The vtimer is handled by hvf itself and results in a #vmexit when
triggered. On wfi, we're switching from an hvf owned vtimer to a QEMU
owned one. The only events that can happen to wake us from wfi are
kicks (IRQs, maintenance events) or expiry of the vtimer.

The select() logic in this patch handles both, as it wakes up on an IPI
(kick because of IRQ/maint event) and exits gracefully after the timer
deadline.

Alex
On 10.02.21 23:17, Peter Maydell wrote:
> On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>>>> +                if (!seconds && nanos < 2000000) {
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                struct timespec ts = { seconds, nanos };
>>>> +                hvf_wait_for_ipi(cpu, &ts);
>>>> +            }
>>> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
>>> why is the timer interrupt special?)
>> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
>> if they become due while in hv_vcpu_run(). But at this point we are
>> not in hv_vcpu_run() (due to the aforementioned difference in wait
>> behavior between x86 and ARM) so we need to "manually" wait for the
>> timer to become due, re-enter the guest, let it exit with
>> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.
> But WFI doesn't just wait for a timer interrupt, it waits for
> any interrupt. So it doesn't seem right that the timer interrupt
> in particular is being handled specially here.

It waits for either an external interrupt (vcpu_kick() -> IPI -> signal
-> pselect exits) or a vtimer (kept in the CPU thread, handled by hvf
natively when vCPU is running, handled through the pselect timeout when
in WFI mode).

In hvf on ARM, the vtimer is handled specially. It is owned by the
kernel code when we're inside the CPU loop. We don't even get vtimer
MSR exits. However when user space decides to not return to the kernel
CPU loop, it needs to handle the vtimer expiry itself. We really only
have that case on WFI.

Alex
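Both wakeup sources Alex enumerates collapse onto the single pselect() call: a kick arrives as SIG_IPI and interrupts the syscall, while vtimer expiry is covered by the timeout. The patch never needs to tell the two apart (it re-enters the guest either way and lets hvf raise HV_EXIT_REASON_VTIMER_ACTIVATED itself), but if one did, the return value already distinguishes them, as in this sketch (an editor's illustration with a hypothetical helper name, not code from the series):

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/select.h>
#include <time.h>

/*
 * Hypothetical helper: how the two WFI wakeup reasons surface at the
 * pselect() call site. 'ts' is the time until the vtimer deadline
 * (or NULL for no deadline); 'unblock_ipi_mask' is the thread's signal
 * mask with SIG_IPI removed.
 */
void wait_for_ipi_or_deadline(const struct timespec *ts,
                              const sigset_t *unblock_ipi_mask)
{
    int r = pselect(0, NULL, NULL, NULL, ts, unblock_ipi_mask);

    if (r == 0) {
        /* Timed out: the vtimer deadline passed while we slept. */
        printf("woken by vtimer deadline\n");
    } else if (r < 0 && errno == EINTR) {
        /* The IPI signal interrupted the sleep: somebody kicked us. */
        printf("woken by IPI\n");
    }
}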
diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 6d70ee742e..abef6a58f7 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
     cpu->hvf = g_malloc0(sizeof(*cpu->hvf));

     /* init cpu signals */
-    sigset_t set;
     struct sigaction sigact;

     memset(&sigact, 0, sizeof(sigact));
     sigact.sa_handler = dummy_signal;
     sigaction(SIG_IPI, &sigact, NULL);

-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
+    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);

 #ifdef __aarch64__
     r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c2ac6c8f97..7a397fe85a 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -51,6 +51,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
     uint64_t fd;
     void *exit;
+    sigset_t unblock_ipi_mask;
 };

 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8f18efe856..f0850ab14a 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon

  * Copyright 2020 Alexander Graf <agraf@csgraf.de>
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -17,6 +18,8 @@
 #include "sysemu/hvf_int.h"
 #include "sysemu/hw_accel.h"

+#include <mach/mach_time.h>
+
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
 #include "qemu/main-loop.h"
@@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)

 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+    cpus_kick_thread(cpu);
     hv_vcpus_exit(&cpu->hvf->fd, 1);
 }

@@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
     return 0;
 }

+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+    /*
+     * Use pselect to sleep so that other threads can IPI us while we're
+     * sleeping.
+     */
+    qatomic_mb_set(&cpu->thread_kicked, false);
+    qemu_mutex_unlock_iothread();
+    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+    qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
         }
         case EC_WFX_TRAP:
             advance_pc = true;
+            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
+                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+
+                uint64_t ctl;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+                                        &ctl);
+                assert_hvf_ok(r);
+
+                if (!(ctl & 1) || (ctl & 2)) {
+                    /* Timer disabled or masked, just wait for an IPI. */
+                    hvf_wait_for_ipi(cpu, NULL);
+                    break;
+                }
+
+                uint64_t cval;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+                                        &cval);
+                assert_hvf_ok(r);
+
+                int64_t ticks_to_sleep = cval - mach_absolute_time();
+                if (ticks_to_sleep < 0) {
+                    break;
+                }
+
+                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+                uint64_t nanos =
+                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+                    1000000000 / arm_cpu->gt_cntfrq_hz;
+
+                /*
+                 * Don't sleep for less than 2ms. This is believed to improve
+                 * latency of message passing workloads.
+                 */
+                if (!seconds && nanos < 2000000) {
+                    break;
+                }
+
+                struct timespec ts = { seconds, nanos };
+                hvf_wait_for_ipi(cpu, &ts);
+            }
             break;
         case EC_AA64_HVC:
             cpu_synchronize_state(cpu);
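One detail of the diff that is easy to read past: hvf_kick_vcpu_thread() now fires both kick mechanisms back to back, because the target thread may be blocked in either of two places. An annotated copy of the two-line body follows (the comments are the editor's reading of the discussion above, not from the patch):

void hvf_kick_vcpu_thread(CPUState *cpu)
{
    /*
     * Covers the pselect() sleep in hvf_wait_for_ipi(): sends SIG_IPI,
     * which the vCPU thread unblocks only while sleeping. If the thread
     * is not asleep yet, the signal stays pending and the next pselect()
     * returns immediately -- the "extra wakeup" the commit message
     * accepts in exchange for never losing a kick.
     */
    cpus_kick_thread(cpu);

    /*
     * Covers the in-guest case: forces hv_vcpu_run() to return if the
     * vCPU is currently executing guest code.
     */
    hv_vcpus_exit(&cpu->hvf->fd, 1);
}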