Message ID | 1458317932-1875-10-git-send-email-alex.bennee@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 18/03/2016 17:18, Alex Bennée wrote: > From: KONRAD Frederic <fred.konrad@greensocs.com> > > This finally allows TCG to benefit from the iothread introduction: Drop > the global mutex while running pure TCG CPU code. Reacquire the lock > when entering MMIO or PIO emulation, or when leaving the TCG loop. > > We have to revert a few optimization for the current TCG threading > model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not > kicking it in qemu_cpu_kick. We also need to disable RAM block > reordering until we have a more efficient locking mechanism at hand. > > Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. > These numbers demonstrate where we gain something: > > 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm > 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm > > The guest CPU was fully loaded, but the iothread could still run mostly > independent on a second core. Without the patch we don't get beyond > > 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm > 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm > > We don't benefit significantly, though, when the guest is not fully > loading a host CPU. > > Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> > Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> > [AJB: -smp single-threaded fix, rm old info from commit msg] > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> You should probably squash this and patch 10 together. Paolo
Hi Alex, Thanks for having pulling all that together! About this patch the original author was Jan Kiszka (https://lists.gnu.org/archive/html/qemu-devel/2011-09/msg03323.html) This has probably been dropped during a rebase. Thanks, Fred Le 18/03/2016 17:18, Alex Bennée a écrit : > From: KONRAD Frederic <fred.konrad@greensocs.com> > > This finally allows TCG to benefit from the iothread introduction: Drop > the global mutex while running pure TCG CPU code. Reacquire the lock > when entering MMIO or PIO emulation, or when leaving the TCG loop. > > We have to revert a few optimization for the current TCG threading > model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not > kicking it in qemu_cpu_kick. We also need to disable RAM block > reordering until we have a more efficient locking mechanism at hand. > > Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. > These numbers demonstrate where we gain something: > > 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm > 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm > > The guest CPU was fully loaded, but the iothread could still run mostly > independent on a second core. Without the patch we don't get beyond > > 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm > 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm > > We don't benefit significantly, though, when the guest is not fully > loading a host CPU. > > Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> > Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> > [AJB: -smp single-threaded fix, rm old info from commit msg] > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > > --- > v1 (ajb): > - SMP failure now fixed by previous commit > Changes from Fred Konrad (mttcg-v7 via paolo): > * Rebase on the current HEAD. > * Fixes a deadlock in qemu_devices_reset(). > * Remove the mutex in address_space_* > --- > cpu-exec.c | 10 ++++++++++ > cpus.c | 26 +++----------------------- > cputlb.c | 4 ++++ > exec.c | 12 ++++++++++++ > hw/i386/kvmvapic.c | 3 +++ > softmmu_template.h | 17 +++++++++++++++++ > translate-all.c | 11 +++++++++-- > 7 files changed, 58 insertions(+), 25 deletions(-) > > diff --git a/cpu-exec.c b/cpu-exec.c > index 3572256..76891fd 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -28,6 +28,7 @@ > #include "qemu/rcu.h" > #include "exec/tb-hash.h" > #include "exec/log.h" > +#include "qemu/main-loop.h" > #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) > #include "hw/i386/apic.h" > #endif > @@ -449,6 +450,13 @@ int cpu_exec(CPUState *cpu) > for(;;) { > interrupt_request = cpu->interrupt_request; > if (unlikely(interrupt_request)) { > + /* FIXME: this needs to take the iothread lock. > + * For this we need to find all places in > + * cc->cpu_exec_interrupt that can call cpu_loop_exit, > + * and call qemu_unlock_iothread_mutex() there. Else, > + * add a flag telling cpu_loop_exit() to unlock it. > + */ > + > if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { > /* Mask out external interrupts for this step. */ > interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; > @@ -501,6 +509,7 @@ int cpu_exec(CPUState *cpu) > the program flow was changed */ > next_tb = 0; > } > + > } > if (unlikely(cpu->exit_request > || replay_has_interrupt())) { > @@ -618,6 +627,7 @@ int cpu_exec(CPUState *cpu) > g_assert(env == &x86_cpu->env); > #endif > #endif /* buggy compiler */ > + > cpu->can_do_io = 1; > tb_lock_reset(); > } > diff --git a/cpus.c b/cpus.c > index a87fbf9..0e3d5f9 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -911,8 +911,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) > #endif /* _WIN32 */ > > static QemuMutex qemu_global_mutex; > -static QemuCond qemu_io_proceeded_cond; > -static unsigned iothread_requesting_mutex; > > static QemuThread io_thread; > > @@ -928,7 +926,6 @@ void qemu_init_cpu_loop(void) > qemu_cond_init(&qemu_cpu_cond); > qemu_cond_init(&qemu_pause_cond); > qemu_cond_init(&qemu_work_cond); > - qemu_cond_init(&qemu_io_proceeded_cond); > qemu_mutex_init(&qemu_global_mutex); > > qemu_thread_get_self(&io_thread); > @@ -1043,10 +1040,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu) > qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); > } > > - while (iothread_requesting_mutex) { > - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); > - } > - > CPU_FOREACH(cpu) { > qemu_wait_io_event_common(cpu); > } > @@ -1314,22 +1307,7 @@ bool qemu_mutex_iothread_locked(void) > > void qemu_mutex_lock_iothread(void) > { > - atomic_inc(&iothread_requesting_mutex); > - /* In the simple case there is no need to bump the VCPU thread out of > - * TCG code execution. > - */ > - if (!tcg_enabled() || qemu_in_vcpu_thread() || > - !first_cpu || !first_cpu->created) { > - qemu_mutex_lock(&qemu_global_mutex); > - atomic_dec(&iothread_requesting_mutex); > - } else { > - if (qemu_mutex_trylock(&qemu_global_mutex)) { > - qemu_cpu_kick_no_halt(); > - qemu_mutex_lock(&qemu_global_mutex); > - } > - atomic_dec(&iothread_requesting_mutex); > - qemu_cond_broadcast(&qemu_io_proceeded_cond); > - } > + qemu_mutex_lock(&qemu_global_mutex); > iothread_locked = true; > } > > @@ -1575,7 +1553,9 @@ static int tcg_cpu_exec(CPUState *cpu) > cpu->icount_decr.u16.low = decr; > cpu->icount_extra = count; > } > + qemu_mutex_unlock_iothread(); > ret = cpu_exec(cpu); > + qemu_mutex_lock_iothread(); > #ifdef CONFIG_PROFILER > tcg_time += profile_getclock() - ti; > #endif > diff --git a/cputlb.c b/cputlb.c > index 2f7a166..d607471 100644 > --- a/cputlb.c > +++ b/cputlb.c > @@ -30,6 +30,10 @@ > #include "exec/ram_addr.h" > #include "tcg/tcg.h" > > +#ifdef CONFIG_SOFTMMU > +#include "qemu/main-loop.h" > +#endif > + > //#define DEBUG_TLB > //#define DEBUG_TLB_CHECK > > diff --git a/exec.c b/exec.c > index 402b751..9986d59 100644 > --- a/exec.c > +++ b/exec.c > @@ -2113,6 +2113,12 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) > } > cpu->watchpoint_hit = wp; > > + /* > + * Unlock iothread mutex before calling cpu_loop_exit > + * or cpu_resume_from_signal. > + */ > + qemu_mutex_unlock_iothread(); > + > /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ > tb_lock(); > tb_check_watchpoint(cpu); > @@ -2348,8 +2354,14 @@ static void io_mem_init(void) > memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); > memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, > NULL, UINT64_MAX); > + > + /* io_mem_notdirty calls tb_invalidate_phys_page_fast, > + * which must be called without the iothread mutex. > + */ > memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, > NULL, UINT64_MAX); > + memory_region_clear_global_locking(&io_mem_notdirty); > + > memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, > NULL, UINT64_MAX); > } > diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c > index 7c0d542..a0439a8 100644 > --- a/hw/i386/kvmvapic.c > +++ b/hw/i386/kvmvapic.c > @@ -447,6 +447,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) > resume_all_vcpus(); > > if (!kvm_enabled()) { > + /* Unlock iothread mutex before calling cpu_resume_from_signal. */ > + qemu_mutex_unlock_iothread(); > + > /* Unlocked by cpu_resume_from_signal. */ > tb_lock(); > cs->current_tb = NULL; > diff --git a/softmmu_template.h b/softmmu_template.h > index 208f808..0b6c609 100644 > --- a/softmmu_template.h > +++ b/softmmu_template.h > @@ -151,6 +151,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, > CPUState *cpu = ENV_GET_CPU(env); > hwaddr physaddr = iotlbentry->addr; > MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); > + bool locked = false; > > physaddr = (physaddr & TARGET_PAGE_MASK) + addr; > cpu->mem_io_pc = retaddr; > @@ -159,8 +160,15 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, > } > > cpu->mem_io_vaddr = addr; > + if (mr->global_locking) { > + qemu_mutex_lock_iothread(); > + locked = true; > + } > memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, > iotlbentry->attrs); > + if (locked) { > + qemu_mutex_unlock_iothread(); > + } > return val; > } > #endif > @@ -358,6 +366,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, > CPUState *cpu = ENV_GET_CPU(env); > hwaddr physaddr = iotlbentry->addr; > MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); > + bool locked = false; > > physaddr = (physaddr & TARGET_PAGE_MASK) + addr; > if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { > @@ -366,8 +375,16 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, > > cpu->mem_io_vaddr = addr; > cpu->mem_io_pc = retaddr; > + > + if (mr->global_locking) { > + qemu_mutex_lock_iothread(); > + locked = true; > + } > memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, > iotlbentry->attrs); > + if (locked) { > + qemu_mutex_unlock_iothread(); > + } > } > > void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, > diff --git a/translate-all.c b/translate-all.c > index 1aab243..fe1392a 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -1221,6 +1221,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) > * this TB. > * > * Called with mmap_lock held for user-mode emulation > + * If called from generated code, iothread mutex must not be held. > */ > void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > int is_cpu_write_access) > @@ -1333,7 +1334,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, > } > > #ifdef CONFIG_SOFTMMU > -/* len must be <= 8 and start must be a multiple of len */ > +/* len must be <= 8 and start must be a multiple of len. > + * Called via softmmu_template.h, with iothread mutex not held. > + */ > void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) > { > PageDesc *p; > @@ -1591,6 +1594,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) > } > > #if !defined(CONFIG_USER_ONLY) > +/* If called from generated code, iothread mutex must not be held. */ > void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) > { > ram_addr_t ram_addr; > @@ -1637,7 +1641,10 @@ void tb_check_watchpoint(CPUState *cpu) > > #ifndef CONFIG_USER_ONLY > /* in deterministic execution mode, instructions doing device I/Os > - must be at the end of the TB */ > + * must be at the end of the TB. > + * > + * Called by softmmu_template.h, with iothread mutex not held. > + */ > void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) > { > #if defined(TARGET_MIPS) || defined(TARGET_SH4)
KONRAD Frederic <fred.konrad@greensocs.com> writes: > Hi Alex, > > Thanks for having pulling all that together! > About this patch the original author was Jan Kiszka > (https://lists.gnu.org/archive/html/qemu-devel/2011-09/msg03323.html) > > This has probably been dropped during a rebase. I'll amend the author on v2. Jan, The original didn't have a s-o-b tag from you. Are you happy to give one for its current iteration? > > Thanks, > Fred > > Le 18/03/2016 17:18, Alex Bennée a écrit : >> From: KONRAD Frederic <fred.konrad@greensocs.com> >> >> This finally allows TCG to benefit from the iothread introduction: Drop >> the global mutex while running pure TCG CPU code. Reacquire the lock >> when entering MMIO or PIO emulation, or when leaving the TCG loop. >> >> We have to revert a few optimization for the current TCG threading >> model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not >> kicking it in qemu_cpu_kick. We also need to disable RAM block >> reordering until we have a more efficient locking mechanism at hand. >> >> Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. >> These numbers demonstrate where we gain something: >> >> 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm >> 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm >> >> The guest CPU was fully loaded, but the iothread could still run mostly >> independent on a second core. Without the patch we don't get beyond >> >> 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm >> 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm >> >> We don't benefit significantly, though, when the guest is not fully >> loading a host CPU. >> >> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> >> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> >> [AJB: -smp single-threaded fix, rm old info from commit msg] >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> >> --- >> v1 (ajb): >> - SMP failure now fixed by previous commit >> Changes from Fred Konrad (mttcg-v7 via paolo): >> * Rebase on the current HEAD. >> * Fixes a deadlock in qemu_devices_reset(). >> * Remove the mutex in address_space_* >> --- >> cpu-exec.c | 10 ++++++++++ >> cpus.c | 26 +++----------------------- >> cputlb.c | 4 ++++ >> exec.c | 12 ++++++++++++ >> hw/i386/kvmvapic.c | 3 +++ >> softmmu_template.h | 17 +++++++++++++++++ >> translate-all.c | 11 +++++++++-- >> 7 files changed, 58 insertions(+), 25 deletions(-) >> >> diff --git a/cpu-exec.c b/cpu-exec.c >> index 3572256..76891fd 100644 >> --- a/cpu-exec.c >> +++ b/cpu-exec.c >> @@ -28,6 +28,7 @@ >> #include "qemu/rcu.h" >> #include "exec/tb-hash.h" >> #include "exec/log.h" >> +#include "qemu/main-loop.h" >> #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) >> #include "hw/i386/apic.h" >> #endif >> @@ -449,6 +450,13 @@ int cpu_exec(CPUState *cpu) >> for(;;) { >> interrupt_request = cpu->interrupt_request; >> if (unlikely(interrupt_request)) { >> + /* FIXME: this needs to take the iothread lock. >> + * For this we need to find all places in >> + * cc->cpu_exec_interrupt that can call cpu_loop_exit, >> + * and call qemu_unlock_iothread_mutex() there. Else, >> + * add a flag telling cpu_loop_exit() to unlock it. >> + */ >> + >> if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { >> /* Mask out external interrupts for this step. */ >> interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; >> @@ -501,6 +509,7 @@ int cpu_exec(CPUState *cpu) >> the program flow was changed */ >> next_tb = 0; >> } >> + >> } >> if (unlikely(cpu->exit_request >> || replay_has_interrupt())) { >> @@ -618,6 +627,7 @@ int cpu_exec(CPUState *cpu) >> g_assert(env == &x86_cpu->env); >> #endif >> #endif /* buggy compiler */ >> + >> cpu->can_do_io = 1; >> tb_lock_reset(); >> } >> diff --git a/cpus.c b/cpus.c >> index a87fbf9..0e3d5f9 100644 >> --- a/cpus.c >> +++ b/cpus.c >> @@ -911,8 +911,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) >> #endif /* _WIN32 */ >> >> static QemuMutex qemu_global_mutex; >> -static QemuCond qemu_io_proceeded_cond; >> -static unsigned iothread_requesting_mutex; >> >> static QemuThread io_thread; >> >> @@ -928,7 +926,6 @@ void qemu_init_cpu_loop(void) >> qemu_cond_init(&qemu_cpu_cond); >> qemu_cond_init(&qemu_pause_cond); >> qemu_cond_init(&qemu_work_cond); >> - qemu_cond_init(&qemu_io_proceeded_cond); >> qemu_mutex_init(&qemu_global_mutex); >> >> qemu_thread_get_self(&io_thread); >> @@ -1043,10 +1040,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu) >> qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); >> } >> >> - while (iothread_requesting_mutex) { >> - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); >> - } >> - >> CPU_FOREACH(cpu) { >> qemu_wait_io_event_common(cpu); >> } >> @@ -1314,22 +1307,7 @@ bool qemu_mutex_iothread_locked(void) >> >> void qemu_mutex_lock_iothread(void) >> { >> - atomic_inc(&iothread_requesting_mutex); >> - /* In the simple case there is no need to bump the VCPU thread out of >> - * TCG code execution. >> - */ >> - if (!tcg_enabled() || qemu_in_vcpu_thread() || >> - !first_cpu || !first_cpu->created) { >> - qemu_mutex_lock(&qemu_global_mutex); >> - atomic_dec(&iothread_requesting_mutex); >> - } else { >> - if (qemu_mutex_trylock(&qemu_global_mutex)) { >> - qemu_cpu_kick_no_halt(); >> - qemu_mutex_lock(&qemu_global_mutex); >> - } >> - atomic_dec(&iothread_requesting_mutex); >> - qemu_cond_broadcast(&qemu_io_proceeded_cond); >> - } >> + qemu_mutex_lock(&qemu_global_mutex); >> iothread_locked = true; >> } >> >> @@ -1575,7 +1553,9 @@ static int tcg_cpu_exec(CPUState *cpu) >> cpu->icount_decr.u16.low = decr; >> cpu->icount_extra = count; >> } >> + qemu_mutex_unlock_iothread(); >> ret = cpu_exec(cpu); >> + qemu_mutex_lock_iothread(); >> #ifdef CONFIG_PROFILER >> tcg_time += profile_getclock() - ti; >> #endif >> diff --git a/cputlb.c b/cputlb.c >> index 2f7a166..d607471 100644 >> --- a/cputlb.c >> +++ b/cputlb.c >> @@ -30,6 +30,10 @@ >> #include "exec/ram_addr.h" >> #include "tcg/tcg.h" >> >> +#ifdef CONFIG_SOFTMMU >> +#include "qemu/main-loop.h" >> +#endif >> + >> //#define DEBUG_TLB >> //#define DEBUG_TLB_CHECK >> >> diff --git a/exec.c b/exec.c >> index 402b751..9986d59 100644 >> --- a/exec.c >> +++ b/exec.c >> @@ -2113,6 +2113,12 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) >> } >> cpu->watchpoint_hit = wp; >> >> + /* >> + * Unlock iothread mutex before calling cpu_loop_exit >> + * or cpu_resume_from_signal. >> + */ >> + qemu_mutex_unlock_iothread(); >> + >> /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ >> tb_lock(); >> tb_check_watchpoint(cpu); >> @@ -2348,8 +2354,14 @@ static void io_mem_init(void) >> memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); >> memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, >> NULL, UINT64_MAX); >> + >> + /* io_mem_notdirty calls tb_invalidate_phys_page_fast, >> + * which must be called without the iothread mutex. >> + */ >> memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, >> NULL, UINT64_MAX); >> + memory_region_clear_global_locking(&io_mem_notdirty); >> + >> memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, >> NULL, UINT64_MAX); >> } >> diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c >> index 7c0d542..a0439a8 100644 >> --- a/hw/i386/kvmvapic.c >> +++ b/hw/i386/kvmvapic.c >> @@ -447,6 +447,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) >> resume_all_vcpus(); >> >> if (!kvm_enabled()) { >> + /* Unlock iothread mutex before calling cpu_resume_from_signal. */ >> + qemu_mutex_unlock_iothread(); >> + >> /* Unlocked by cpu_resume_from_signal. */ >> tb_lock(); >> cs->current_tb = NULL; >> diff --git a/softmmu_template.h b/softmmu_template.h >> index 208f808..0b6c609 100644 >> --- a/softmmu_template.h >> +++ b/softmmu_template.h >> @@ -151,6 +151,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, >> CPUState *cpu = ENV_GET_CPU(env); >> hwaddr physaddr = iotlbentry->addr; >> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); >> + bool locked = false; >> >> physaddr = (physaddr & TARGET_PAGE_MASK) + addr; >> cpu->mem_io_pc = retaddr; >> @@ -159,8 +160,15 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, >> } >> >> cpu->mem_io_vaddr = addr; >> + if (mr->global_locking) { >> + qemu_mutex_lock_iothread(); >> + locked = true; >> + } >> memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, >> iotlbentry->attrs); >> + if (locked) { >> + qemu_mutex_unlock_iothread(); >> + } >> return val; >> } >> #endif >> @@ -358,6 +366,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, >> CPUState *cpu = ENV_GET_CPU(env); >> hwaddr physaddr = iotlbentry->addr; >> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); >> + bool locked = false; >> >> physaddr = (physaddr & TARGET_PAGE_MASK) + addr; >> if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { >> @@ -366,8 +375,16 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, >> >> cpu->mem_io_vaddr = addr; >> cpu->mem_io_pc = retaddr; >> + >> + if (mr->global_locking) { >> + qemu_mutex_lock_iothread(); >> + locked = true; >> + } >> memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, >> iotlbentry->attrs); >> + if (locked) { >> + qemu_mutex_unlock_iothread(); >> + } >> } >> >> void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, >> diff --git a/translate-all.c b/translate-all.c >> index 1aab243..fe1392a 100644 >> --- a/translate-all.c >> +++ b/translate-all.c >> @@ -1221,6 +1221,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) >> * this TB. >> * >> * Called with mmap_lock held for user-mode emulation >> + * If called from generated code, iothread mutex must not be held. >> */ >> void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> int is_cpu_write_access) >> @@ -1333,7 +1334,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >> } >> >> #ifdef CONFIG_SOFTMMU >> -/* len must be <= 8 and start must be a multiple of len */ >> +/* len must be <= 8 and start must be a multiple of len. >> + * Called via softmmu_template.h, with iothread mutex not held. >> + */ >> void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) >> { >> PageDesc *p; >> @@ -1591,6 +1594,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >> } >> >> #if !defined(CONFIG_USER_ONLY) >> +/* If called from generated code, iothread mutex must not be held. */ >> void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) >> { >> ram_addr_t ram_addr; >> @@ -1637,7 +1641,10 @@ void tb_check_watchpoint(CPUState *cpu) >> >> #ifndef CONFIG_USER_ONLY >> /* in deterministic execution mode, instructions doing device I/Os >> - must be at the end of the TB */ >> + * must be at the end of the TB. >> + * >> + * Called by softmmu_template.h, with iothread mutex not held. >> + */ >> void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) >> { >> #if defined(TARGET_MIPS) || defined(TARGET_SH4) -- Alex Bennée
On 2016-03-23 17:27, Alex Bennée wrote: > > KONRAD Frederic <fred.konrad@greensocs.com> writes: > >> Hi Alex, >> >> Thanks for having pulling all that together! >> About this patch the original author was Jan Kiszka >> (https://lists.gnu.org/archive/html/qemu-devel/2011-09/msg03323.html) >> >> This has probably been dropped during a rebase. > > I'll amend the author on v2. > > Jan, > > The original didn't have a s-o-b tag from you. Are you happy to give one > for its current iteration? You can consider it Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> At that point, I didn't consider it ready for upstream (see the log), so I left out the signature. Jan > >> >> Thanks, >> Fred >> >> Le 18/03/2016 17:18, Alex Bennée a écrit : >>> From: KONRAD Frederic <fred.konrad@greensocs.com> >>> >>> This finally allows TCG to benefit from the iothread introduction: Drop >>> the global mutex while running pure TCG CPU code. Reacquire the lock >>> when entering MMIO or PIO emulation, or when leaving the TCG loop. >>> >>> We have to revert a few optimization for the current TCG threading >>> model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not >>> kicking it in qemu_cpu_kick. We also need to disable RAM block >>> reordering until we have a more efficient locking mechanism at hand. >>> >>> Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here. >>> These numbers demonstrate where we gain something: >>> >>> 20338 jan 20 0 331m 75m 6904 R 99 0.9 0:50.95 qemu-system-arm >>> 20337 jan 20 0 331m 75m 6904 S 20 0.9 0:26.50 qemu-system-arm >>> >>> The guest CPU was fully loaded, but the iothread could still run mostly >>> independent on a second core. Without the patch we don't get beyond >>> >>> 32206 jan 20 0 330m 73m 7036 R 82 0.9 1:06.00 qemu-system-arm >>> 32204 jan 20 0 330m 73m 7036 S 21 0.9 0:17.03 qemu-system-arm >>> >>> We don't benefit significantly, though, when the guest is not fully >>> loading a host CPU. >>> >>> Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com> >>> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com> >>> [AJB: -smp single-threaded fix, rm old info from commit msg] >>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >>> >>> --- >>> v1 (ajb): >>> - SMP failure now fixed by previous commit >>> Changes from Fred Konrad (mttcg-v7 via paolo): >>> * Rebase on the current HEAD. >>> * Fixes a deadlock in qemu_devices_reset(). >>> * Remove the mutex in address_space_* >>> --- >>> cpu-exec.c | 10 ++++++++++ >>> cpus.c | 26 +++----------------------- >>> cputlb.c | 4 ++++ >>> exec.c | 12 ++++++++++++ >>> hw/i386/kvmvapic.c | 3 +++ >>> softmmu_template.h | 17 +++++++++++++++++ >>> translate-all.c | 11 +++++++++-- >>> 7 files changed, 58 insertions(+), 25 deletions(-) >>> >>> diff --git a/cpu-exec.c b/cpu-exec.c >>> index 3572256..76891fd 100644 >>> --- a/cpu-exec.c >>> +++ b/cpu-exec.c >>> @@ -28,6 +28,7 @@ >>> #include "qemu/rcu.h" >>> #include "exec/tb-hash.h" >>> #include "exec/log.h" >>> +#include "qemu/main-loop.h" >>> #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) >>> #include "hw/i386/apic.h" >>> #endif >>> @@ -449,6 +450,13 @@ int cpu_exec(CPUState *cpu) >>> for(;;) { >>> interrupt_request = cpu->interrupt_request; >>> if (unlikely(interrupt_request)) { >>> + /* FIXME: this needs to take the iothread lock. >>> + * For this we need to find all places in >>> + * cc->cpu_exec_interrupt that can call cpu_loop_exit, >>> + * and call qemu_unlock_iothread_mutex() there. Else, >>> + * add a flag telling cpu_loop_exit() to unlock it. >>> + */ >>> + >>> if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { >>> /* Mask out external interrupts for this step. */ >>> interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; >>> @@ -501,6 +509,7 @@ int cpu_exec(CPUState *cpu) >>> the program flow was changed */ >>> next_tb = 0; >>> } >>> + >>> } >>> if (unlikely(cpu->exit_request >>> || replay_has_interrupt())) { >>> @@ -618,6 +627,7 @@ int cpu_exec(CPUState *cpu) >>> g_assert(env == &x86_cpu->env); >>> #endif >>> #endif /* buggy compiler */ >>> + >>> cpu->can_do_io = 1; >>> tb_lock_reset(); >>> } >>> diff --git a/cpus.c b/cpus.c >>> index a87fbf9..0e3d5f9 100644 >>> --- a/cpus.c >>> +++ b/cpus.c >>> @@ -911,8 +911,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) >>> #endif /* _WIN32 */ >>> >>> static QemuMutex qemu_global_mutex; >>> -static QemuCond qemu_io_proceeded_cond; >>> -static unsigned iothread_requesting_mutex; >>> >>> static QemuThread io_thread; >>> >>> @@ -928,7 +926,6 @@ void qemu_init_cpu_loop(void) >>> qemu_cond_init(&qemu_cpu_cond); >>> qemu_cond_init(&qemu_pause_cond); >>> qemu_cond_init(&qemu_work_cond); >>> - qemu_cond_init(&qemu_io_proceeded_cond); >>> qemu_mutex_init(&qemu_global_mutex); >>> >>> qemu_thread_get_self(&io_thread); >>> @@ -1043,10 +1040,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu) >>> qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); >>> } >>> >>> - while (iothread_requesting_mutex) { >>> - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); >>> - } >>> - >>> CPU_FOREACH(cpu) { >>> qemu_wait_io_event_common(cpu); >>> } >>> @@ -1314,22 +1307,7 @@ bool qemu_mutex_iothread_locked(void) >>> >>> void qemu_mutex_lock_iothread(void) >>> { >>> - atomic_inc(&iothread_requesting_mutex); >>> - /* In the simple case there is no need to bump the VCPU thread out of >>> - * TCG code execution. >>> - */ >>> - if (!tcg_enabled() || qemu_in_vcpu_thread() || >>> - !first_cpu || !first_cpu->created) { >>> - qemu_mutex_lock(&qemu_global_mutex); >>> - atomic_dec(&iothread_requesting_mutex); >>> - } else { >>> - if (qemu_mutex_trylock(&qemu_global_mutex)) { >>> - qemu_cpu_kick_no_halt(); >>> - qemu_mutex_lock(&qemu_global_mutex); >>> - } >>> - atomic_dec(&iothread_requesting_mutex); >>> - qemu_cond_broadcast(&qemu_io_proceeded_cond); >>> - } >>> + qemu_mutex_lock(&qemu_global_mutex); >>> iothread_locked = true; >>> } >>> >>> @@ -1575,7 +1553,9 @@ static int tcg_cpu_exec(CPUState *cpu) >>> cpu->icount_decr.u16.low = decr; >>> cpu->icount_extra = count; >>> } >>> + qemu_mutex_unlock_iothread(); >>> ret = cpu_exec(cpu); >>> + qemu_mutex_lock_iothread(); >>> #ifdef CONFIG_PROFILER >>> tcg_time += profile_getclock() - ti; >>> #endif >>> diff --git a/cputlb.c b/cputlb.c >>> index 2f7a166..d607471 100644 >>> --- a/cputlb.c >>> +++ b/cputlb.c >>> @@ -30,6 +30,10 @@ >>> #include "exec/ram_addr.h" >>> #include "tcg/tcg.h" >>> >>> +#ifdef CONFIG_SOFTMMU >>> +#include "qemu/main-loop.h" >>> +#endif >>> + >>> //#define DEBUG_TLB >>> //#define DEBUG_TLB_CHECK >>> >>> diff --git a/exec.c b/exec.c >>> index 402b751..9986d59 100644 >>> --- a/exec.c >>> +++ b/exec.c >>> @@ -2113,6 +2113,12 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) >>> } >>> cpu->watchpoint_hit = wp; >>> >>> + /* >>> + * Unlock iothread mutex before calling cpu_loop_exit >>> + * or cpu_resume_from_signal. >>> + */ >>> + qemu_mutex_unlock_iothread(); >>> + >>> /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ >>> tb_lock(); >>> tb_check_watchpoint(cpu); >>> @@ -2348,8 +2354,14 @@ static void io_mem_init(void) >>> memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); >>> memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, >>> NULL, UINT64_MAX); >>> + >>> + /* io_mem_notdirty calls tb_invalidate_phys_page_fast, >>> + * which must be called without the iothread mutex. >>> + */ >>> memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, >>> NULL, UINT64_MAX); >>> + memory_region_clear_global_locking(&io_mem_notdirty); >>> + >>> memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, >>> NULL, UINT64_MAX); >>> } >>> diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c >>> index 7c0d542..a0439a8 100644 >>> --- a/hw/i386/kvmvapic.c >>> +++ b/hw/i386/kvmvapic.c >>> @@ -447,6 +447,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) >>> resume_all_vcpus(); >>> >>> if (!kvm_enabled()) { >>> + /* Unlock iothread mutex before calling cpu_resume_from_signal. */ >>> + qemu_mutex_unlock_iothread(); >>> + >>> /* Unlocked by cpu_resume_from_signal. */ >>> tb_lock(); >>> cs->current_tb = NULL; >>> diff --git a/softmmu_template.h b/softmmu_template.h >>> index 208f808..0b6c609 100644 >>> --- a/softmmu_template.h >>> +++ b/softmmu_template.h >>> @@ -151,6 +151,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, >>> CPUState *cpu = ENV_GET_CPU(env); >>> hwaddr physaddr = iotlbentry->addr; >>> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); >>> + bool locked = false; >>> >>> physaddr = (physaddr & TARGET_PAGE_MASK) + addr; >>> cpu->mem_io_pc = retaddr; >>> @@ -159,8 +160,15 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, >>> } >>> >>> cpu->mem_io_vaddr = addr; >>> + if (mr->global_locking) { >>> + qemu_mutex_lock_iothread(); >>> + locked = true; >>> + } >>> memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, >>> iotlbentry->attrs); >>> + if (locked) { >>> + qemu_mutex_unlock_iothread(); >>> + } >>> return val; >>> } >>> #endif >>> @@ -358,6 +366,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, >>> CPUState *cpu = ENV_GET_CPU(env); >>> hwaddr physaddr = iotlbentry->addr; >>> MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); >>> + bool locked = false; >>> >>> physaddr = (physaddr & TARGET_PAGE_MASK) + addr; >>> if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { >>> @@ -366,8 +375,16 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, >>> >>> cpu->mem_io_vaddr = addr; >>> cpu->mem_io_pc = retaddr; >>> + >>> + if (mr->global_locking) { >>> + qemu_mutex_lock_iothread(); >>> + locked = true; >>> + } >>> memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, >>> iotlbentry->attrs); >>> + if (locked) { >>> + qemu_mutex_unlock_iothread(); >>> + } >>> } >>> >>> void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, >>> diff --git a/translate-all.c b/translate-all.c >>> index 1aab243..fe1392a 100644 >>> --- a/translate-all.c >>> +++ b/translate-all.c >>> @@ -1221,6 +1221,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) >>> * this TB. >>> * >>> * Called with mmap_lock held for user-mode emulation >>> + * If called from generated code, iothread mutex must not be held. >>> */ >>> void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >>> int is_cpu_write_access) >>> @@ -1333,7 +1334,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, >>> } >>> >>> #ifdef CONFIG_SOFTMMU >>> -/* len must be <= 8 and start must be a multiple of len */ >>> +/* len must be <= 8 and start must be a multiple of len. >>> + * Called via softmmu_template.h, with iothread mutex not held. >>> + */ >>> void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) >>> { >>> PageDesc *p; >>> @@ -1591,6 +1594,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >>> } >>> >>> #if !defined(CONFIG_USER_ONLY) >>> +/* If called from generated code, iothread mutex must not be held. */ >>> void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) >>> { >>> ram_addr_t ram_addr; >>> @@ -1637,7 +1641,10 @@ void tb_check_watchpoint(CPUState *cpu) >>> >>> #ifndef CONFIG_USER_ONLY >>> /* in deterministic execution mode, instructions doing device I/Os >>> - must be at the end of the TB */ >>> + * must be at the end of the TB. >>> + * >>> + * Called by softmmu_template.h, with iothread mutex not held. >>> + */ >>> void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) >>> { >>> #if defined(TARGET_MIPS) || defined(TARGET_SH4) > > > -- > Alex Bennée >
diff --git a/cpu-exec.c b/cpu-exec.c index 3572256..76891fd 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -28,6 +28,7 @@ #include "qemu/rcu.h" #include "exec/tb-hash.h" #include "exec/log.h" +#include "qemu/main-loop.h" #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY) #include "hw/i386/apic.h" #endif @@ -449,6 +450,13 @@ int cpu_exec(CPUState *cpu) for(;;) { interrupt_request = cpu->interrupt_request; if (unlikely(interrupt_request)) { + /* FIXME: this needs to take the iothread lock. + * For this we need to find all places in + * cc->cpu_exec_interrupt that can call cpu_loop_exit, + * and call qemu_unlock_iothread_mutex() there. Else, + * add a flag telling cpu_loop_exit() to unlock it. + */ + if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { /* Mask out external interrupts for this step. */ interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK; @@ -501,6 +509,7 @@ int cpu_exec(CPUState *cpu) the program flow was changed */ next_tb = 0; } + } if (unlikely(cpu->exit_request || replay_has_interrupt())) { @@ -618,6 +627,7 @@ int cpu_exec(CPUState *cpu) g_assert(env == &x86_cpu->env); #endif #endif /* buggy compiler */ + cpu->can_do_io = 1; tb_lock_reset(); } diff --git a/cpus.c b/cpus.c index a87fbf9..0e3d5f9 100644 --- a/cpus.c +++ b/cpus.c @@ -911,8 +911,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu) #endif /* _WIN32 */ static QemuMutex qemu_global_mutex; -static QemuCond qemu_io_proceeded_cond; -static unsigned iothread_requesting_mutex; static QemuThread io_thread; @@ -928,7 +926,6 @@ void qemu_init_cpu_loop(void) qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); qemu_cond_init(&qemu_work_cond); - qemu_cond_init(&qemu_io_proceeded_cond); qemu_mutex_init(&qemu_global_mutex); qemu_thread_get_self(&io_thread); @@ -1043,10 +1040,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu) qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); } - while (iothread_requesting_mutex) { - qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex); - } - CPU_FOREACH(cpu) { qemu_wait_io_event_common(cpu); } @@ -1314,22 +1307,7 @@ bool qemu_mutex_iothread_locked(void) void qemu_mutex_lock_iothread(void) { - atomic_inc(&iothread_requesting_mutex); - /* In the simple case there is no need to bump the VCPU thread out of - * TCG code execution. - */ - if (!tcg_enabled() || qemu_in_vcpu_thread() || - !first_cpu || !first_cpu->created) { - qemu_mutex_lock(&qemu_global_mutex); - atomic_dec(&iothread_requesting_mutex); - } else { - if (qemu_mutex_trylock(&qemu_global_mutex)) { - qemu_cpu_kick_no_halt(); - qemu_mutex_lock(&qemu_global_mutex); - } - atomic_dec(&iothread_requesting_mutex); - qemu_cond_broadcast(&qemu_io_proceeded_cond); - } + qemu_mutex_lock(&qemu_global_mutex); iothread_locked = true; } @@ -1575,7 +1553,9 @@ static int tcg_cpu_exec(CPUState *cpu) cpu->icount_decr.u16.low = decr; cpu->icount_extra = count; } + qemu_mutex_unlock_iothread(); ret = cpu_exec(cpu); + qemu_mutex_lock_iothread(); #ifdef CONFIG_PROFILER tcg_time += profile_getclock() - ti; #endif diff --git a/cputlb.c b/cputlb.c index 2f7a166..d607471 100644 --- a/cputlb.c +++ b/cputlb.c @@ -30,6 +30,10 @@ #include "exec/ram_addr.h" #include "tcg/tcg.h" +#ifdef CONFIG_SOFTMMU +#include "qemu/main-loop.h" +#endif + //#define DEBUG_TLB //#define DEBUG_TLB_CHECK diff --git a/exec.c b/exec.c index 402b751..9986d59 100644 --- a/exec.c +++ b/exec.c @@ -2113,6 +2113,12 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) } cpu->watchpoint_hit = wp; + /* + * Unlock iothread mutex before calling cpu_loop_exit + * or cpu_resume_from_signal. + */ + qemu_mutex_unlock_iothread(); + /* Unlocked by cpu_loop_exit or cpu_resume_from_signal. */ tb_lock(); tb_check_watchpoint(cpu); @@ -2348,8 +2354,14 @@ static void io_mem_init(void) memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX); + + /* io_mem_notdirty calls tb_invalidate_phys_page_fast, + * which must be called without the iothread mutex. + */ memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL, NULL, UINT64_MAX); + memory_region_clear_global_locking(&io_mem_notdirty); + memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL, NULL, UINT64_MAX); } diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c index 7c0d542..a0439a8 100644 --- a/hw/i386/kvmvapic.c +++ b/hw/i386/kvmvapic.c @@ -447,6 +447,9 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip) resume_all_vcpus(); if (!kvm_enabled()) { + /* Unlock iothread mutex before calling cpu_resume_from_signal. */ + qemu_mutex_unlock_iothread(); + /* Unlocked by cpu_resume_from_signal. */ tb_lock(); cs->current_tb = NULL; diff --git a/softmmu_template.h b/softmmu_template.h index 208f808..0b6c609 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -151,6 +151,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, CPUState *cpu = ENV_GET_CPU(env); hwaddr physaddr = iotlbentry->addr; MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + bool locked = false; physaddr = (physaddr & TARGET_PAGE_MASK) + addr; cpu->mem_io_pc = retaddr; @@ -159,8 +160,15 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, } cpu->mem_io_vaddr = addr; + if (mr->global_locking) { + qemu_mutex_lock_iothread(); + locked = true; + } memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT, iotlbentry->attrs); + if (locked) { + qemu_mutex_unlock_iothread(); + } return val; } #endif @@ -358,6 +366,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, CPUState *cpu = ENV_GET_CPU(env); hwaddr physaddr = iotlbentry->addr; MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs); + bool locked = false; physaddr = (physaddr & TARGET_PAGE_MASK) + addr; if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) { @@ -366,8 +375,16 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env, cpu->mem_io_vaddr = addr; cpu->mem_io_pc = retaddr; + + if (mr->global_locking) { + qemu_mutex_lock_iothread(); + locked = true; + } memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT, iotlbentry->attrs); + if (locked) { + qemu_mutex_unlock_iothread(); + } } void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, diff --git a/translate-all.c b/translate-all.c index 1aab243..fe1392a 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1221,6 +1221,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) * this TB. * * Called with mmap_lock held for user-mode emulation + * If called from generated code, iothread mutex must not be held. */ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access) @@ -1333,7 +1334,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, } #ifdef CONFIG_SOFTMMU -/* len must be <= 8 and start must be a multiple of len */ +/* len must be <= 8 and start must be a multiple of len. + * Called via softmmu_template.h, with iothread mutex not held. + */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) { PageDesc *p; @@ -1591,6 +1594,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) } #if !defined(CONFIG_USER_ONLY) +/* If called from generated code, iothread mutex must not be held. */ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr) { ram_addr_t ram_addr; @@ -1637,7 +1641,10 @@ void tb_check_watchpoint(CPUState *cpu) #ifndef CONFIG_USER_ONLY /* in deterministic execution mode, instructions doing device I/Os - must be at the end of the TB */ + * must be at the end of the TB. + * + * Called by softmmu_template.h, with iothread mutex not held. + */ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) { #if defined(TARGET_MIPS) || defined(TARGET_SH4)