Message ID | 1460730231-1184-10-git-send-email-alex.bennee@linaro.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/04/16 17:23, Alex Bennée wrote: > diff --git a/cputlb.c b/cputlb.c > index 1412049..42a3b07 100644 > --- a/cputlb.c > +++ b/cputlb.c > @@ -56,22 +56,14 @@ > } \ > } while (0) > > +/* We need a solution for stuffing 64 bit pointers in 32 bit ones if > + * we care about this combination */ > +QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(void *)); > + > /* statistics */ > int tlb_flush_count; > > -/* NOTE: > - * If flush_global is true (the usual case), flush all tlb entries. > - * If flush_global is false, flush (at least) all tlb entries not > - * marked global. > - * > - * Since QEMU doesn't currently implement a global/not-global flag > - * for tlb entries, at the moment tlb_flush() will also flush all > - * tlb entries in the flush_global == false case. This is OK because > - * CPU architectures generally permit an implementation to drop > - * entries from the TLB at any time, so flushing more entries than > - * required is only an efficiency issue, not a correctness issue. > - */ > -void tlb_flush(CPUState *cpu, int flush_global) > +static void tlb_flush_nocheck(CPUState *cpu, int flush_global) > { > CPUArchState *env = cpu->env_ptr; > > @@ -89,6 +81,34 @@ void tlb_flush(CPUState *cpu, int flush_global) > env->tlb_flush_addr = -1; > env->tlb_flush_mask = 0; > tlb_flush_count++; > + /* atomic_mb_set(&cpu->pending_tlb_flush, 0); */ > +} > + > +static void tlb_flush_global_async_work(CPUState *cpu, void *opaque) > +{ > + tlb_flush_nocheck(cpu, GPOINTER_TO_INT(opaque)); > +} > + > +/* NOTE: > + * If flush_global is true (the usual case), flush all tlb entries. > + * If flush_global is false, flush (at least) all tlb entries not > + * marked global. > + * > + * Since QEMU doesn't currently implement a global/not-global flag > + * for tlb entries, at the moment tlb_flush() will also flush all > + * tlb entries in the flush_global == false case. 
This is OK because > + * CPU architectures generally permit an implementation to drop > + * entries from the TLB at any time, so flushing more entries than > + * required is only an efficiency issue, not a correctness issue. > + */ > +void tlb_flush(CPUState *cpu, int flush_global) > +{ > + if (cpu->created) { Why do we check for 'cpu->created' here? And why don't we do that in tlb_flush_page_all()? > + async_run_on_cpu(cpu, tlb_flush_global_async_work, > + GINT_TO_POINTER(flush_global)); > + } else { > + tlb_flush_nocheck(cpu, flush_global); > + } > } > > static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp) > @@ -222,6 +242,21 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) > tb_flush_jmp_cache(cpu, addr); > } > > +static void tlb_flush_page_async_work(CPUState *cpu, void *opaque) > +{ > + tlb_flush_page(cpu, GPOINTER_TO_UINT(opaque)); > +} > + > +void tlb_flush_page_all(target_ulong addr) > +{ > + CPUState *cpu; > + > + CPU_FOREACH(cpu) { > + async_run_on_cpu(cpu, tlb_flush_page_async_work, > + GUINT_TO_POINTER(addr)); > + } > +} > + > /* update the TLBs so that writes to code in the virtual page 'addr' > can be detected */ > void tlb_protect_code(ram_addr_t ram_addr) > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 9144ee0..f695577 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -190,6 +190,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, > void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr); > void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, > uintptr_t retaddr); > +void tlb_flush_page_all(target_ulong addr); > #else > static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) > { tlb_flush_by_mmuidx() and tlb_flush_page_by_mmuidx() want to be safe as well. Kind regards, Sergey
Sergey Fedorov <serge.fdrv@gmail.com> writes: > On 15/04/16 17:23, Alex Bennée wrote: >> diff --git a/cputlb.c b/cputlb.c >> index 1412049..42a3b07 100644 >> --- a/cputlb.c >> +++ b/cputlb.c >> @@ -56,22 +56,14 @@ >> } \ >> } while (0) >> >> +/* We need a solution for stuffing 64 bit pointers in 32 bit ones if >> + * we care about this combination */ >> +QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(void *)); >> + >> /* statistics */ >> int tlb_flush_count; >> >> -/* NOTE: >> - * If flush_global is true (the usual case), flush all tlb entries. >> - * If flush_global is false, flush (at least) all tlb entries not >> - * marked global. >> - * >> - * Since QEMU doesn't currently implement a global/not-global flag >> - * for tlb entries, at the moment tlb_flush() will also flush all >> - * tlb entries in the flush_global == false case. This is OK because >> - * CPU architectures generally permit an implementation to drop >> - * entries from the TLB at any time, so flushing more entries than >> - * required is only an efficiency issue, not a correctness issue. >> - */ >> -void tlb_flush(CPUState *cpu, int flush_global) >> +static void tlb_flush_nocheck(CPUState *cpu, int flush_global) >> { >> CPUArchState *env = cpu->env_ptr; >> >> @@ -89,6 +81,34 @@ void tlb_flush(CPUState *cpu, int flush_global) >> env->tlb_flush_addr = -1; >> env->tlb_flush_mask = 0; >> tlb_flush_count++; >> + /* atomic_mb_set(&cpu->pending_tlb_flush, 0); */ >> +} >> + >> +static void tlb_flush_global_async_work(CPUState *cpu, void *opaque) >> +{ >> + tlb_flush_nocheck(cpu, GPOINTER_TO_INT(opaque)); >> +} >> + >> +/* NOTE: >> + * If flush_global is true (the usual case), flush all tlb entries. >> + * If flush_global is false, flush (at least) all tlb entries not >> + * marked global. >> + * >> + * Since QEMU doesn't currently implement a global/not-global flag >> + * for tlb entries, at the moment tlb_flush() will also flush all >> + * tlb entries in the flush_global == false case. 
This is OK because >> + * CPU architectures generally permit an implementation to drop >> + * entries from the TLB at any time, so flushing more entries than >> + * required is only an efficiency issue, not a correctness issue. >> + */ >> +void tlb_flush(CPUState *cpu, int flush_global) >> +{ >> + if (cpu->created) { > > Why do we check for 'cpu->created' here? Any why don't do that in > tlb_flush_page_all()? A bunch of random stuff gets kicked off at start-up which was getting in the way (c.f. arm_cpu_reset and watch/breakpoints). tlb_flush() is rather liberally sprinkled around the init code of various CPUs. > >> + async_run_on_cpu(cpu, tlb_flush_global_async_work, >> + GINT_TO_POINTER(flush_global)); >> + } else { >> + tlb_flush_nocheck(cpu, flush_global); >> + } >> } >> >> static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp) >> @@ -222,6 +242,21 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) >> tb_flush_jmp_cache(cpu, addr); >> } >> >> +static void tlb_flush_page_async_work(CPUState *cpu, void *opaque) >> +{ >> + tlb_flush_page(cpu, GPOINTER_TO_UINT(opaque)); >> +} >> + >> +void tlb_flush_page_all(target_ulong addr) >> +{ >> + CPUState *cpu; >> + >> + CPU_FOREACH(cpu) { >> + async_run_on_cpu(cpu, tlb_flush_page_async_work, >> + GUINT_TO_POINTER(addr)); >> + } >> +} >> + >> /* update the TLBs so that writes to code in the virtual page 'addr' >> can be detected */ >> void tlb_protect_code(ram_addr_t ram_addr) >> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h >> index 9144ee0..f695577 100644 >> --- a/include/exec/exec-all.h >> +++ b/include/exec/exec-all.h >> @@ -190,6 +190,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, >> void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr); >> void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, >> uintptr_t retaddr); >> +void tlb_flush_page_all(target_ulong addr); >> #else >> static inline void tlb_flush_page(CPUState *cpu, target_ulong 
addr) >> { > > tlb_flush_by_mmuidx() and tlb_flush_page_by_mmuidx() want to be safe as > well. > > Kind regards, > Sergey -- Alex Bennée
On 06/06/16 11:54, Alex Bennée wrote: > Sergey Fedorov <serge.fdrv@gmail.com> writes: > >> On 15/04/16 17:23, Alex Bennée wrote: >>> diff --git a/cputlb.c b/cputlb.c >>> index 1412049..42a3b07 100644 >>> --- a/cputlb.c >>> +++ b/cputlb.c (snip) >>> @@ -89,6 +81,34 @@ void tlb_flush(CPUState *cpu, int flush_global) >>> env->tlb_flush_addr = -1; >>> env->tlb_flush_mask = 0; >>> tlb_flush_count++; >>> + /* atomic_mb_set(&cpu->pending_tlb_flush, 0); */ >>> +} >>> + >>> +static void tlb_flush_global_async_work(CPUState *cpu, void *opaque) >>> +{ >>> + tlb_flush_nocheck(cpu, GPOINTER_TO_INT(opaque)); >>> +} >>> + >>> +/* NOTE: >>> + * If flush_global is true (the usual case), flush all tlb entries. >>> + * If flush_global is false, flush (at least) all tlb entries not >>> + * marked global. >>> + * >>> + * Since QEMU doesn't currently implement a global/not-global flag >>> + * for tlb entries, at the moment tlb_flush() will also flush all >>> + * tlb entries in the flush_global == false case. This is OK because >>> + * CPU architectures generally permit an implementation to drop >>> + * entries from the TLB at any time, so flushing more entries than >>> + * required is only an efficiency issue, not a correctness issue. >>> + */ >>> +void tlb_flush(CPUState *cpu, int flush_global) >>> +{ >>> + if (cpu->created) { >> Why do we check for 'cpu->created' here? Any why don't do that in >> tlb_flush_page_all()? > A bunch of random stuff gets kicked off at start-up which was getting in > the way (c.f. arm_cpu_reset and watch/breakpoints). tlb_flush() is > rather liberally sprinkled around the init code of various CPUs. Wouldn't we race if we call tlb_flush() with no BQL held? We could just queue an async job since we force each CPU thread to flush its work queue before starting execution anyway. 
Kind regards, Sergey > >>> + async_run_on_cpu(cpu, tlb_flush_global_async_work, >>> + GINT_TO_POINTER(flush_global)); >>> + } else { >>> + tlb_flush_nocheck(cpu, flush_global); >>> + } >>> } >>> >>> static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp) >>>
diff --git a/cputlb.c b/cputlb.c index 1412049..42a3b07 100644 --- a/cputlb.c +++ b/cputlb.c @@ -56,22 +56,14 @@ } \ } while (0) +/* We need a solution for stuffing 64 bit pointers in 32 bit ones if + * we care about this combination */ +QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(void *)); + /* statistics */ int tlb_flush_count; -/* NOTE: - * If flush_global is true (the usual case), flush all tlb entries. - * If flush_global is false, flush (at least) all tlb entries not - * marked global. - * - * Since QEMU doesn't currently implement a global/not-global flag - * for tlb entries, at the moment tlb_flush() will also flush all - * tlb entries in the flush_global == false case. This is OK because - * CPU architectures generally permit an implementation to drop - * entries from the TLB at any time, so flushing more entries than - * required is only an efficiency issue, not a correctness issue. - */ -void tlb_flush(CPUState *cpu, int flush_global) +static void tlb_flush_nocheck(CPUState *cpu, int flush_global) { CPUArchState *env = cpu->env_ptr; @@ -89,6 +81,34 @@ void tlb_flush(CPUState *cpu, int flush_global) env->tlb_flush_addr = -1; env->tlb_flush_mask = 0; tlb_flush_count++; + /* atomic_mb_set(&cpu->pending_tlb_flush, 0); */ +} + +static void tlb_flush_global_async_work(CPUState *cpu, void *opaque) +{ + tlb_flush_nocheck(cpu, GPOINTER_TO_INT(opaque)); +} + +/* NOTE: + * If flush_global is true (the usual case), flush all tlb entries. + * If flush_global is false, flush (at least) all tlb entries not + * marked global. + * + * Since QEMU doesn't currently implement a global/not-global flag + * for tlb entries, at the moment tlb_flush() will also flush all + * tlb entries in the flush_global == false case. This is OK because + * CPU architectures generally permit an implementation to drop + * entries from the TLB at any time, so flushing more entries than + * required is only an efficiency issue, not a correctness issue. 
+ */ +void tlb_flush(CPUState *cpu, int flush_global) +{ + if (cpu->created) { + async_run_on_cpu(cpu, tlb_flush_global_async_work, + GINT_TO_POINTER(flush_global)); + } else { + tlb_flush_nocheck(cpu, flush_global); + } } static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp) @@ -222,6 +242,21 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) tb_flush_jmp_cache(cpu, addr); } +static void tlb_flush_page_async_work(CPUState *cpu, void *opaque) +{ + tlb_flush_page(cpu, GPOINTER_TO_UINT(opaque)); +} + +void tlb_flush_page_all(target_ulong addr) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, tlb_flush_page_async_work, + GUINT_TO_POINTER(addr)); + } +} + /* update the TLBs so that writes to code in the virtual page 'addr' can be detected */ void tlb_protect_code(ram_addr_t ram_addr) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 9144ee0..f695577 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -190,6 +190,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr, void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr); void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, uintptr_t retaddr); +void tlb_flush_page_all(target_ulong addr); #else static inline void tlb_flush_page(CPUState *cpu, target_ulong addr) {