Message ID | 1473832442-17762-3-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote: > We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit > a context synchronizing event or instruction that requires a pending > flush to be performed. > > However, we fail to handle broadcast TLB flush operations. In order to > fix that efficiently, we want to differenciate whether check_tlb_flush() > needs to only apply pending local flushes (isync instructions, > interrupts, ...) or also global pending flush operations. The latter is > only needed when executing instructions that are defined architecturally > as synchronizing global TLB flush operations. This in our case is > ptesync on BookS and tlbsync on BookE along with the paravirtualized > hypervisor calls. Much better description, thank you. > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > hw/ppc/spapr_hcall.c | 4 ++-- > target-ppc/excp_helper.c | 4 ++-- > target-ppc/helper.h | 2 +- > target-ppc/helper_regs.h | 4 ++-- > target-ppc/mmu_helper.c | 4 ++-- > target-ppc/translate.c | 20 ++++++++++---------- > 6 files changed, 19 insertions(+), 19 deletions(-) > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index 73af112..ef12ea0 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -201,7 +201,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, > > switch (ret) { > case REMOVE_SUCCESS: > - check_tlb_flush(env); > + check_tlb_flush(env, 1); > return H_SUCCESS; > > case REMOVE_NOT_FOUND: > @@ -282,7 +282,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, > } > } > exit: > - check_tlb_flush(env); > + check_tlb_flush(env, 1); > > return rc; > } > diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c > index 04ed4da..3b78126 100644 > --- a/target-ppc/excp_helper.c > +++ b/target-ppc/excp_helper.c > @@ -711,7 +711,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) > /* Any interrupt 
is context synchronizing, check if TCG TLB > * needs a delayed flush on ppc64 > */ > - check_tlb_flush(env); > + check_tlb_flush(env, 0); > } > > void ppc_cpu_do_interrupt(CPUState *cs) > @@ -973,7 +973,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) > cs->interrupt_request |= CPU_INTERRUPT_EXITTB; > > /* Context synchronizing: check if TCG TLB needs flush */ > - check_tlb_flush(env); > + check_tlb_flush(env, 0); > } > > void helper_rfi(CPUPPCState *env) > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index e75d070..5ececf1 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env) > DEF_HELPER_1(hrfid, void, env) > DEF_HELPER_2(store_lpcr, void, env, tl) > #endif > -DEF_HELPER_1(check_tlb_flush, void, env) > +DEF_HELPER_2(check_tlb_flush, void, env, i32) > #endif > > DEF_HELPER_3(lmw, void, env, tl, i32) > diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h > index 69204a5..bcf65ce 100644 > --- a/target-ppc/helper_regs.h > +++ b/target-ppc/helper_regs.h > @@ -154,7 +154,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, > } > > #if !defined(CONFIG_USER_ONLY) > -static inline void check_tlb_flush(CPUPPCState *env) > +static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) > { > CPUState *cs = CPU(ppc_env_get_cpu(env)); > if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { > @@ -163,7 +163,7 @@ static inline void check_tlb_flush(CPUPPCState *env) > } > } > #else > -static inline void check_tlb_flush(CPUPPCState *env) { } > +static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { } > #endif > > #endif /* HELPER_REGS_H */ > diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c > index d59d2f8..bf9f329 100644 > --- a/target-ppc/mmu_helper.c > +++ b/target-ppc/mmu_helper.c > @@ -2867,9 +2867,9 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type) > } > > > -void 
helper_check_tlb_flush(CPUPPCState *env) > +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global) You're using an unsigned int for the flag here, but uint32_t for check_tlb_flush(), which is a needless inconsistency. You might as well make them both bools, since that's how it's actually being used. As a general rule don't use fixed width types unless you actually *need* the fixed width - the type choices are part of the interface documentation and using a fixed width type when you don't need it sends a misleading message. > { > - check_tlb_flush(env); > + check_tlb_flush(env, global); > } > > /*****************************************************************************/ > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index a27f455..5026804 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -3066,7 +3066,7 @@ static void gen_eieio(DisasContext *ctx) > } > > #if !defined(CONFIG_USER_ONLY) > -static inline void gen_check_tlb_flush(DisasContext *ctx) > +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) > { > TCGv_i32 t; > TCGLabel *l; > @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx) > t = tcg_temp_new_i32(); > tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); > tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); > - gen_helper_check_tlb_flush(cpu_env); > + tcg_gen_movi_i32(t, global); > + gen_helper_check_tlb_flush(cpu_env, t); > gen_set_label(l); > tcg_temp_free_i32(t); > } > #else > -static inline void gen_check_tlb_flush(DisasContext *ctx) { } > +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { } > #endif > > /* isync */ > @@ -3094,7 +3095,7 @@ static void gen_isync(DisasContext *ctx) > * kernel mode however so check MSR_PR > */ > if (!ctx->pr) { > - gen_check_tlb_flush(ctx); > + gen_check_tlb_flush(ctx, 0); > } > gen_stop_exception(ctx); > } > @@ -3259,7 +3260,7 @@ static void gen_sync(DisasContext *ctx) > * check 
MSR_PR as well. > */ > if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { > - gen_check_tlb_flush(ctx); > + gen_check_tlb_flush(ctx, 1); > } > } > > @@ -4468,11 +4469,10 @@ static void gen_tlbsync(DisasContext *ctx) > #else > CHK_HV; > > - /* tlbsync is a nop for server, ptesync handles delayed tlb flush, > - * embedded however needs to deal with tlbsync. We don't try to be > - * fancy and swallow the overhead of checking for both. > - */ > - gen_check_tlb_flush(ctx); > + /* BookS does both ptesync and tlbsync make tlbsync a nop for server */ > + if (ctx->insns_flags & PPC_BOOKE) { > + gen_check_tlb_flush(ctx, 1); > + } > #endif /* defined(CONFIG_USER_ONLY) */ > } >
David Gibson <david@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote: >> We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit >> a context synchronizing event or instruction that requires a pending >> flush to be performed. >> >> However, we fail to handle broadcast TLB flush operations. In order to >> fix that efficiently, we want to differenciate whether check_tlb_flush() >> needs to only apply pending local flushes (isync instructions, >> interrupts, ...) or also global pending flush operations. The latter is >> only needed when executing instructions that are defined architecturally >> as synchronizing global TLB flush operations. This in our case is >> ptesync on BookS and tlbsync on BookE along with the paravirtualized >> hypervisor calls. > > Much better description, thank you. > >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index e75d070..5ececf1 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env) >> DEF_HELPER_1(hrfid, void, env) >> DEF_HELPER_2(store_lpcr, void, env, tl) >> #endif >> -DEF_HELPER_1(check_tlb_flush, void, env) >> +DEF_HELPER_2(check_tlb_flush, void, env, i32) Not sure if I can use bool here, maybe I can use target_ulong. >> -void helper_check_tlb_flush(CPUPPCState *env) >> +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global) > > You're using an unsigned int for the flag here, but uint32_t for > check_tlb_flush(), which is a needless inconsistency. I can make this as uint32_t for consistency. > You might as well make them both bools, since that's how it's actually > being used. 
> > As a general rule don't use fixed width types unless you > actually *need* the fixed width - the type choices are part of the > interface documentation and using a fixed width type when you don't > need it sends a misleading message. I optimized it to avoid a new variable, and re-used "t": -static inline void gen_check_tlb_flush(DisasContext *ctx) +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { TCGv_i32 t; TCGLabel *l; @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx) t = tcg_temp_new_i32(); tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); - gen_helper_check_tlb_flush(cpu_env); + tcg_gen_movi_i32(t, global); + gen_helper_check_tlb_flush(cpu_env, t); gen_set_label(l); tcg_temp_free_i32(t); } Regards Nikunj
On Thu, Sep 15, 2016 at 11:32:39AM +0530, Nikunj A Dadhania wrote: > David Gibson <david@gibson.dropbear.id.au> writes: > > [ Unknown signature status ] > > On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote: > >> We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit > >> a context synchronizing event or instruction that requires a pending > >> flush to be performed. > >> > >> However, we fail to handle broadcast TLB flush operations. In order to > >> fix that efficiently, we want to differenciate whether check_tlb_flush() > >> needs to only apply pending local flushes (isync instructions, > >> interrupts, ...) or also global pending flush operations. The latter is > >> only needed when executing instructions that are defined architecturally > >> as synchronizing global TLB flush operations. This in our case is > >> ptesync on BookS and tlbsync on BookE along with the paravirtualized > >> hypervisor calls. > > > > Much better description, thank you. > > > >> > >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > >> --- > > >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h > >> index e75d070..5ececf1 100644 > >> --- a/target-ppc/helper.h > >> +++ b/target-ppc/helper.h > >> @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env) > >> DEF_HELPER_1(hrfid, void, env) > >> DEF_HELPER_2(store_lpcr, void, env, tl) > >> #endif > >> -DEF_HELPER_1(check_tlb_flush, void, env) > >> +DEF_HELPER_2(check_tlb_flush, void, env, i32) > > Not sure if I can use bool here, maybe I can use target_ulong. I think target_ulong would make more sense. > >> -void helper_check_tlb_flush(CPUPPCState *env) > >> +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global) > > > > You're using an unsigned int for the flag here, but uint32_t for > > check_tlb_flush(), which is a needless inconsistency. > > I can make this as uint32_t for consistency. As below, I'd prefer not. 
Actually I hadn't thought through the TCG helper constraints, so I think having it target_ulong in the helper and bool in the direct function makes sense. > > You might as well make them both bools, since that's how it's actually > > being used. > > > > As a general rule don't use fixed width types unless you > > actually *need* the fixed width - the type choices are part of the > > interface documentation and using a fixed width type when you don't > > need it sends a misleading message. > > I optimized it because to avoid a new variable, and re-used "t": Oh, I see. Hmm. I don't know if that will make a real difference in TCG or not. > -static inline void gen_check_tlb_flush(DisasContext *ctx) > +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) > { > TCGv_i32 t; > TCGLabel *l; > @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx) > t = tcg_temp_new_i32(); > tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); > tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); > - gen_helper_check_tlb_flush(cpu_env); > + tcg_gen_movi_i32(t, global); > + gen_helper_check_tlb_flush(cpu_env, t); > gen_set_label(l); > tcg_temp_free_i32(t); > } > > > Regards > Nikunj >
On Thu, 2016-09-15 at 16:16 +1000, David Gibson wrote: > Oh, I see. Hmm. I don't know if that will make a real difference in > TCG or not. It will on 32-bit hosts. Cheers, Ben.
On Thu, Sep 15, 2016 at 04:22:31PM +1000, Benjamin Herrenschmidt wrote: > On Thu, 2016-09-15 at 16:16 +1000, David Gibson wrote: > > Oh, I see. Hmm. I don't know if that will make a real difference in > > TCG or not. > > It will on 32-bit hosts. Hm, yes, I guess it will. Ok, leave it as u32. I'd still prefer to see a bool in the direct-called version.
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 73af112..ef12ea0 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -201,7 +201,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, switch (ret) { case REMOVE_SUCCESS: - check_tlb_flush(env); + check_tlb_flush(env, 1); return H_SUCCESS; case REMOVE_NOT_FOUND: @@ -282,7 +282,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, } } exit: - check_tlb_flush(env); + check_tlb_flush(env, 1); return rc; } diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index 04ed4da..3b78126 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -711,7 +711,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* Any interrupt is context synchronizing, check if TCG TLB * needs a delayed flush on ppc64 */ - check_tlb_flush(env); + check_tlb_flush(env, 0); } void ppc_cpu_do_interrupt(CPUState *cs) @@ -973,7 +973,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr) cs->interrupt_request |= CPU_INTERRUPT_EXITTB; /* Context synchronizing: check if TCG TLB needs flush */ - check_tlb_flush(env); + check_tlb_flush(env, 0); } void helper_rfi(CPUPPCState *env) diff --git a/target-ppc/helper.h b/target-ppc/helper.h index e75d070..5ececf1 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env) DEF_HELPER_1(hrfid, void, env) DEF_HELPER_2(store_lpcr, void, env, tl) #endif -DEF_HELPER_1(check_tlb_flush, void, env) +DEF_HELPER_2(check_tlb_flush, void, env, i32) #endif DEF_HELPER_3(lmw, void, env, tl, i32) diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index 69204a5..bcf65ce 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -154,7 +154,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value, } #if !defined(CONFIG_USER_ONLY) -static inline void check_tlb_flush(CPUPPCState *env) 
+static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { CPUState *cs = CPU(ppc_env_get_cpu(env)); if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) { @@ -163,7 +163,7 @@ static inline void check_tlb_flush(CPUPPCState *env) } } #else -static inline void check_tlb_flush(CPUPPCState *env) { } +static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { } #endif #endif /* HELPER_REGS_H */ diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index d59d2f8..bf9f329 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -2867,9 +2867,9 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type) } -void helper_check_tlb_flush(CPUPPCState *env) +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global) { - check_tlb_flush(env); + check_tlb_flush(env, global); } /*****************************************************************************/ diff --git a/target-ppc/translate.c b/target-ppc/translate.c index a27f455..5026804 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -3066,7 +3066,7 @@ static void gen_eieio(DisasContext *ctx) } #if !defined(CONFIG_USER_ONLY) -static inline void gen_check_tlb_flush(DisasContext *ctx) +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { TCGv_i32 t; TCGLabel *l; @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx) t = tcg_temp_new_i32(); tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); - gen_helper_check_tlb_flush(cpu_env); + tcg_gen_movi_i32(t, global); + gen_helper_check_tlb_flush(cpu_env, t); gen_set_label(l); tcg_temp_free_i32(t); } #else -static inline void gen_check_tlb_flush(DisasContext *ctx) { } +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { } #endif /* isync */ @@ -3094,7 +3095,7 @@ static void gen_isync(DisasContext *ctx) * kernel mode however so check MSR_PR */ if (!ctx->pr) { - 
gen_check_tlb_flush(ctx); + gen_check_tlb_flush(ctx, 0); } gen_stop_exception(ctx); } @@ -3259,7 +3260,7 @@ static void gen_sync(DisasContext *ctx) * check MSR_PR as well. */ if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { - gen_check_tlb_flush(ctx); + gen_check_tlb_flush(ctx, 1); } } @@ -4468,11 +4469,10 @@ static void gen_tlbsync(DisasContext *ctx) #else CHK_HV; - /* tlbsync is a nop for server, ptesync handles delayed tlb flush, - * embedded however needs to deal with tlbsync. We don't try to be - * fancy and swallow the overhead of checking for both. - */ - gen_check_tlb_flush(ctx); + /* BookS does both ptesync and tlbsync make tlbsync a nop for server */ + if (ctx->insns_flags & PPC_BOOKE) { + gen_check_tlb_flush(ctx, 1); + } #endif /* defined(CONFIG_USER_ONLY) */ }
We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit a context synchronizing event or instruction that requires a pending flush to be performed. However, we fail to handle broadcast TLB flush operations. In order to fix that efficiently, we want to differentiate whether check_tlb_flush() needs to only apply pending local flushes (isync instructions, interrupts, ...) or also global pending flush operations. The latter is only needed when executing instructions that are defined architecturally as synchronizing global TLB flush operations. This in our case is ptesync on BookS and tlbsync on BookE along with the paravirtualized hypervisor calls. Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- hw/ppc/spapr_hcall.c | 4 ++-- target-ppc/excp_helper.c | 4 ++-- target-ppc/helper.h | 2 +- target-ppc/helper_regs.h | 4 ++-- target-ppc/mmu_helper.c | 4 ++-- target-ppc/translate.c | 20 ++++++++++---------- 6 files changed, 19 insertions(+), 19 deletions(-)