diff mbox

[v4,2/3] target-ppc: add flag in chech_tlb_flush()

Message ID 1473832442-17762-3-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Sept. 14, 2016, 5:54 a.m. UTC
We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit
a context synchronizing event or instruction that requires a pending
flush to be performed.

However, we fail to handle broadcast TLB flush operations. In order to
fix that efficiently, we want to differenciate whether check_tlb_flush()
needs to only apply pending local flushes (isync instructions,
interrupts, ...) or also global pending flush operations. The latter is
only needed when executing instructions that are defined architecturally
as synchronizing global TLB flush operations. This in our case is
ptesync on BookS and tlbsync on BookE along with the paravirtualized
hypervisor calls.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 hw/ppc/spapr_hcall.c     |  4 ++--
 target-ppc/excp_helper.c |  4 ++--
 target-ppc/helper.h      |  2 +-
 target-ppc/helper_regs.h |  4 ++--
 target-ppc/mmu_helper.c  |  4 ++--
 target-ppc/translate.c   | 20 ++++++++++----------
 6 files changed, 19 insertions(+), 19 deletions(-)

Comments

David Gibson Sept. 15, 2016, 12:20 a.m. UTC | #1
On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote:
> We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit
> a context synchronizing event or instruction that requires a pending
> flush to be performed.
> 
> However, we fail to handle broadcast TLB flush operations. In order to
> fix that efficiently, we want to differenciate whether check_tlb_flush()
> needs to only apply pending local flushes (isync instructions,
> interrupts, ...) or also global pending flush operations. The latter is
> only needed when executing instructions that are defined architecturally
> as synchronizing global TLB flush operations. This in our case is
> ptesync on BookS and tlbsync on BookE along with the paravirtualized
> hypervisor calls.

Much better description, thank you.

> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_hcall.c     |  4 ++--
>  target-ppc/excp_helper.c |  4 ++--
>  target-ppc/helper.h      |  2 +-
>  target-ppc/helper_regs.h |  4 ++--
>  target-ppc/mmu_helper.c  |  4 ++--
>  target-ppc/translate.c   | 20 ++++++++++----------
>  6 files changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 73af112..ef12ea0 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -201,7 +201,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>  
>      switch (ret) {
>      case REMOVE_SUCCESS:
> -        check_tlb_flush(env);
> +        check_tlb_flush(env, 1);
>          return H_SUCCESS;
>  
>      case REMOVE_NOT_FOUND:
> @@ -282,7 +282,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>          }
>      }
>   exit:
> -    check_tlb_flush(env);
> +    check_tlb_flush(env, 1);
>  
>      return rc;
>  }
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 04ed4da..3b78126 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -711,7 +711,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      /* Any interrupt is context synchronizing, check if TCG TLB
>       * needs a delayed flush on ppc64
>       */
> -    check_tlb_flush(env);
> +    check_tlb_flush(env, 0);
>  }
>  
>  void ppc_cpu_do_interrupt(CPUState *cs)
> @@ -973,7 +973,7 @@ static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
>      cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>  
>      /* Context synchronizing: check if TCG TLB needs flush */
> -    check_tlb_flush(env);
> +    check_tlb_flush(env, 0);
>  }
>  
>  void helper_rfi(CPUPPCState *env)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index e75d070..5ececf1 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env)
>  DEF_HELPER_1(hrfid, void, env)
>  DEF_HELPER_2(store_lpcr, void, env, tl)
>  #endif
> -DEF_HELPER_1(check_tlb_flush, void, env)
> +DEF_HELPER_2(check_tlb_flush, void, env, i32)
>  #endif
>  
>  DEF_HELPER_3(lmw, void, env, tl, i32)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 69204a5..bcf65ce 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -154,7 +154,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>  }
>  
>  #if !defined(CONFIG_USER_ONLY)
> -static inline void check_tlb_flush(CPUPPCState *env)
> +static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
>  {
>      CPUState *cs = CPU(ppc_env_get_cpu(env));
>      if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) {
> @@ -163,7 +163,7 @@ static inline void check_tlb_flush(CPUPPCState *env)
>      }
>  }
>  #else
> -static inline void check_tlb_flush(CPUPPCState *env) { }
> +static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { }
>  #endif
>  
>  #endif /* HELPER_REGS_H */
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index d59d2f8..bf9f329 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -2867,9 +2867,9 @@ void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
>  }
>  
>  
> -void helper_check_tlb_flush(CPUPPCState *env)
> +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global)

You're using an unsigned int for the flag here, but uint32_t for
check_tlb_flush(), which is a needless inconsistency.

You might as well make them both bools, since that's how it's actually
being used.  As a general rule don't use fixed width types unless you
actually *need* the fixed width - the type choices are part of the
interface documentation and using a fixed width type when you don't
need it sends a misleading message.

>  {
> -    check_tlb_flush(env);
> +    check_tlb_flush(env, global);
>  }
>  
>  /*****************************************************************************/
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index a27f455..5026804 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3066,7 +3066,7 @@ static void gen_eieio(DisasContext *ctx)
>  }
>  
>  #if !defined(CONFIG_USER_ONLY)
> -static inline void gen_check_tlb_flush(DisasContext *ctx)
> +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global)
>  {
>      TCGv_i32 t;
>      TCGLabel *l;
> @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx)
>      t = tcg_temp_new_i32();
>      tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
>      tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> -    gen_helper_check_tlb_flush(cpu_env);
> +    tcg_gen_movi_i32(t, global);
> +    gen_helper_check_tlb_flush(cpu_env, t);
>      gen_set_label(l);
>      tcg_temp_free_i32(t);
>  }
>  #else
> -static inline void gen_check_tlb_flush(DisasContext *ctx) { }
> +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { }
>  #endif
>  
>  /* isync */
> @@ -3094,7 +3095,7 @@ static void gen_isync(DisasContext *ctx)
>       * kernel mode however so check MSR_PR
>       */
>      if (!ctx->pr) {
> -        gen_check_tlb_flush(ctx);
> +        gen_check_tlb_flush(ctx, 0);
>      }
>      gen_stop_exception(ctx);
>  }
> @@ -3259,7 +3260,7 @@ static void gen_sync(DisasContext *ctx)
>       * check MSR_PR as well.
>       */
>      if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
> -        gen_check_tlb_flush(ctx);
> +        gen_check_tlb_flush(ctx, 1);
>      }
>  }
>  
> @@ -4468,11 +4469,10 @@ static void gen_tlbsync(DisasContext *ctx)
>  #else
>      CHK_HV;
>  
> -    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
> -     * embedded however needs to deal with tlbsync. We don't try to be
> -     * fancy and swallow the overhead of checking for both.
> -     */
> -    gen_check_tlb_flush(ctx);
> +    /* BookS does both ptesync and tlbsync make tlbsync a nop for server */
> +    if (ctx->insns_flags & PPC_BOOKE) {
> +        gen_check_tlb_flush(ctx, 1);
> +    }
>  #endif /* defined(CONFIG_USER_ONLY) */
>  }
>
Nikunj A. Dadhania Sept. 15, 2016, 6:02 a.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:
> [ Unknown signature status ]
> On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote:
>> We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit
>> a context synchronizing event or instruction that requires a pending
>> flush to be performed.
>> 
>> However, we fail to handle broadcast TLB flush operations. In order to
>> fix that efficiently, we want to differenciate whether check_tlb_flush()
>> needs to only apply pending local flushes (isync instructions,
>> interrupts, ...) or also global pending flush operations. The latter is
>> only needed when executing instructions that are defined architecturally
>> as synchronizing global TLB flush operations. This in our case is
>> ptesync on BookS and tlbsync on BookE along with the paravirtualized
>> hypervisor calls.
>
> Much better description, thank you.
>
>> 
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---

>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index e75d070..5ececf1 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env)
>>  DEF_HELPER_1(hrfid, void, env)
>>  DEF_HELPER_2(store_lpcr, void, env, tl)
>>  #endif
>> -DEF_HELPER_1(check_tlb_flush, void, env)
>> +DEF_HELPER_2(check_tlb_flush, void, env, i32)

Not sure if I can use bool here, maybe I can use target_ulong.

>> -void helper_check_tlb_flush(CPUPPCState *env)
>> +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global)
>
> You're using an unsigned int for the flag here, but uint32_t for
> check_tlb_flush(), which is a needless inconsistency.

I can make this as uint32_t for consistency.

> You might as well make them both bools, since that's how it's actually
> being used.
>
> As a general rule don't use fixed width types unless you
> actually *need* the fixed width - the type choices are part of the
> interface documentation and using a fixed width type when you don't
> need it sends a misleading message.

I optimized it because to avoid a new variable, and re-used "t":

-static inline void gen_check_tlb_flush(DisasContext *ctx)
+static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global)
 {
     TCGv_i32 t;
     TCGLabel *l;
@@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx)
     t = tcg_temp_new_i32();
     tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
     tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
-    gen_helper_check_tlb_flush(cpu_env);
+    tcg_gen_movi_i32(t, global);
+    gen_helper_check_tlb_flush(cpu_env, t);
     gen_set_label(l);
     tcg_temp_free_i32(t);
 }


Regards
Nikunj
David Gibson Sept. 15, 2016, 6:16 a.m. UTC | #3
On Thu, Sep 15, 2016 at 11:32:39AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> > [ Unknown signature status ]
> > On Wed, Sep 14, 2016 at 11:24:01AM +0530, Nikunj A Dadhania wrote:
> >> We flush the qemu TLB lazily. check_tlb_flush is called whenever we hit
> >> a context synchronizing event or instruction that requires a pending
> >> flush to be performed.
> >> 
> >> However, we fail to handle broadcast TLB flush operations. In order to
> >> fix that efficiently, we want to differenciate whether check_tlb_flush()
> >> needs to only apply pending local flushes (isync instructions,
> >> interrupts, ...) or also global pending flush operations. The latter is
> >> only needed when executing instructions that are defined architecturally
> >> as synchronizing global TLB flush operations. This in our case is
> >> ptesync on BookS and tlbsync on BookE along with the paravirtualized
> >> hypervisor calls.
> >
> > Much better description, thank you.
> >
> >> 
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> ---
> 
> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> >> index e75d070..5ececf1 100644
> >> --- a/target-ppc/helper.h
> >> +++ b/target-ppc/helper.h
> >> @@ -18,7 +18,7 @@ DEF_HELPER_1(rfid, void, env)
> >>  DEF_HELPER_1(hrfid, void, env)
> >>  DEF_HELPER_2(store_lpcr, void, env, tl)
> >>  #endif
> >> -DEF_HELPER_1(check_tlb_flush, void, env)
> >> +DEF_HELPER_2(check_tlb_flush, void, env, i32)
> 
> Not sure if I can use bool here, maybe I can use target_ulong.

I think target_ulong would make more sense.

> >> -void helper_check_tlb_flush(CPUPPCState *env)
> >> +void helper_check_tlb_flush(CPUPPCState *env, unsigned int global)
> >
> > You're using an unsigned int for the flag here, but uint32_t for
> > check_tlb_flush(), which is a needless inconsistency.
> 
> I can make this as uint32_t for consistency.

As below, I'd prefer not.  Actually I hadn't thought through the TCG
helper constraints, so I think having it target_ulong in the helper
and bool in the direct function makes sense.

> > You might as well make them both bools, since that's how it's actually
> > being used.
> >
> > As a general rule don't use fixed width types unless you
> > actually *need* the fixed width - the type choices are part of the
> > interface documentation and using a fixed width type when you don't
> > need it sends a misleading message.
> 
> I optimized it because to avoid a new variable, and re-used "t":

Oh, I see.  Hmm.  I don't know if that will make a real difference in
TCG or not.

> -static inline void gen_check_tlb_flush(DisasContext *ctx)
> +static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global)
>  {
>      TCGv_i32 t;
>      TCGLabel *l;
> @@ -3078,12 +3078,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx)
>      t = tcg_temp_new_i32();
>      tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
>      tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> -    gen_helper_check_tlb_flush(cpu_env);
> +    tcg_gen_movi_i32(t, global);
> +    gen_helper_check_tlb_flush(cpu_env, t);
>      gen_set_label(l);
>      tcg_temp_free_i32(t);
>  }
> 
> 
> Regards
> Nikunj
>
Benjamin Herrenschmidt Sept. 15, 2016, 6:22 a.m. UTC | #4
On Thu, 2016-09-15 at 16:16 +1000, David Gibson wrote:
> Oh, I see.  Hmm.  I don't know if that will make a real difference in
> TCG or not.

It will on 32-bit hosts.

Cheers,
Ben.
David Gibson Sept. 15, 2016, 6:25 a.m. UTC | #5
On Thu, Sep 15, 2016 at 04:22:31PM +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2016-09-15 at 16:16 +1000, David Gibson wrote:
> > Oh, I see.  Hmm.  I don't know if that will make a real difference in
> > TCG or not.
> 
> It will on 32-bit hosts.

Hm, yes, I guess it will.  Ok, leave it has u32.  I'd still prefer to
see a bool in the direct-called version.
diff mbox

Patch

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 73af112..ef12ea0 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -201,7 +201,7 @@  static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
 
     switch (ret) {
     case REMOVE_SUCCESS:
-        check_tlb_flush(env);
+        check_tlb_flush(env, 1);
         return H_SUCCESS;
 
     case REMOVE_NOT_FOUND:
@@ -282,7 +282,7 @@  static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr,
         }
     }
  exit:
-    check_tlb_flush(env);
+    check_tlb_flush(env, 1);
 
     return rc;
 }
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index 04ed4da..3b78126 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -711,7 +711,7 @@  static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     /* Any interrupt is context synchronizing, check if TCG TLB
      * needs a delayed flush on ppc64
      */
-    check_tlb_flush(env);
+    check_tlb_flush(env, 0);
 }
 
 void ppc_cpu_do_interrupt(CPUState *cs)
@@ -973,7 +973,7 @@  static inline void do_rfi(CPUPPCState *env, target_ulong nip, target_ulong msr)
     cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
 
     /* Context synchronizing: check if TCG TLB needs flush */
-    check_tlb_flush(env);
+    check_tlb_flush(env, 0);
 }
 
 void helper_rfi(CPUPPCState *env)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index e75d070..5ececf1 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -18,7 +18,7 @@  DEF_HELPER_1(rfid, void, env)
 DEF_HELPER_1(hrfid, void, env)
 DEF_HELPER_2(store_lpcr, void, env, tl)
 #endif
-DEF_HELPER_1(check_tlb_flush, void, env)
+DEF_HELPER_2(check_tlb_flush, void, env, i32)
 #endif
 
 DEF_HELPER_3(lmw, void, env, tl, i32)
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 69204a5..bcf65ce 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -154,7 +154,7 @@  static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
 }
 
 #if !defined(CONFIG_USER_ONLY)
-static inline void check_tlb_flush(CPUPPCState *env)
+static inline void check_tlb_flush(CPUPPCState *env, uint32_t global)
 {
     CPUState *cs = CPU(ppc_env_get_cpu(env));
     if (env->tlb_need_flush & TLB_NEED_LOCAL_FLUSH) {
@@ -163,7 +163,7 @@  static inline void check_tlb_flush(CPUPPCState *env)
     }
 }
 #else
-static inline void check_tlb_flush(CPUPPCState *env) { }
+static inline void check_tlb_flush(CPUPPCState *env, uint32_t global) { }
 #endif
 
 #endif /* HELPER_REGS_H */
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index d59d2f8..bf9f329 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -2867,9 +2867,9 @@  void helper_booke206_tlbflush(CPUPPCState *env, target_ulong type)
 }
 
 
-void helper_check_tlb_flush(CPUPPCState *env)
+void helper_check_tlb_flush(CPUPPCState *env, unsigned int global)
 {
-    check_tlb_flush(env);
+    check_tlb_flush(env, global);
 }
 
 /*****************************************************************************/
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a27f455..5026804 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -3066,7 +3066,7 @@  static void gen_eieio(DisasContext *ctx)
 }
 
 #if !defined(CONFIG_USER_ONLY)
-static inline void gen_check_tlb_flush(DisasContext *ctx)
+static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global)
 {
     TCGv_i32 t;
     TCGLabel *l;
@@ -3078,12 +3078,13 @@  static inline void gen_check_tlb_flush(DisasContext *ctx)
     t = tcg_temp_new_i32();
     tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
     tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
-    gen_helper_check_tlb_flush(cpu_env);
+    tcg_gen_movi_i32(t, global);
+    gen_helper_check_tlb_flush(cpu_env, t);
     gen_set_label(l);
     tcg_temp_free_i32(t);
 }
 #else
-static inline void gen_check_tlb_flush(DisasContext *ctx) { }
+static inline void gen_check_tlb_flush(DisasContext *ctx, uint32_t global) { }
 #endif
 
 /* isync */
@@ -3094,7 +3095,7 @@  static void gen_isync(DisasContext *ctx)
      * kernel mode however so check MSR_PR
      */
     if (!ctx->pr) {
-        gen_check_tlb_flush(ctx);
+        gen_check_tlb_flush(ctx, 0);
     }
     gen_stop_exception(ctx);
 }
@@ -3259,7 +3260,7 @@  static void gen_sync(DisasContext *ctx)
      * check MSR_PR as well.
      */
     if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
-        gen_check_tlb_flush(ctx);
+        gen_check_tlb_flush(ctx, 1);
     }
 }
 
@@ -4468,11 +4469,10 @@  static void gen_tlbsync(DisasContext *ctx)
 #else
     CHK_HV;
 
-    /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
-     * embedded however needs to deal with tlbsync. We don't try to be
-     * fancy and swallow the overhead of checking for both.
-     */
-    gen_check_tlb_flush(ctx);
+    /* BookS does both ptesync and tlbsync make tlbsync a nop for server */
+    if (ctx->insns_flags & PPC_BOOKE) {
+        gen_check_tlb_flush(ctx, 1);
+    }
 #endif /* defined(CONFIG_USER_ONLY) */
 }