Message ID | 20170406102249.20383-2-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 04/06/2017 03:22 AM, Nikunj A Dadhania wrote: > + TCGv_i32 tmp = tcg_temp_local_new_i32(); > + TCGv t0; > > + tcg_gen_movi_i32(tmp, 0); > tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > l1 = gen_new_label(); > tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); > - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); > + > + t0 = tcg_temp_new(); > + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], > + ctx->mem_idx, DEF_MEMOP(memop)); > + tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val); > + tcg_gen_trunc_tl_i32(tmp, t0); > + > gen_set_label(l1); > + tcg_gen_shli_i32(tmp, tmp, CRF_EQ_BIT); > + tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], tmp); I encourage you to move these two lines up beside the setcond. That way you don't need to use a local tmp, which implies a spill/restore from the stack. r~
On 04/06/2017 03:22 AM, Nikunj A Dadhania wrote: > tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > l1 = gen_new_label(); > tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); > - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); > + > + t0 = tcg_temp_new(); > + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], > + ctx->mem_idx, DEF_MEMOP(memop)); Actually, I noticed another, existing, problem. This code changes CRF[0] before the user memory write, which might fault. This needs to delay any changes to the architecture visible state until after any exception may be triggered. r~
Richard Henderson <rth@twiddle.net> writes: > On 04/06/2017 03:22 AM, Nikunj A Dadhania wrote: >> + TCGv_i32 tmp = tcg_temp_local_new_i32(); >> + TCGv t0; >> >> + tcg_gen_movi_i32(tmp, 0); >> tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); >> l1 = gen_new_label(); >> tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); >> - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); >> - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); >> + >> + t0 = tcg_temp_new(); >> + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], >> + ctx->mem_idx, DEF_MEMOP(memop)); >> + tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val); >> + tcg_gen_trunc_tl_i32(tmp, t0); >> + >> gen_set_label(l1); >> + tcg_gen_shli_i32(tmp, tmp, CRF_EQ_BIT); >> + tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], tmp); > > I encourage you to move these two lines up beside the setcond. > That way you don't need to use a local tmp, which implies a > spill/restore from the stack. Sure. Regards Nikunj
Richard Henderson <rth@twiddle.net> writes: > On 04/06/2017 03:22 AM, Nikunj A Dadhania wrote: >> tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); >> l1 = gen_new_label(); >> tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); >> - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); >> - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); >> + >> + t0 = tcg_temp_new(); >> + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], >> + ctx->mem_idx, DEF_MEMOP(memop)); > > Actually, I noticed another, existing, problem. > > This code changes CRF[0] before the user memory write, which might fault. This > needs to delay any changes to the architecture visible state until after any > exception may be triggered. Sure, I take it you are referring to cpu_so being moved into CRF[0] before the store. Regards Nikunj
On Thu, Apr 06, 2017 at 03:52:47PM +0530, Nikunj A Dadhania wrote: > Emulating LL/SC with cmpxchg is not correct, since it can suffer from > the ABA problem. However, portable parallel code is written assuming > only cmpxchg which means that in practice this is a viable alternative. > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target/ppc/translate.c | 24 +++++++++++++++++++++--- > 1 file changed, 21 insertions(+), 3 deletions(-) > > diff --git a/target/ppc/translate.c b/target/ppc/translate.c > index b6abc60..a9c733d 100644 > --- a/target/ppc/translate.c > +++ b/target/ppc/translate.c > @@ -73,6 +73,7 @@ static TCGv cpu_cfar; > #endif > static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; > static TCGv cpu_reserve; > +static TCGv cpu_reserve_val; > static TCGv cpu_fpscr; > static TCGv_i32 cpu_access_type; > > @@ -181,6 +182,9 @@ void ppc_translate_init(void) > cpu_reserve = tcg_global_mem_new(cpu_env, > offsetof(CPUPPCState, reserve_addr), > "reserve_addr"); > + cpu_reserve_val = tcg_global_mem_new(cpu_env, > + offsetof(CPUPPCState, reserve_val), > + "reserve_val"); I notice that lqarx is not updated. Does that matter? 
> cpu_fpscr = tcg_global_mem_new(cpu_env, > offsetof(CPUPPCState, fpscr), "fpscr"); > @@ -3023,7 +3027,7 @@ static void gen_##name(DisasContext *ctx) \ > } \ > tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop); \ > tcg_gen_mov_tl(cpu_reserve, t0); \ > - tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val)); \ > + tcg_gen_mov_tl(cpu_reserve_val, gpr); \ > tcg_temp_free(t0); \ > } > > @@ -3156,14 +3160,28 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA, > int reg, int memop) > { > TCGLabel *l1; > + TCGv_i32 tmp = tcg_temp_local_new_i32(); > + TCGv t0; > > + tcg_gen_movi_i32(tmp, 0); > tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); > l1 = gen_new_label(); > tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); > - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); > - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); > + > + t0 = tcg_temp_new(); > + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], > + ctx->mem_idx, DEF_MEMOP(memop)); > + tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val); > + tcg_gen_trunc_tl_i32(tmp, t0); > + > gen_set_label(l1); > + tcg_gen_shli_i32(tmp, tmp, CRF_EQ_BIT); > + tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], tmp); > tcg_gen_movi_tl(cpu_reserve, -1); > + tcg_gen_movi_tl(cpu_reserve_val, 0); > + > + tcg_temp_free(t0); > + tcg_temp_free_i32(tmp); > } > #endif >
David Gibson <david@gibson.dropbear.id.au> writes: > [ Unknown signature status ] > On Thu, Apr 06, 2017 at 03:52:47PM +0530, Nikunj A Dadhania wrote: >> Emulating LL/SC with cmpxchg is not correct, since it can suffer from >> the ABA problem. However, portable parallel code is written assuming >> only cmpxchg which means that in practice this is a viable alternative. >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> target/ppc/translate.c | 24 +++++++++++++++++++++--- >> 1 file changed, 21 insertions(+), 3 deletions(-) >> >> diff --git a/target/ppc/translate.c b/target/ppc/translate.c >> index b6abc60..a9c733d 100644 >> --- a/target/ppc/translate.c >> +++ b/target/ppc/translate.c >> @@ -73,6 +73,7 @@ static TCGv cpu_cfar; >> #endif >> static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; >> static TCGv cpu_reserve; >> +static TCGv cpu_reserve_val; >> static TCGv cpu_fpscr; >> static TCGv_i32 cpu_access_type; >> >> @@ -181,6 +182,9 @@ void ppc_translate_init(void) >> cpu_reserve = tcg_global_mem_new(cpu_env, >> offsetof(CPUPPCState, reserve_addr), >> "reserve_addr"); >> + cpu_reserve_val = tcg_global_mem_new(cpu_env, >> + offsetof(CPUPPCState, reserve_val), >> + "reserve_val"); > > I notice that lqarx is not updated. Does that matter? That's correct, I haven't touched that yet. Most of the locks are implemented using lwarx/stwcx. Regards Nikunj
diff --git a/target/ppc/translate.c b/target/ppc/translate.c index b6abc60..a9c733d 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -73,6 +73,7 @@ static TCGv cpu_cfar; #endif static TCGv cpu_xer, cpu_so, cpu_ov, cpu_ca, cpu_ov32, cpu_ca32; static TCGv cpu_reserve; +static TCGv cpu_reserve_val; static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; @@ -181,6 +182,9 @@ void ppc_translate_init(void) cpu_reserve = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, reserve_addr), "reserve_addr"); + cpu_reserve_val = tcg_global_mem_new(cpu_env, + offsetof(CPUPPCState, reserve_val), + "reserve_val"); cpu_fpscr = tcg_global_mem_new(cpu_env, offsetof(CPUPPCState, fpscr), "fpscr"); @@ -3023,7 +3027,7 @@ static void gen_##name(DisasContext *ctx) \ } \ tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop); \ tcg_gen_mov_tl(cpu_reserve, t0); \ - tcg_gen_st_tl(gpr, cpu_env, offsetof(CPUPPCState, reserve_val)); \ + tcg_gen_mov_tl(cpu_reserve_val, gpr); \ tcg_temp_free(t0); \ } @@ -3156,14 +3160,28 @@ static void gen_conditional_store(DisasContext *ctx, TCGv EA, int reg, int memop) { TCGLabel *l1; + TCGv_i32 tmp = tcg_temp_local_new_i32(); + TCGv t0; + tcg_gen_movi_i32(tmp, 0); tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so); l1 = gen_new_label(); tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1); - tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], CRF_EQ); - tcg_gen_qemu_st_tl(cpu_gpr[reg], EA, ctx->mem_idx, memop); + + t0 = tcg_temp_new(); + tcg_gen_atomic_cmpxchg_tl(t0, EA, cpu_reserve_val, cpu_gpr[reg], + ctx->mem_idx, DEF_MEMOP(memop)); + tcg_gen_setcond_tl(TCG_COND_EQ, t0, t0, cpu_reserve_val); + tcg_gen_trunc_tl_i32(tmp, t0); + gen_set_label(l1); + tcg_gen_shli_i32(tmp, tmp, CRF_EQ_BIT); + tcg_gen_or_i32(cpu_crf[0], cpu_crf[0], tmp); tcg_gen_movi_tl(cpu_reserve, -1); + tcg_gen_movi_tl(cpu_reserve_val, 0); + + tcg_temp_free(t0); + tcg_temp_free_i32(tmp); } #endif
Emulating LL/SC with cmpxchg is not correct, since it can suffer from the ABA problem. However, portable parallel code is written assuming only cmpxchg which means that in practice this is a viable alternative. Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- target/ppc/translate.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-)