Message ID | 1469263490-19130-6-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote: > Adding following instructions: > > moduw: Modulo Unsigned Word > modsw: Modulo Signed Word > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target-ppc/helper.h | 2 ++ > target-ppc/int_helper.c | 15 +++++++++++++++ > target-ppc/translate.c | 19 +++++++++++++++++++ > 3 files changed, 36 insertions(+) > > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 1f5cfd0..76072fd 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) > DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) > DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) > DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) > +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32) > +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32) > DEF_HELPER_3(sraw, tl, env, tl, tl) > #if defined(TARGET_PPC64) > DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) > diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > index 7445376..631e0b4 100644 > --- a/target-ppc/int_helper.c > +++ b/target-ppc/int_helper.c > @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) > > #endif > > +uint32_t helper_modsw(uint32_t rau, uint32_t rbu) > +{ > + int32_t ra = (int32_t) rau; > + int32_t rb = (int32_t) rbu; > + > + if ((rb == 0) || (ra == INT32_MIN && rb == -1)) { > + return 0; > + } > + return ra % rb; > +} > + > +uint32_t helper_moduw(uint32_t ra, uint32_t rb) > +{ > + return rb ? ra % rb : 0; > +} I think, like you, I got distracted by the current div implementation in ppc. I've just re-read the spec and seen the "undefined" language. Which of course gives us much more freedom. With this freedom, we can do the division inline, without branches. Please see target-mips/translate.c, gen_r6_muldiv. Basically, we check for the offending cases and modify the divisor prior to the division. For unsigned: a / (b == 0 ? 1 : b) For signed: a / ((a == INT_MAX & b == -1) | (b == 0) ? : b) r~
Richard Henderson <rth@twiddle.net> writes: > On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote: >> Adding following instructions: >> >> moduw: Modulo Unsigned Word >> modsw: Modulo Signed Word >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> target-ppc/helper.h | 2 ++ >> target-ppc/int_helper.c | 15 +++++++++++++++ >> target-ppc/translate.c | 19 +++++++++++++++++++ >> 3 files changed, 36 insertions(+) >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index 1f5cfd0..76072fd 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) >> +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32) >> +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32) >> DEF_HELPER_3(sraw, tl, env, tl, tl) >> #if defined(TARGET_PPC64) >> DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c >> index 7445376..631e0b4 100644 >> --- a/target-ppc/int_helper.c >> +++ b/target-ppc/int_helper.c >> @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) >> >> #endif >> >> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu) >> +{ >> + int32_t ra = (int32_t) rau; >> + int32_t rb = (int32_t) rbu; >> + >> + if ((rb == 0) || (ra == INT32_MIN && rb == -1)) { >> + return 0; >> + } >> + return ra % rb; >> +} >> + >> +uint32_t helper_moduw(uint32_t ra, uint32_t rb) >> +{ >> + return rb ? ra % rb : 0; >> +} > > I think, like you, I got distracted by the current div implementation in ppc. > I've just re-read the spec and seen the "undefined" language. Which of course > gives us much more freedom. Right, I too thought of the same but didn't do it in this series. Current div implementation is pretty complicated. > With this freedom, we can do the division inline, without branches. Please see > target-mips/translate.c, gen_r6_muldiv. > > Basically, we check for the offending cases and modify the divisor prior to the > division. For unsigned: > > a / (b == 0 ? 1 : b) > > For signed: > > a / ((a == INT_MAX & b == -1) | (b == 0) ? : b) Sure, will add it in this series. Regards Nikunj
Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes: > Richard Henderson <rth@twiddle.net> writes: > >> On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote: >> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu) >>> +{ >>> + int32_t ra = (int32_t) rau; >>> + int32_t rb = (int32_t) rbu; >>> + >>> + if ((rb == 0) || (ra == INT32_MIN && rb == -1)) { >>> + return 0; >>> + } >>> + return ra % rb; >>> +} >>> + >>> +uint32_t helper_moduw(uint32_t ra, uint32_t rb) >>> +{ >>> + return rb ? ra % rb : 0; >>> +} >> >> I think, like you, I got distracted by the current div implementation in ppc. >> I've just re-read the spec and seen the "undefined" language. Which of course >> gives us much more freedom. > > Right, I too thought of the same but didn't do it in this series. > Current div implementation is pretty complicated. To be precise gen_op_arith_div[d,w]() implementation. >> With this freedom, we can do the division inline, without branches. Please see >> target-mips/translate.c, gen_r6_muldiv. >> >> Basically, we check for the offending cases and modify the divisor prior to the >> division. For unsigned: >> >> a / (b == 0 ? 1 : b) >> >> For signed: >> >> a / ((a == INT_MAX & b == -1) | (b == 0) ? : b) > > Sure, will add it in this series. > > Regards > Nikunj
Richard Henderson <rth@twiddle.net> writes: > On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote: >> Adding following instructions: >> >> moduw: Modulo Unsigned Word >> modsw: Modulo Signed Word >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> >> --- >> target-ppc/helper.h | 2 ++ >> target-ppc/int_helper.c | 15 +++++++++++++++ >> target-ppc/translate.c | 19 +++++++++++++++++++ >> 3 files changed, 36 insertions(+) >> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h >> index 1f5cfd0..76072fd 100644 >> --- a/target-ppc/helper.h >> +++ b/target-ppc/helper.h >> @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) >> DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) >> +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32) >> +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32) >> DEF_HELPER_3(sraw, tl, env, tl, tl) >> #if defined(TARGET_PPC64) >> DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c >> index 7445376..631e0b4 100644 >> --- a/target-ppc/int_helper.c >> +++ b/target-ppc/int_helper.c >> @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) >> >> #endif >> >> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu) >> +{ >> + int32_t ra = (int32_t) rau; >> + int32_t rb = (int32_t) rbu; >> + >> + if ((rb == 0) || (ra == INT32_MIN && rb == -1)) { >> + return 0; >> + } >> + return ra % rb; >> +} >> + >> +uint32_t helper_moduw(uint32_t ra, uint32_t rb) >> +{ >> + return rb ? ra % rb : 0; >> +} > > I think, like you, I got distracted by the current div implementation in ppc. > I've just re-read the spec and seen the "undefined" language. Which of course > gives us much more freedom. > > With this freedom, we can do the division inline, without branches. Please see > target-mips/translate.c, gen_r6_muldiv. > > Basically, we check for the offending cases and modify the divisor prior to the > division. For unsigned: > > a / (b == 0 ? 1 : b) Modulo case: a % (b == 0 ? 1 : b) tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0); tcg_gen_movi_i32(t3, 0); tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1); tcg_gen_remu_i32(t3, t0, t1); tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t3); > For signed: > > a / ((a == INT_MAX & b == -1) | (b == 0) ? : b) Modulo case: a % ((a == INT_MAX & b == -1) | (b == 0) ? 1 : b) tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN); tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1); tcg_gen_and_i32(t2, t2, t3); tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0); tcg_gen_or_i32(t2, t2, t3); tcg_gen_movi_i32(t3, 0); tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1); tcg_gen_rem_i32(t3, t0, t1); tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t3); I think you were suggesting something like above? For "div[wd]o." we will have further cases to implement overflow. Regards, Nikunj
On 07/25/2016 04:44 PM, Nikunj A Dadhania wrote: > Modulo case: a % (b == 0 ? 1 : b) > > tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); > tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); > tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0); > tcg_gen_movi_i32(t3, 0); > tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1); No setcond for unsigned; you can do the whole thing with movcond. t2 = tcg_const_i32(1); t3 = tcg_const_i32(0); tcg_gen_movcond_i32(TCG_COND_EQ, t1, t1, t3, t2, t1); But otherwise, yes. > For "div[wd]o." we will have further cases to implement overflow. For divwo, you *would* actually do the setcond as above, because that would be exactly the overflow condition that you're supposed to compute. The signed case you pasted looks correct, and the T2 input to the movcond is also exactly the overflow condition. r~
Richard Henderson <rth@twiddle.net> writes: > On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote: [...] > Basically, we check for the offending cases and modify the divisor prior to the > division. For unsigned: > > a / (b == 0 ? 1 : b) > > For signed: > > a / ((a == INT_MAX & b == -1) | (b == 0) ? : b) So when we change the divisor to 1, undefined result is set as "a". Hope that should be fine and doesnt break anything :-) Regards, Nikunj
diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 1f5cfd0..76072fd 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32) +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32) DEF_HELPER_3(sraw, tl, env, tl, tl) #if defined(TARGET_PPC64) DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index 7445376..631e0b4 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) #endif +uint32_t helper_modsw(uint32_t rau, uint32_t rbu) +{ + int32_t ra = (int32_t) rau; + int32_t rb = (int32_t) rbu; + + if ((rb == 0) || (ra == INT32_MIN && rb == -1)) { + return 0; + } + return ra % rb; +} + +uint32_t helper_moduw(uint32_t ra, uint32_t rb) +{ + return rb ? ra % rb : 0; +} target_ulong helper_cntlzw(target_ulong t) { diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 7e349e8..4348efd 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -1175,6 +1175,23 @@ GEN_DIVE(divde, divde, 0); GEN_DIVE(divdeo, divde, 1); #endif +#define GEN_INT_ARITH_MODW(name, opc3, sign) \ +static void glue(gen_, name)(DisasContext *ctx) \ +{ \ + TCGv_i32 t0 = tcg_temp_new_i32(); \ + TCGv_i32 t1 = tcg_temp_new_i32(); \ + \ + tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \ + tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \ + gen_helper_##name(t0, t0 , t1); \ + tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0); \ + tcg_temp_free_i32(t0); \ + tcg_temp_free_i32(t1); \ +} + +GEN_INT_ARITH_MODW(moduw, 0x08, 0); +GEN_INT_ARITH_MODW(modsw, 0x18, 1); + /* mulhw mulhw. */ static void gen_mulhw(DisasContext *ctx) { @@ -10241,6 +10258,8 @@ GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206), GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206), +GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300), +GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300), #if defined(TARGET_PPC64) #undef GEN_INT_ARITH_DIVD
Adding following instructions: moduw: Modulo Unsigned Word modsw: Modulo Signed Word Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- target-ppc/helper.h | 2 ++ target-ppc/int_helper.c | 15 +++++++++++++++ target-ppc/translate.c | 19 +++++++++++++++++++ 3 files changed, 36 insertions(+)