diff mbox

[RFC,v2,05/13] target-ppc: add modulo word operations

Message ID 1469263490-19130-6-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania July 23, 2016, 8:44 a.m. UTC
Adding following instructions:

moduw: Modulo Unsigned Word
modsw: Modulo Signed Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h     |  2 ++
 target-ppc/int_helper.c | 15 +++++++++++++++
 target-ppc/translate.c  | 19 +++++++++++++++++++
 3 files changed, 36 insertions(+)

Comments

Richard Henderson July 24, 2016, 1:24 a.m. UTC | #1
On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote:
> Adding following instructions:
>
> moduw: Modulo Unsigned Word
> modsw: Modulo Signed Word
>
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h     |  2 ++
>  target-ppc/int_helper.c | 15 +++++++++++++++
>  target-ppc/translate.c  | 19 +++++++++++++++++++
>  3 files changed, 36 insertions(+)
>
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 1f5cfd0..76072fd 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
> +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
> +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
>  DEF_HELPER_3(sraw, tl, env, tl, tl)
>  #if defined(TARGET_PPC64)
>  DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 7445376..631e0b4 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
>
>  #endif
>
> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu)
> +{
> +    int32_t ra = (int32_t) rau;
> +    int32_t rb = (int32_t) rbu;
> +
> +    if ((rb == 0) || (ra == INT32_MIN && rb == -1)) {
> +        return 0;
> +    }
> +    return ra % rb;
> +}
> +
> +uint32_t helper_moduw(uint32_t ra, uint32_t rb)
> +{
> +    return rb ? ra % rb : 0;
> +}

I think, like you, I got distracted by the current div implementation in ppc. 
I've just re-read the spec and seen the "undefined" language.  Which of course 
gives us much more freedom.

With this freedom, we can do the division inline, without branches.  Please see 
target-mips/translate.c, gen_r6_muldiv.

Basically, we check for the offending cases and modify the divisor prior to the 
division.  For unsigned:

	a / (b == 0 ? 1 : b)

For signed:

	a / ((a == INT_MAX & b == -1) | (b == 0) ? : b)


r~
Nikunj A. Dadhania July 25, 2016, 5:37 a.m. UTC | #2
Richard Henderson <rth@twiddle.net> writes:

> On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote:
>> Adding following instructions:
>>
>> moduw: Modulo Unsigned Word
>> modsw: Modulo Signed Word
>>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/helper.h     |  2 ++
>>  target-ppc/int_helper.c | 15 +++++++++++++++
>>  target-ppc/translate.c  | 19 +++++++++++++++++++
>>  3 files changed, 36 insertions(+)
>>
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index 1f5cfd0..76072fd 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
>> +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
>> +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
>>  DEF_HELPER_3(sraw, tl, env, tl, tl)
>>  #if defined(TARGET_PPC64)
>>  DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index 7445376..631e0b4 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
>>
>>  #endif
>>
>> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu)
>> +{
>> +    int32_t ra = (int32_t) rau;
>> +    int32_t rb = (int32_t) rbu;
>> +
>> +    if ((rb == 0) || (ra == INT32_MIN && rb == -1)) {
>> +        return 0;
>> +    }
>> +    return ra % rb;
>> +}
>> +
>> +uint32_t helper_moduw(uint32_t ra, uint32_t rb)
>> +{
>> +    return rb ? ra % rb : 0;
>> +}
>
> I think, like you, I got distracted by the current div implementation in ppc. 
> I've just re-read the spec and seen the "undefined" language.  Which of course 
> gives us much more freedom.

Right, I too thought of the same but didn't do it in this series.
Current div implementation is pretty complicated.

> With this freedom, we can do the division inline, without branches.  Please see 
> target-mips/translate.c, gen_r6_muldiv.
>
> Basically, we check for the offending cases and modify the divisor prior to the 
> division.  For unsigned:
>
> 	a / (b == 0 ? 1 : b)
>
> For signed:
>
> 	a / ((a == INT_MAX & b == -1) | (b == 0) ? : b)

Sure, will add it in this series.

Regards
Nikunj
Nikunj A. Dadhania July 25, 2016, 6:07 a.m. UTC | #3
Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> writes:

> Richard Henderson <rth@twiddle.net> writes:
>
>> On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote:
>> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu)
>>> +{
>>> +    int32_t ra = (int32_t) rau;
>>> +    int32_t rb = (int32_t) rbu;
>>> +
>>> +    if ((rb == 0) || (ra == INT32_MIN && rb == -1)) {
>>> +        return 0;
>>> +    }
>>> +    return ra % rb;
>>> +}
>>> +
>>> +uint32_t helper_moduw(uint32_t ra, uint32_t rb)
>>> +{
>>> +    return rb ? ra % rb : 0;
>>> +}
>>
>> I think, like you, I got distracted by the current div implementation in ppc. 
>> I've just re-read the spec and seen the "undefined" language.  Which of course 
>> gives us much more freedom.
>
> Right, I too thought of the same but didn't do it in this series.
> Current div implementation is pretty complicated.

To be precise gen_op_arith_div[d,w]() implementation.

>> With this freedom, we can do the division inline, without branches.  Please see 
>> target-mips/translate.c, gen_r6_muldiv.
>>
>> Basically, we check for the offending cases and modify the divisor prior to the 
>> division.  For unsigned:
>>
>> 	a / (b == 0 ? 1 : b)
>>
>> For signed:
>>
>> 	a / ((a == INT_MAX & b == -1) | (b == 0) ? : b)
>
> Sure, will add it in this series.
>
> Regards
> Nikunj
Nikunj A. Dadhania July 25, 2016, 11:14 a.m. UTC | #4
Richard Henderson <rth@twiddle.net> writes:

> On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote:
>> Adding following instructions:
>>
>> moduw: Modulo Unsigned Word
>> modsw: Modulo Signed Word
>>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/helper.h     |  2 ++
>>  target-ppc/int_helper.c | 15 +++++++++++++++
>>  target-ppc/translate.c  | 19 +++++++++++++++++++
>>  3 files changed, 36 insertions(+)
>>
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index 1f5cfd0..76072fd 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -41,6 +41,8 @@ DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
>>  DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
>> +DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
>> +DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
>>  DEF_HELPER_3(sraw, tl, env, tl, tl)
>>  #if defined(TARGET_PPC64)
>>  DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index 7445376..631e0b4 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -139,6 +139,21 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
>>
>>  #endif
>>
>> +uint32_t helper_modsw(uint32_t rau, uint32_t rbu)
>> +{
>> +    int32_t ra = (int32_t) rau;
>> +    int32_t rb = (int32_t) rbu;
>> +
>> +    if ((rb == 0) || (ra == INT32_MIN && rb == -1)) {
>> +        return 0;
>> +    }
>> +    return ra % rb;
>> +}
>> +
>> +uint32_t helper_moduw(uint32_t ra, uint32_t rb)
>> +{
>> +    return rb ? ra % rb : 0;
>> +}
>
> I think, like you, I got distracted by the current div implementation in ppc. 
> I've just re-read the spec and seen the "undefined" language.  Which of course 
> gives us much more freedom.
>
> With this freedom, we can do the division inline, without branches.  Please see 
> target-mips/translate.c, gen_r6_muldiv.
>
> Basically, we check for the offending cases and modify the divisor prior to the 
> division.  For unsigned:
>
> 	a / (b == 0 ? 1 : b)

Modulo case: a % (b == 0 ? 1 : b)

    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
    tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0);
    tcg_gen_movi_i32(t3, 0);
    tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
    tcg_gen_remu_i32(t3, t0, t1);
    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t3); 

> For signed:
>
> 	a / ((a == INT_MAX & b == -1) | (b == 0) ? : b)

Modulo case: a % ((a == INT_MAX & b == -1) | (b == 0) ? 1 : b)

    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
    tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t0, INT_MIN);
    tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, -1);
    tcg_gen_and_i32(t2, t2, t3);
    tcg_gen_setcondi_i32(TCG_COND_EQ, t3, t1, 0);
    tcg_gen_or_i32(t2, t2, t3);
    tcg_gen_movi_i32(t3, 0);
    tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);
    tcg_gen_rem_i32(t3, t0, t1);
    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t3);

I think you were suggesting something like above?

For "div[wd]o." we will have further cases to implement overflow.

Regards,
Nikunj
Richard Henderson July 25, 2016, 2:34 p.m. UTC | #5
On 07/25/2016 04:44 PM, Nikunj A Dadhania wrote:
> Modulo case: a % (b == 0 ? 1 : b)
>
>     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
>     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
>     tcg_gen_setcondi_i32(TCG_COND_EQ, t2, t1, 0);
>     tcg_gen_movi_i32(t3, 0);
>     tcg_gen_movcond_i32(TCG_COND_NE, t1, t2, t3, t2, t1);

No setcond for unsigned; you can do the whole thing with movcond.

   t2 = tcg_const_i32(1);
   t3 = tcg_const_i32(0);
   tcg_gen_movcond_i32(TCG_COND_EQ, t1, t1, t3, t2, t1);

But otherwise, yes.

> For "div[wd]o." we will have further cases to implement overflow.

For divwo, you *would* actually do the setcond as above, because that would be 
exactly the overflow condition that you're supposed to compute.

The signed case you pasted looks correct, and the T2 input to the movcond is 
also exactly the overflow condition.


r~
Nikunj A. Dadhania July 25, 2016, 4:31 p.m. UTC | #6
Richard Henderson <rth@twiddle.net> writes:
> On 07/23/2016 02:14 PM, Nikunj A Dadhania wrote:
[...]
> Basically, we check for the offending cases and modify the divisor prior to the 
> division.  For unsigned:
>
> 	a / (b == 0 ? 1 : b)
>
> For signed:
>
> 	a / ((a == INT_MAX & b == -1) | (b == 0) ? : b)

So when we change the divisor to 1, undefined result is set as "a". Hope
that should be fine and doesnt break anything :-)

Regards,
Nikunj
diff mbox

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 1f5cfd0..76072fd 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -41,6 +41,8 @@  DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(modsw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(moduw, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 7445376..631e0b4 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -139,6 +139,21 @@  uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 
 #endif
 
+uint32_t helper_modsw(uint32_t rau, uint32_t rbu)
+{
+    int32_t ra = (int32_t) rau;
+    int32_t rb = (int32_t) rbu;
+
+    if ((rb == 0) || (ra == INT32_MIN && rb == -1)) {
+        return 0;
+    }
+    return ra % rb;
+}
+
+uint32_t helper_moduw(uint32_t ra, uint32_t rb)
+{
+    return rb ? ra % rb : 0;
+}
 
 target_ulong helper_cntlzw(target_ulong t)
 {
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7e349e8..4348efd 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1175,6 +1175,23 @@  GEN_DIVE(divde, divde, 0);
 GEN_DIVE(divdeo, divde, 1);
 #endif
 
+#define GEN_INT_ARITH_MODW(name, opc3, sign)                            \
+static void glue(gen_, name)(DisasContext *ctx)                         \
+{                                                                       \
+    TCGv_i32 t0 = tcg_temp_new_i32();                                   \
+    TCGv_i32 t1 = tcg_temp_new_i32();                                   \
+                                                                        \
+    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                 \
+    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                 \
+    gen_helper_##name(t0, t0 , t1);                                     \
+    tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);                  \
+    tcg_temp_free_i32(t0);                                              \
+    tcg_temp_free_i32(t1);                                              \
+}
+
+GEN_INT_ARITH_MODW(moduw, 0x08, 0);
+GEN_INT_ARITH_MODW(modsw, 0x18, 1);
+
 /* mulhw  mulhw. */
 static void gen_mulhw(DisasContext *ctx)
 {
@@ -10241,6 +10258,8 @@  GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206),
 GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206),
+GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300),
+GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300),
 
 #if defined(TARGET_PPC64)
 #undef GEN_INT_ARITH_DIVD