diff mbox

[RFC,v1,11/13] target-ppc: add maddld instruction

Message ID 1468861517-2508-12-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania July 18, 2016, 5:05 p.m. UTC
maddld: Multiply-Add Low Doubleword

Multiplies two 64-bit registers (RA * RB), adds a third register (RC) to
the quadword result, and returns the lower doubleword in the target
register (RT).

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Richard Henderson July 21, 2016, 6:54 a.m. UTC | #1
On 07/18/2016 10:35 PM, Nikunj A Dadhania wrote:
> +static void gen_maddld(DisasContext *ctx)
> +{
> +    TCGv_i64 lo = tcg_temp_new_i64();
> +    TCGv_i64 hi = tcg_temp_new_i64();
> +    TCGv_i64 t1 = tcg_temp_new_i64();
> +    TCGv_i64 t2 = tcg_temp_new_i64();
> +    TCGv_i64 zero = tcg_const_i64(0);
> +    TCGv_i64 neg = tcg_const_i64(-1);
> +
> +    if (Rc(ctx->opcode)) {
> +        tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
> +                          cpu_gpr[rB(ctx->opcode)]);
> +        tcg_gen_movi_i64(t2, -1);
> +        tcg_gen_movcond_i64(TCG_COND_GE, t2, cpu_gpr[rC(ctx->opcode)], zero, zero, neg);
> +    }
> +    tcg_gen_mov_i64(t1, zero);
> +    tcg_gen_add2_i64(cpu_gpr[rD(ctx->opcode)], t1, lo, hi, cpu_gpr[rC(ctx->opcode)], t2);
> +    tcg_temp_free_i64(lo);
> +    tcg_temp_free_i64(hi);
> +    tcg_temp_free_i64(t1);
> +    tcg_temp_free_i64(t2);
> +    tcg_temp_free_i64(zero);
> +    tcg_temp_free_i64(neg);
> +}

None of this double-word arithmetic is required.
This produces a truncated 64-bit result; the high bits aren't used.

Why the conditional on Rc?  I see no special case for R0.


r~
Richard Henderson July 21, 2016, 6:59 a.m. UTC | #2
On 07/21/2016 12:24 PM, Richard Henderson wrote:
> On 07/18/2016 10:35 PM, Nikunj A Dadhania wrote:
>> +static void gen_maddld(DisasContext *ctx)
>> +{
>> +    TCGv_i64 lo = tcg_temp_new_i64();
>> +    TCGv_i64 hi = tcg_temp_new_i64();
>> +    TCGv_i64 t1 = tcg_temp_new_i64();
>> +    TCGv_i64 t2 = tcg_temp_new_i64();
>> +    TCGv_i64 zero = tcg_const_i64(0);
>> +    TCGv_i64 neg = tcg_const_i64(-1);
>> +
>> +    if (Rc(ctx->opcode)) {
>> +        tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)],
>> +                          cpu_gpr[rB(ctx->opcode)]);
>> +        tcg_gen_movi_i64(t2, -1);
>> +        tcg_gen_movcond_i64(TCG_COND_GE, t2, cpu_gpr[rC(ctx->opcode)], zero,
>> zero, neg);
>> +    }
>> +    tcg_gen_mov_i64(t1, zero);
>> +    tcg_gen_add2_i64(cpu_gpr[rD(ctx->opcode)], t1, lo, hi,
>> cpu_gpr[rC(ctx->opcode)], t2);
>> +    tcg_temp_free_i64(lo);
>> +    tcg_temp_free_i64(hi);
>> +    tcg_temp_free_i64(t1);
>> +    tcg_temp_free_i64(t2);
>> +    tcg_temp_free_i64(zero);
>> +    tcg_temp_free_i64(neg);
>> +}
>
> None of this double-word arithmetic is required.
> This produces a truncated 64-bit result; the high bits aren't used.
>
> Why the conditional on Rc?  I see no special case for R0.

Answering my own question, this is the low bit of the opcode, not rC.

Anyway, the conditional is still pointless, because the lsb of the opcode is 
always set, unlike the high-part multiplies.


r~
diff mbox

Patch

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 9464942..9717048 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7760,6 +7760,33 @@  GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20)
 GEN_VAFORM_PAIRED(vsel, vperm, 21)
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 
+#if defined(TARGET_PPC64)
+/*
+ * maddld: Multiply-Add Low Doubleword.
+ *
+ * RT = low 64 bits of (RA * RB + RC).
+ *
+ * Only the low doubleword of the result is written back, and the low
+ * 64 bits of a product and of a sum do not depend on the signedness of
+ * the operands or on the discarded high half.  A plain 64-bit multiply
+ * followed by a 64-bit add is therefore sufficient; no 128-bit
+ * muls2/add2 sequence is needed.
+ *
+ * The code is emitted unconditionally: the low bit of the opcode is part
+ * of the maddld encoding rather than an Rc flag, and gating the multiply
+ * on Rc() would feed uninitialized temporaries to the add if it were clear.
+ */
+static void gen_maddld(DisasContext *ctx)
+{
+    TCGv_i64 prod = tcg_temp_new_i64();
+
+    tcg_gen_mul_i64(prod, cpu_gpr[rA(ctx->opcode)],
+                    cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], prod, cpu_gpr[rC(ctx->opcode)]);
+    tcg_temp_free_i64(prod);
+}
+#endif /* defined(TARGET_PPC64) */
+
 GEN_VXFORM_NOA(vclzb, 1, 28)
 GEN_VXFORM_NOA(vclzh, 1, 29)
 GEN_VXFORM_NOA(vclzw, 1, 30)
@@ -10373,6 +10400,9 @@  GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC),
 GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC),
 GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC),
 GEN_HANDLER(vmladduhm, 0x04, 0x11, 0xFF, 0x00000000, PPC_ALTIVEC),
+#if defined(TARGET_PPC64)
+GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300),
+#endif
 GEN_HANDLER2(evsel0, "evsel", 0x04, 0x1c, 0x09, 0x00000000, PPC_SPE),
 GEN_HANDLER2(evsel1, "evsel", 0x04, 0x1d, 0x09, 0x00000000, PPC_SPE),
 GEN_HANDLER2(evsel2, "evsel", 0x04, 0x1e, 0x09, 0x00000000, PPC_SPE),