diff mbox series

[6/6] target/tricore: Add shuffle insn

Message ID 20230610105547.159148-7-kbastian@mail.uni-paderborn.de (mailing list archive)
State New, archived
Headers show
Series TriCore 1.6.2 Instructions | expand

Commit Message

Bastian Koppelmann June 10, 2023, 10:55 a.m. UTC
This is mostly authored by volumit (https://github.com/volumit/qemu/).

Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
---
 target/tricore/helper.h          |  1 +
 target/tricore/op_helper.c       | 48 ++++++++++++++++++++++++++++++++
 target/tricore/translate.c       |  8 ++++++
 target/tricore/tricore-opcodes.h |  1 +
 4 files changed, 58 insertions(+)

Comments

Richard Henderson June 10, 2023, 3:41 p.m. UTC | #1
On 6/10/23 03:55, Bastian Koppelmann wrote:
> +/*
> + * table from
> + * https://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
> + */
> +static const unsigned char BitReverseTable256[256] = {
> +#   define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
> +#   define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
> +#   define R6(n) R4(n), R4(n + 2 * 4 ), R4(n + 1 * 4 ), R4(n + 3 * 4 )
> +    R6(0), R6(2), R6(1), R6(3)
> +};

This is revbit8() from qemu/host-utils.h.

> +uint32_t helper_shuffle(uint32_t arg0, uint32_t arg1)
> +{
> +    uint8_t buf[4];
> +    uint8_t resbuf[4];
> +    uint32_t byte_select;
> +    uint32_t res = 0;
> +
> +    stl_le_p(buf, arg0);

While storing to a buffer works, it's just as easy to use shifts.

> +    byte_select = arg1 & 0x3;
> +    resbuf[0] = buf[byte_select];

   resb = extract32(arg0, byte_select * 8, 8);
   res |= resb << 0;

> +    resbuf[1] = buf[byte_select];

   res |= resb << 8;

etc.

> +    if (arg1 & 0x100) {
> +        resbuf[3] = BitReverseTable256[resbuf[3]];
> +    }

The bit-reversal is controlled by one bit for all bytes.  It can be done for all bytes in
parallel.  Use the shifts from revbit8(), applied to the entire uint32_t result.


r~
diff mbox series

Patch

diff --git a/target/tricore/helper.h b/target/tricore/helper.h
index a10576e09e..31d71eac7a 100644
--- a/target/tricore/helper.h
+++ b/target/tricore/helper.h
@@ -134,6 +134,7 @@  DEF_HELPER_FLAGS_5(mulr_h, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_2(crc32b, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_FLAGS_2(crc32_be, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_FLAGS_2(crc32_le, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(shuffle, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 /* CSA */
 DEF_HELPER_2(call, void, env, i32)
 DEF_HELPER_1(ret, void, env)
diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c
index b6ef1462e4..bd770a2341 100644
--- a/target/tricore/op_helper.c
+++ b/target/tricore/op_helper.c
@@ -2308,6 +2308,54 @@  uint32_t helper_crc32_le(uint32_t arg0, uint32_t arg1)
     return crc32(arg1, buf, 4);
 }
 
+/*
+ * Byte bit-reversal table (entry i = bit-mirrored i; cf. revbit8()), from
+ * https://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
+ */
+static const unsigned char BitReverseTable256[256] = {
+#   define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
+#   define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
+#   define R6(n) R4(n), R4(n + 2 * 4 ), R4(n + 1 * 4 ), R4(n + 3 * 4 )
+    R6(0), R6(2), R6(1), R6(3) /* each R6() expands to 64 entries */
+};
+
+uint32_t helper_shuffle(uint32_t arg0, uint32_t arg1)
+{
+    uint8_t buf[4];
+    uint8_t resbuf[4];
+    uint32_t byte_select;
+    uint32_t res = 0;
+    /* SHUFFLE: each result byte is a byte of arg0 chosen by 2 bits of arg1. */
+    stl_le_p(buf, arg0); /* little-endian store: buf[0] is the low byte */
+    /* Result byte 0: source index is arg1[1:0]. */
+    byte_select = arg1 & 0x3;
+    resbuf[0] = buf[byte_select];
+    if (arg1 & 0x100) { /* arg1 bit 8 set: bit-reverse the byte */
+        resbuf[0] = BitReverseTable256[resbuf[0]];
+    }
+    /* Result byte 1: source index is arg1[3:2]. */
+    byte_select = (arg1 >> 2) & 0x3;
+    resbuf[1] = buf[byte_select];
+    if (arg1 & 0x100) {
+        resbuf[1] = BitReverseTable256[resbuf[1]];
+    }
+    /* Result byte 2: source index is arg1[5:4]. */
+    byte_select = (arg1 >> 4) & 0x3;
+    resbuf[2] = buf[byte_select];
+    if (arg1 & 0x100) {
+        resbuf[2] = BitReverseTable256[resbuf[2]];
+    }
+    /* Result byte 3: source index is arg1[7:6]. */
+    byte_select = (arg1 >> 6) & 0x3;
+    resbuf[3] = buf[byte_select];
+    if (arg1 & 0x100) {
+        resbuf[3] = BitReverseTable256[resbuf[3]];
+    }
+    /* Reassemble the four result bytes, resbuf[0] being the low byte. */
+    res = ldl_le_p(resbuf);
+    return res;
+}
+
 /* context save area (CSA) related helpers */
 
 static int cdc_increment(target_ulong *psw)
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index 85526ef4db..a4c60e8ae2 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -5011,6 +5011,14 @@  static void decode_rc_logical_shift(DisasContext *ctx)
     case OPC2_32_RC_XOR:
         tcg_gen_xori_tl(cpu_gpr_d[r2], cpu_gpr_d[r1], const9);
         break;
+    case OPC2_32_RC_SHUFFLE:
+        if (has_feature(ctx, TRICORE_FEATURE_162)) {
+            TCGv temp = tcg_constant_i32(const9);
+            gen_helper_shuffle(cpu_gpr_d[r2], cpu_gpr_d[r1], temp);
+        } else {
+            generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
+        }
+        break;
     default:
         generate_trap(ctx, TRAPC_INSN_ERR, TIN2_IOPC);
     }
diff --git a/target/tricore/tricore-opcodes.h b/target/tricore/tricore-opcodes.h
index 27f80e1702..af63926731 100644
--- a/target/tricore/tricore-opcodes.h
+++ b/target/tricore/tricore-opcodes.h
@@ -885,6 +885,7 @@  enum {
     OPC2_32_RC_SHAS                              = 0x02,
     OPC2_32_RC_XNOR                              = 0x0d,
     OPC2_32_RC_XOR                               = 0x0c,
+    OPC2_32_RC_SHUFFLE                           = 0x07, /* v1.6.2 only */
 };
 /* OPCM_32_RC_ACCUMULATOR                           */
 enum {