@@ -1569,3 +1569,44 @@ void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
}
clear_high(d, oprsz, desc);
}
+
+/*
+ * Expand to an out-of-line helper NAME that converts every SRCTY element
+ * of the vector at @a into a DSTTY element of the vector at @d, using the
+ * ordinary C conversion between the two integer types.
+ *
+ * The number of elements processed is the operation size taken from @desc
+ * divided by the size of a *destination* element.  Bytes of @d beyond the
+ * operation size are handled by clear_high().
+ *
+ * The iteration direction is chosen so that the helper also gives the
+ * expected result when @d and @a are the same address: when widening,
+ * storing d[i] overwrites source elements with index > i, so the walk
+ * goes from the last element to the first; when narrowing, storing d[i]
+ * only touches source elements with index <= i, so a forward walk is safe.
+ * NOTE(review): partially overlapping operands (d != a) are not handled;
+ * confirm that callers never generate them.
+ */
+#define DO_SZ_OP1(NAME, DSTTY, SRCTY)                                   \
+void HELPER(NAME)(void *d, void *a, uint32_t desc)                      \
+{                                                                       \
+    intptr_t oprsz = simd_oprsz(desc);                                  \
+    intptr_t nelem = oprsz / sizeof(DSTTY);                             \
+    DSTTY *dst = d;                                                     \
+    SRCTY *src = a;                                                     \
+    intptr_t i;                                                         \
+                                                                        \
+    if (sizeof(DSTTY) > sizeof(SRCTY)) {                                \
+        /* Widening: go backwards so unread input is never clobbered. */\
+        for (i = nelem - 1; i >= 0; --i) {                              \
+            SRCTY aa = src[i];                                          \
+            dst[i] = aa;                                                \
+        }                                                               \
+    } else {                                                            \
+        /* Narrowing: output never runs ahead of the input. */          \
+        for (i = 0; i < nelem; ++i) {                                   \
+            SRCTY aa = src[i];                                          \
+            dst[i] = aa;                                                \
+        }                                                               \
+    }                                                                   \
+    clear_high(d, oprsz, desc);                                         \
+}
+
+/*
+ * Convenience wrapper around DO_SZ_OP1: builds the helper name and both
+ * element types from an operation prefix, an integer type prefix
+ * ("int" or "uint") and the destination/source widths in bits.
+ *
+ * Beware of the ordering: the arguments are (destination, source) but the
+ * generated helper is named <OP><source bits>_<destination bits>.
+ */
+#define DO_SZ_OP2(OP, TYPFX, DBITS, SBITS)                              \
+    DO_SZ_OP1(OP##SBITS##_##DBITS, TYPFX##DBITS##_t, TYPFX##SBITS##_t)
+
+/*
+ * Instantiate the size-changing helpers.  DO_SZ_OP2 takes the widths as
+ * (destination bits, source bits), whereas the resulting helper names are
+ * <op><source bits>_<destination bits>, e.g. the first line below defines
+ * gvec_trunc64_32.
+ */
+
+/* Truncation: unsigned types, each element is narrowed to the destination width. */
+DO_SZ_OP2(gvec_trunc, uint, 32, 64)
+DO_SZ_OP2(gvec_trunc, uint, 16, 64)
+DO_SZ_OP2(gvec_trunc, uint, 8, 64)
+DO_SZ_OP2(gvec_trunc, uint, 16, 32)
+DO_SZ_OP2(gvec_trunc, uint, 8, 32)
+DO_SZ_OP2(gvec_trunc, uint, 8, 16)
+
+/* Zero extension: unsigned types, each element is widened to the destination width. */
+DO_SZ_OP2(gvec_zext, uint, 64, 32)
+DO_SZ_OP2(gvec_zext, uint, 64, 16)
+DO_SZ_OP2(gvec_zext, uint, 64, 8)
+DO_SZ_OP2(gvec_zext, uint, 32, 16)
+DO_SZ_OP2(gvec_zext, uint, 32, 8)
+DO_SZ_OP2(gvec_zext, uint, 16, 8)
+
+/* Sign extension: signed types, each element is widened to the destination width. */
+DO_SZ_OP2(gvec_sext, int, 64, 32)
+DO_SZ_OP2(gvec_sext, int, 64, 16)
+DO_SZ_OP2(gvec_sext, int, 64, 8)
+DO_SZ_OP2(gvec_sext, int, 32, 16)
+DO_SZ_OP2(gvec_sext, int, 32, 8)
+DO_SZ_OP2(gvec_sext, int, 16, 8)
+
+/* The generator macros are only needed for the instantiations above. */
+#undef DO_SZ_OP1
+#undef DO_SZ_OP2
@@ -1,3 +1,4 @@
+#include "tcg/tcg.h"
DEF_HELPER_FLAGS_2(div_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
DEF_HELPER_FLAGS_2(rem_i32, TCG_CALL_NO_RWG_SE, s32, s32, s32)
DEF_HELPER_FLAGS_2(divu_i32, TCG_CALL_NO_RWG_SE, i32, i32, i32)
@@ -328,3 +329,24 @@ DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+/*
+ * Element size-changing vector helpers.  Names follow the pattern
+ * gvec_<op><source bits>_<destination bits>; each helper takes a
+ * destination pointer, a source pointer and an i32 descriptor.
+ */
+
+/* Truncation to a narrower element. */
+DEF_HELPER_FLAGS_3(gvec_trunc64_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc64_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc64_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc32_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc32_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_trunc16_8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+/* Zero extension to a wider element. */
+DEF_HELPER_FLAGS_3(gvec_zext32_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext16_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext16_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_zext8_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+/* Sign extension to a wider element. */
+DEF_HELPER_FLAGS_3(gvec_sext32_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext16_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext16_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_sext8_16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
@@ -390,6 +390,24 @@ void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t cofs,
uint32_t oprsz, uint32_t maxsz);
+/*
+ * Perform vector element truncation/extension operations.
+ *
+ * @vecde and @vecse are the MO_* element sizes of the destination and
+ * source vectors.  tcg_gen_gvec_trunc requires vecse > vecde, while
+ * tcg_gen_gvec_zext and tcg_gen_gvec_sext require vecse < vecde; this is
+ * checked with tcg_debug_assert() only.
+ *
+ * @dofs and @aofs are the destination and source offsets, @doprsz is used
+ * as the operation size and @maxsz as the maximum size of the out-of-line
+ * helper call.  @aoprsz is accepted but not used by the current
+ * implementation.
+ */
+
+void tcg_gen_gvec_trunc(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
+
+void tcg_gen_gvec_zext(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
+
+void tcg_gen_gvec_sext(unsigned vecde, unsigned vecse,
+ uint32_t dofs, uint32_t aofs,
+ uint32_t doprsz, uint32_t aoprsz,
+ uint32_t maxsz);
/*
* 64-bit vector operations. Use these when the register has been allocated
* with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
@@ -4008,3 +4008,81 @@ void tcg_gen_gvec_bitsel(unsigned vece, uint32_t dofs, uint32_t aofs,
tcg_gen_gvec_4(dofs, aofs, bofs, cofs, oprsz, maxsz, &g);
}
+
+/*
+ * Emit an out-of-line helper call that narrows every element of the
+ * source vector to a smaller destination element.
+ *
+ * @vecde, @vecse: MO_* element sizes of destination and source; the
+ *                 source element must be strictly wider than the
+ *                 destination element.
+ * @dofs, @aofs:   destination and source offsets, forwarded unchanged to
+ *                 tcg_gen_gvec_2_ool().
+ * @doprsz:        forwarded as the operation size.
+ * @aoprsz:        currently unused.
+ * @maxsz:         forwarded as the maximum size.
+ */
+void tcg_gen_gvec_trunc(unsigned vecde, unsigned vecse,
+                        uint32_t dofs, uint32_t aofs,
+                        uint32_t doprsz, uint32_t aoprsz,
+                        uint32_t maxsz)
+{
+    /* Indexed as [source size][destination size]; unset entries are NULL. */
+    static gen_helper_gvec_2 * const fns[4][4] = {
+        [MO_64] = {
+            [MO_32] = gen_helper_gvec_trunc64_32,
+            [MO_16] = gen_helper_gvec_trunc64_16,
+            [MO_8]  = gen_helper_gvec_trunc64_8,
+        },
+        [MO_32] = {
+            [MO_16] = gen_helper_gvec_trunc32_16,
+            [MO_8]  = gen_helper_gvec_trunc32_8,
+        },
+        [MO_16] = {
+            [MO_8]  = gen_helper_gvec_trunc16_8,
+        },
+    };
+    gen_helper_gvec_2 *fn;
+
+    /*
+     * Validate the sizes before they are used as table indices:
+     * vecde < vecse <= MO_64 keeps both subscripts inside the table.
+     */
+    tcg_debug_assert(vecse <= MO_64 && vecde < vecse);
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn);
+}
+
+/*
+ * Emit an out-of-line helper call that zero-extends every element of the
+ * source vector to a wider destination element.
+ *
+ * @vecde, @vecse: MO_* element sizes of destination and source; the
+ *                 destination element must be strictly wider than the
+ *                 source element.
+ * @dofs, @aofs:   destination and source offsets, forwarded unchanged to
+ *                 tcg_gen_gvec_2_ool().
+ * @doprsz:        forwarded as the operation size.
+ * @aoprsz:        currently unused.
+ * @maxsz:         forwarded as the maximum size.
+ */
+void tcg_gen_gvec_zext(unsigned vecde, unsigned vecse,
+                       uint32_t dofs, uint32_t aofs,
+                       uint32_t doprsz, uint32_t aoprsz,
+                       uint32_t maxsz)
+{
+    /* Indexed as [source size][destination size]; unset entries are NULL. */
+    static gen_helper_gvec_2 * const fns[4][4] = {
+        [MO_8] = {
+            [MO_16] = gen_helper_gvec_zext8_16,
+            [MO_32] = gen_helper_gvec_zext8_32,
+            [MO_64] = gen_helper_gvec_zext8_64,
+        },
+        [MO_16] = {
+            [MO_32] = gen_helper_gvec_zext16_32,
+            [MO_64] = gen_helper_gvec_zext16_64,
+        },
+        [MO_32] = {
+            [MO_64] = gen_helper_gvec_zext32_64,
+        },
+    };
+    gen_helper_gvec_2 *fn;
+
+    /*
+     * Validate the sizes before they are used as table indices:
+     * vecse < vecde <= MO_64 keeps both subscripts inside the table.
+     */
+    tcg_debug_assert(vecde <= MO_64 && vecse < vecde);
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn);
+}
+
+/*
+ * Emit an out-of-line helper call that sign-extends every element of the
+ * source vector to a wider destination element.
+ *
+ * @vecde, @vecse: MO_* element sizes of destination and source; the
+ *                 destination element must be strictly wider than the
+ *                 source element.
+ * @dofs, @aofs:   destination and source offsets, forwarded unchanged to
+ *                 tcg_gen_gvec_2_ool().
+ * @doprsz:        forwarded as the operation size.
+ * @aoprsz:        currently unused.
+ * @maxsz:         forwarded as the maximum size.
+ */
+void tcg_gen_gvec_sext(unsigned vecde, unsigned vecse,
+                       uint32_t dofs, uint32_t aofs,
+                       uint32_t doprsz, uint32_t aoprsz,
+                       uint32_t maxsz)
+{
+    /* Indexed as [source size][destination size]; unset entries are NULL. */
+    static gen_helper_gvec_2 * const fns[4][4] = {
+        [MO_8] = {
+            [MO_16] = gen_helper_gvec_sext8_16,
+            [MO_32] = gen_helper_gvec_sext8_32,
+            [MO_64] = gen_helper_gvec_sext8_64,
+        },
+        [MO_16] = {
+            [MO_32] = gen_helper_gvec_sext16_32,
+            [MO_64] = gen_helper_gvec_sext16_64,
+        },
+        [MO_32] = {
+            [MO_64] = gen_helper_gvec_sext32_64,
+        },
+    };
+    gen_helper_gvec_2 *fn;
+
+    /*
+     * Validate the sizes before they are used as table indices:
+     * vecse < vecde <= MO_64 keeps both subscripts inside the table.
+     */
+    tcg_debug_assert(vecde <= MO_64 && vecse < vecde);
+    fn = fns[vecse][vecde];
+    tcg_debug_assert(fn != NULL);
+
+    tcg_gen_gvec_2_ool(dofs, aofs, doprsz, maxsz, 0, fn);
+}
Adds new functions to the gvec API for truncating and for sign- or zero-extending vector elements. These are currently implemented as helper functions, but may be mapped onto host vector instructions in the future. For the time being, this allows translation of more complicated vector instructions by helper-to-tcg. Signed-off-by: Anton Johansson <anjo@rev.ng> --- accel/tcg/tcg-runtime-gvec.c | 41 +++++++++++++++++ accel/tcg/tcg-runtime.h | 22 +++++++++ include/tcg/tcg-op-gvec-common.h | 18 ++++++++ tcg/tcg-op-gvec.c | 78 ++++++++++++++++++++++++++++++++ 4 files changed, 159 insertions(+)