@@ -41,8 +41,6 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
-DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
-DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, ptr)
@@ -306,24 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x00ff00ff00ff00ffULL;
- tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
- return tmp;
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x0000ffff0000ffffULL;
- tmp += (a >> 16) & 0x0000ffff0000ffffULL;
- return tmp;
-}
-
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
@@ -8960,6 +8960,10 @@ static bool do_gvec_fn2_bhs(DisasContext *s, arg_qrr_e *a, GVecGen2Fn *fn)
TRANS(CLS_v, do_gvec_fn2_bhs, a, gen_gvec_cls)
TRANS(CLZ_v, do_gvec_fn2_bhs, a, gen_gvec_clz)
TRANS(REV64_v, do_gvec_fn2_bhs, a, gen_gvec_rev64)
+TRANS(SADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_saddlp)
+TRANS(UADDLP_v, do_gvec_fn2_bhs, a, gen_gvec_uaddlp)
+TRANS(SADALP_v, do_gvec_fn2_bhs, a, gen_gvec_sadalp)
+TRANS(UADALP_v, do_gvec_fn2_bhs, a, gen_gvec_uadalp)
/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
@@ -9889,73 +9893,6 @@ static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
}
}
-static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
- bool is_q, int size, int rn, int rd)
-{
- /* Implement the pairwise operations from 2-misc:
- * SADDLP, UADDLP, SADALP, UADALP.
- * These all add pairs of elements in the input to produce a
- * double-width result element in the output (possibly accumulating).
- */
- bool accum = (opcode == 0x6);
- int maxpass = is_q ? 2 : 1;
- int pass;
- TCGv_i64 tcg_res[2];
-
- if (size == 2) {
- /* 32 + 32 -> 64 op */
- MemOp memop = size + (u ? 0 : MO_SIGN);
-
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op1, rn, pass * 2, memop);
- read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
- tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
- if (accum) {
- read_vec_element(s, tcg_op1, rd, pass, MO_64);
- tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
- }
- }
- } else {
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i64 tcg_op = tcg_temp_new_i64();
- NeonGenOne64OpFn *genfn;
- static NeonGenOne64OpFn * const fns[2][2] = {
- { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
- { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
- };
-
- genfn = fns[size][u];
-
- tcg_res[pass] = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_op, rn, pass, MO_64);
- genfn(tcg_res[pass], tcg_op);
-
- if (accum) {
- read_vec_element(s, tcg_op, rd, pass, MO_64);
- if (size == 0) {
- gen_helper_neon_addl_u16(tcg_res[pass],
- tcg_res[pass], tcg_op);
- } else {
- gen_helper_neon_addl_u32(tcg_res[pass],
- tcg_res[pass], tcg_op);
- }
- }
- }
- }
- if (!is_q) {
- tcg_res[1] = tcg_constant_i64(0);
- }
- for (pass = 0; pass < 2; pass++) {
- write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
- }
-}
-
static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
{
/* Implement SHLL and SHLL2 */
@@ -10015,17 +9952,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
return;
- case 0x2: /* SADDLP, UADDLP */
- case 0x6: /* SADALP, UADALP */
- if (size == 3) {
- unallocated_encoding(s);
- return;
- }
- if (!fp_access_check(s)) {
- return;
- }
- handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
- return;
case 0x13: /* SHLL, SHLL2 */
if (u == 0 || size == 3) {
unallocated_encoding(s);
@@ -10207,9 +10133,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
default:
case 0x0: /* REV64 */
case 0x1: /* REV16, REV32 */
+ case 0x2: /* SADDLP, UADDLP */
case 0x3: /* SUQADD, USQADD */
case 0x4: /* CLS, CLZ */
case 0x5: /* CNT, NOT, RBIT */
+ case 0x6: /* SADALP, UADALP */
case 0x7: /* SQABS, SQNEG */
case 0x8: /* CMGT, CMGE */
case 0x9: /* CMEQ, CMLE */
@@ -1662,3 +1662,8 @@ CMLT0_v 0.00 1110 ..1 00000 10101 0 ..... ..... @qrr_e
REV16_v 0.00 1110 001 00000 00011 0 ..... ..... @qrr_b
REV32_v 0.10 1110 0.1 00000 00011 0 ..... ..... @qrr_bh
REV64_v 0.00 1110 ..1 00000 00001 0 ..... ..... @qrr_e
+
+SADDLP_v 0.00 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+UADDLP_v 0.10 1110 ..1 00000 00101 0 ..... ..... @qrr_e
+SADALP_v 0.00 1110 ..1 00000 01101 0 ..... ..... @qrr_e
+UADALP_v 0.10 1110 ..1 00000 01101 0 ..... ..... @qrr_e
This includes SADDLP, UADDLP, SADALP, UADALP. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-a64.h | 2 - target/arm/tcg/helper-a64.c | 18 -------- target/arm/tcg/translate-a64.c | 84 +++------------------------------- target/arm/tcg/a64.decode | 5 ++ 4 files changed, 11 insertions(+), 98 deletions(-)