@@ -135,6 +135,9 @@ static const TCGReg tcg_target_call_oarg_regs[2] = {
};
static tcg_insn_unit *tb_ret_addr;
+static tcg_insn_unit *bswap32s_addr;
+static tcg_insn_unit *bswap32u_addr;
+static tcg_insn_unit *bswap64_addr;
static inline uint32_t reloc_pc16_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
@@ -187,6 +190,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct_str = *pct_str;
switch(ct_str[0]) {
case 'r':
+ do_default:
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
break;
@@ -208,6 +212,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
tcg_regset_set(ct->u.regs, 0xffffffff);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_V0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
@@ -218,6 +223,22 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
}
#endif
break;
+ case 'v': /* bswap output constraint */
+ if (use_mips32r2_instructions) {
+ goto do_default;
+ }
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_clear(ct->u.regs);
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_V0);
+ break;
+ case 'a': /* bswap input constraint */
+ if (use_mips32r2_instructions) {
+ goto do_default;
+ }
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_clear(ct->u.regs);
+ tcg_regset_set_reg(ct->u.regs, TCG_REG_A0);
+ break;
case 'I':
ct->ct |= TCG_CT_CONST_U16;
break;
@@ -618,29 +639,23 @@ static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
}
}
+/*
+ * Emit a JAL to SUB, aborting if tcg_out_opc_jmp reports failure.
+ * Callers emit the delay-slot instruction immediately afterwards.
+ */
+static void tcg_out_bswap_subr(TCGContext *s, tcg_insn_unit *sub)
+{
+    if (tcg_out_opc_jmp(s, OPC_JAL, sub)) {
+        return;
+    }
+    tcg_abort();
+}
+
+/*
+ * Emit a 32-bit byte swap of ARG into RET.  With the R2 instructions this
+ * is WSBH followed by a rotate right by 16.  Otherwise it is a JAL to the
+ * routine at bswap32s_addr; in that case RET must be TCG_REG_V0 and ARG
+ * is copied into TCG_REG_A0 by the instruction in the delay slot.
+ */
static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
{
if (use_mips32r2_instructions) {
tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
} else {
- /* ret and arg must be different and can't be register at */
- if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
- tcg_abort();
- }
-
- tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
-
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 24);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ assert(ret == TCG_REG_V0);
+ tcg_out_bswap_subr(s, bswap32s_addr);
+ /* delay slot: copy arg into A0 (OR with ZERO acts as a move) */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
}
}
@@ -648,26 +663,13 @@ static inline void tcg_out_bswap32u(TCGContext *s, TCGReg ret, TCGReg arg)
{
if (use_mips32r2_instructions) {
tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
- tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
tcg_out_dsrl(s, ret, ret, 32);
} else {
- /* ret and arg must be different and can't be register at */
- if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0) {
- tcg_abort();
- }
-
- tcg_out_dsll(s, ret, arg, 24);
-
- tcg_out_dsrl(s, TCG_TMP0, arg, 24);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
- tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 8);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- tcg_out_dsrl(s, TCG_TMP0, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0xff00);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ assert(ret == TCG_REG_V0);
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
}
}
@@ -677,44 +679,10 @@ static void tcg_out_bswap64(TCGContext *s, TCGReg ret, TCGReg arg)
tcg_out_opc_reg(s, OPC_DSBH, ret, 0, arg);
- tcg_out_opc_reg(s, OPC_DSHD, ret, 0, arg);
+ /* DSHD must consume the DSBH result; reading arg again discards it. */
+ tcg_out_opc_reg(s, OPC_DSHD, ret, 0, ret);
} else {
- /* ret and arg must be different and can't be either tmp reg. */
- if (ret == arg || ret == TCG_TMP0 || arg == TCG_TMP0
- || ret == TCG_TMP1 || arg == TCG_TMP1) {
- tcg_abort();
- }
-
- /* ??? Consider just making this a subroutine. */
-
- /* A... ...H -> H... ...A */
- tcg_out_dsll(s, ret, arg, 56);
- tcg_out_dsrl(s, TCG_TMP0, arg, 56);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- /* .B.. ..G. -> .G.. ..B. */
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, arg, 0xff00);
- tcg_out_dsrl(s, TCG_TMP1, arg, 40);
- tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
-
- /* ..CD .... -> .... DC.. */
- tcg_out_dsrl(s, TCG_TMP0, arg, 32);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
- tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
- tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
-
- /* .... EF.. -> ..FE .... */
- tcg_out_dsrl(s, TCG_TMP0, arg, 16);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
- tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
- tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP1);
- tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_TMP0);
+ assert(ret == TCG_REG_V0);
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_A0, arg, TCG_REG_ZERO);
}
}
@@ -1425,72 +1393,111 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
#endif
-static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, TCGMemOp opc, bool is_64)
{
+ bool hi_first = MIPS_BE ? hi != base : lo == base;
+
switch (opc & (MO_SSIZE | MO_BSWAP)) {
case MO_UB:
- tcg_out_opc_imm(s, OPC_LBU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
break;
case MO_SB:
- tcg_out_opc_imm(s, OPC_LB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
break;
case MO_UW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16(s, datalo, TCG_TMP1);
+ tcg_out_bswap16(s, lo, TCG_TMP1);
break;
case MO_UW:
- tcg_out_opc_imm(s, OPC_LHU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
break;
case MO_SW | MO_BSWAP:
tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
- tcg_out_bswap16s(s, datalo, TCG_TMP1);
+ tcg_out_bswap16s(s, lo, TCG_TMP1);
break;
case MO_SW:
- tcg_out_opc_imm(s, OPC_LH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
break;
case MO_UL | MO_BSWAP:
if (TCG_TARGET_REG_BITS == 64 && is_64) {
- tcg_out_opc_imm(s, OPC_LWU, TCG_TMP1, base, 0);
- tcg_out_bswap32u(s, datalo, TCG_TMP1);
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
+ tcg_out_bswap32u(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32u_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LWU, TCG_REG_A0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0);
+ }
break;
}
/* FALLTHRU */
case MO_SL | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 0);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
+ if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
+ tcg_out_bswap32(s, lo, lo);
+ } else {
+ tcg_out_bswap_subr(s, bswap32s_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_V0);
+ }
break;
case MO_UL:
if (TCG_TARGET_REG_BITS == 64 && is_64) {
- tcg_out_opc_imm(s, OPC_LWU, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
break;
}
/* FALLTHRU */
case MO_SL:
- tcg_out_opc_imm(s, OPC_LW, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
break;
case MO_Q | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, HI_OFF);
- tcg_out_bswap32(s, datalo, TCG_TMP1);
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, LO_OFF);
- tcg_out_bswap32(s, datahi, TCG_TMP1);
+ if (TCG_TARGET_REG_BITS == 64 && use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ tcg_out_bswap64(s, lo, lo);
+ } else if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_bswap_subr(s, bswap64_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LD, TCG_REG_A0, base, 0);
+ tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_REG_V0);
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
} else {
- tcg_out_opc_imm(s, OPC_LD, TCG_REG_V0, base, 0);
- tcg_out_bswap64(s, datalo, TCG_REG_V0);
+ /*
+ * Swap each 32-bit half through the bswap32s subroutine (A0 in,
+ * V0 out).  The two halves must be loaded from different offsets;
+ * the first result is parked in A2 while the second is computed.
+ * NOTE(review): the first LW overwrites A0, so this presumably
+ * relies on base != A0 -- confirm against the callers.
+ */
+ tcg_out_bswap_subr(s, bswap32s_addr);
+ /* delay slot */
+ tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base,
+ hi_first ? LO_OFF : HI_OFF);
+ tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_V0);
+
+ tcg_out_bswap_subr(s, bswap32s_addr);
+ /* delay slot: load the other word, not the one swapped above */
+ tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, base,
+ hi_first ? HI_OFF : LO_OFF);
+ tcg_out_mov(s, TCG_TYPE_I32, hi_first ? lo : hi, TCG_REG_V0);
+ tcg_out_mov(s, TCG_TYPE_I32, hi_first ? hi : lo, TCG_REG_A2);
}
break;
case MO_Q:
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_opc_imm(s, OPC_LW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_LW, datahi, base, HI_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
+ } else if (hi_first) {
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
} else {
- tcg_out_opc_imm(s, OPC_LD, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF);
+ tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF);
}
break;
default:
@@ -1540,54 +1547,62 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, TCGMemOp opc)
{
- if ((datalo | datahi) == 0) {
+ /* Don't clutter the code below with checks to avoid bswapping ZERO. */
+ if ((lo | hi) == 0) {
opc &= ~MO_BSWAP;
}
switch (opc & (MO_SIZE | MO_BSWAP)) {
case MO_8:
- tcg_out_opc_imm(s, OPC_SB, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
break;
case MO_16 | MO_BSWAP:
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, datalo, 0xffff);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, lo, 0xffff);
tcg_out_bswap16(s, TCG_TMP1, TCG_TMP1);
- datalo = TCG_TMP1;
+ lo = TCG_TMP1;
/* FALLTHRU */
case MO_16:
- tcg_out_opc_imm(s, OPC_SH, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
break;
case MO_32 | MO_BSWAP:
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- datalo = TCG_TMP1;
+ tcg_out_bswap32(s, TCG_REG_V0, lo);
+ lo = TCG_REG_V0;
/* FALLTHRU */
case MO_32:
- tcg_out_opc_imm(s, OPC_SW, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
break;
case MO_64 | MO_BSWAP:
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_bswap32(s, TCG_TMP1, datalo);
- datalo = TCG_TMP1;
- tcg_out_opc_imm(s, OPC_SW, datalo, base, HI_OFF);
- tcg_out_bswap32(s, TCG_TMP1, datahi);
- datahi = TCG_TMP1;
- tcg_out_opc_imm(s, OPC_SW, datahi, base, LO_OFF);
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_bswap64(s, TCG_REG_V0, lo);
+ lo = TCG_REG_V0;
+ } else if (use_mips32r2_instructions) {
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
+ tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
+ tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
+ break;
+ } else {
+ tcg_out_bswap32(s, TCG_REG_V0, lo);
+ tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, HI_OFF);
+ tcg_out_bswap32(s, TCG_REG_V0, hi);
+ tcg_out_opc_imm(s, OPC_SW, TCG_REG_V0, base, LO_OFF);
break;
}
- tcg_out_bswap64(s, TCG_REG_A1, datalo);
- datalo = TCG_REG_A1;
/* FALLTHRU */
case MO_64:
if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_opc_imm(s, OPC_SW, datalo, base, LO_OFF);
- tcg_out_opc_imm(s, OPC_SW, datahi, base, HI_OFF);
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0);
+ tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
} else {
- tcg_out_opc_imm(s, OPC_SD, datalo, base, 0);
+ tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
}
break;
@@ -2117,7 +2132,7 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
{ INDEX_op_bswap16_i32, { "r", "r" } },
- { INDEX_op_bswap32_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "v", "a" } },
{ INDEX_op_ext8s_i32, { "r", "rZ" } },
{ INDEX_op_ext16s_i32, { "r", "rZ" } },
@@ -2179,8 +2194,8 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_rotl_i64, { "r", "rZ", "ri" } },
{ INDEX_op_bswap16_i64, { "r", "r" } },
- { INDEX_op_bswap32_i64, { "r", "r" } },
- { INDEX_op_bswap64_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "v", "a" } },
+ { INDEX_op_bswap64_i64, { "v", "a" } },
{ INDEX_op_ext8s_i64, { "r", "rZ" } },
{ INDEX_op_ext16s_i64, { "r", "rZ" } },
@@ -2324,6 +2339,16 @@ static void tcg_target_detect_isa(void)
/* We're expecting to be able to use an immediate for frame allocation. */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7fff);
+/*
+ * Round s->code_ptr up to the next 16-byte boundary (leaving it unchanged
+ * when already aligned) and return the resulting pointer.  Any bytes
+ * skipped over are not written.
+ */
+static tcg_insn_unit *align_code_ptr(TCGContext *s)
+{
+    const uintptr_t mask = 15;
+    uintptr_t addr = (uintptr_t)s->code_ptr;
+
+    if ((addr & mask) != 0) {
+        s->code_ptr = (void *)((addr + mask) & ~mask);
+    }
+    return s->code_ptr;
+}
+
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
@@ -2353,6 +2378,128 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
/* delay slot */
tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+
+ if (use_mips32r2_instructions) {
+ return;
+ }
+
+ /* Bswap subroutines: Input in TCG_REG_A0, output in TCG_REG_V0;
+ clobbers TCG_TMP1, TCG_TMP0. */
+
+ bswap32s_addr = align_code_ptr(s);
+
+ /*
+ * bswap32s -- signed 32-bit swap. a0 = abcd.
+ */
+ /* v0 = (ssss)d000 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_REG_V0, TCG_REG_A0, 24);
+ /* t1 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 24);
+ /* t0 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+ /* v0 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8);
+ /* t0 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* v0 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* v0 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ return;
+ }
+
+ bswap32u_addr = align_code_ptr(s);
+
+ /*
+ * bswap32u -- unsigned 32-bit swap. a0 = ....abcd.
+ */
+ /* t1 = (0000)000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_REG_A0, 0xff);
+ /* v0 = 000a */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_REG_V0, TCG_REG_A0, 24);
+ /* t1 = (0000)d000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+ /* t0 = 00c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+ /* v0 = d00a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+ /* t1 = 0abc */
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP1, TCG_REG_A0, 8);
+ /* t0 = 0c00 */
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
+ /* t1 = 00b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* v0 = dc0a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* v0 = dcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+ bswap64_addr = align_code_ptr(s);
+
+ /*
+ * bswap64 -- 64-bit swap. a0 = abcdefgh
+ */
+ /* v0 = h0000000 */
+ tcg_out_dsll(s, TCG_REG_V0, TCG_REG_A0, 56);
+ /* t1 = 0000000a */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 56);
+
+ /* t0 = 000000g0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_REG_A0, 0xff00);
+ /* v0 = h000000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+ /* t1 = 00000abc */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 40);
+ /* t0 = 0g000000 */
+ tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
+ /* t1 = 000000b0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+
+ /* v0 = hg00000a */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+ /* t0 = 0000abcd */
+ tcg_out_dsrl(s, TCG_TMP0, TCG_REG_A0, 32);
+ /* v0 = hg0000ba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+
+ /* t1 = 000000c0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP0, 0xff00);
+ /* t0 = 0000000d */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP0, 0x00ff);
+ /* t1 = 00000c00 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 8);
+ /* t0 = 0000d000 */
+ tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 24);
+
+ /* v0 = hg000cba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
+ /* t1 = 00abcdef */
+ tcg_out_dsrl(s, TCG_TMP1, TCG_REG_A0, 16);
+ /* v0 = hg00dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+
+ /* t0 = 0000000f */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, TCG_TMP1, 0x00ff);
+ /* t1 = 000000e0 */
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP1, TCG_TMP1, 0xff00);
+ /* t0 = 00f00000 */
+ tcg_out_dsll(s, TCG_TMP0, TCG_TMP0, 40);
+ /* t1 = 000e0000 */
+ tcg_out_dsll(s, TCG_TMP1, TCG_TMP1, 24);
+
+ /* v0 = hgf0dcba */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP0);
+ tcg_out_opc_reg(s, OPC_JR, 0, TCG_REG_RA, 0);
+ /* v0 = hgfedcba -- delay slot */
+ tcg_out_opc_reg(s, OPC_OR, TCG_REG_V0, TCG_REG_V0, TCG_TMP1);
}
static void tcg_target_init(TCGContext *s)
@@ -128,6 +128,7 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_muls2_i32 (!use_mips32r6_instructions)
#define TCG_TARGET_HAS_muluh_i32 1
#define TCG_TARGET_HAS_mulsh_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_add2_i32 0
@@ -150,12 +151,13 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_mulsh_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
#define TCG_TARGET_HAS_ext32u_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
+#define TCG_TARGET_HAS_bswap64_i64 1
#endif
/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_movcond_i32 use_movnz_instructions
#define TCG_TARGET_HAS_bswap16_i32 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_deposit_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext8s_i32 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i32 use_mips32r2_instructions
@@ -164,8 +166,6 @@ extern bool use_mips32r2_instructions;
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_movcond_i64 use_movnz_instructions
#define TCG_TARGET_HAS_bswap16_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap32_i64 use_mips32r2_instructions
-#define TCG_TARGET_HAS_bswap64_i64 use_mips32r2_instructions
#define TCG_TARGET_HAS_deposit_i64 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext8s_i64 use_mips32r2_instructions
#define TCG_TARGET_HAS_ext16s_i64 use_mips32r2_instructions
Without the mips32r2 / mips64r2 instructions to perform swapping, the 32-bit and 64-bit bswap sequences are quite large. Move them to subroutines in the prologue block to minimize code bloat. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/mips/tcg-target.c | 389 ++++++++++++++++++++++++++++++++++---------------- tcg/mips/tcg-target.h | 6 +- 2 files changed, 271 insertions(+), 124 deletions(-)