Message ID | 20221117070316.58447-7-liweiwei@iscas.ac.cn (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | support subsets of code size reduction extension | expand |
On 11/16/22 23:03, Weiwei Li wrote: > Add encode, trans* functions for Zcmp instructions > > Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> > Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> > --- > target/riscv/insn16.decode | 18 ++ > target/riscv/insn_trans/trans_rvzce.c.inc | 242 +++++++++++++++++++++- > target/riscv/translate.c | 5 + > 3 files changed, 264 insertions(+), 1 deletion(-) Better, though... > +static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a) > +{ > + /* rlist 0 to 3 are reserved for future EABI variant */ > + if (a->zcmp_rlist < 4) { > + return false; > + } > + > + /* rlist <= 6 when RV32E/RV64E */ > + if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) { > + return false; > + } > + > + return true; > +} This could be merged into... > + > +#define X_S0 8 > +#define X_S1 9 > +#define X_Sn 16 > + > +static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list) ... here. For instance, one way is to return false when the list is invalid. Better is to return a uint32_t bitmap of the registers in the list, with 0 indicating invalid. Nit 1: Remove the inline. Nit 2: A better name might be decode_push_pop_list. > +static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist, > + target_ulong spimm) > +{ > + target_ulong stack_adj_base = 0; > + switch (rlist) { > + case 15: > + stack_adj_base = bytes == 4 ? 64 : 112; > + break; > + case 14: > + stack_adj_base = bytes == 4 ? 48 : 96; > + break; > + case 13: > + case 12: > + stack_adj_base = bytes == 4 ? 48 : 80; > + break; > + case 11: > + case 10: > + stack_adj_base = bytes == 4 ? 32 : 64; > + break; > + case 9: > + case 8: > + stack_adj_base = bytes == 4 ? 32 : 48; > + break; > + case 7: > + case 6: > + stack_adj_base = bytes == 4 ? 16 : 32; > + break; > + case 5: > + case 4: > + stack_adj_base = 16; > + break; > + } I really dislike this, as it replicates the decoding done just above. I think this ought to be simply: ROUND_UP(ctpop32(reg_bitmap) * reg_size, 16) + spimm > +static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val) > +{ > + REQUIRE_ZCMP(ctx); > + > + if (!gen_zcmp_check(ctx, a)) { > + return false; > + } > + > + bool xreg_list[32] = {false}; > + int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8; Better with MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ; int reg_size = memop_size(memop); > + switch (bytes) { > + case 4: > + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32); > + break; > + case 8: > + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64); > + break; > + default: > + break; > + } These are incorrect in that they do not indicate the target endianness. Better to merge the two using the common memop computed above: tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, memop); > +static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a) > +{ > + REQUIRE_ZCMP(ctx); > + > + TCGv a0 = get_gpr(ctx, xA0, EXT_NONE); > + TCGv a1 = get_gpr(ctx, xA1, EXT_NONE); > + > + gen_set_gpr(ctx, a->rs1, a0); > + gen_set_gpr(ctx, a->rs2, a1); rs1 must not equal rs2. r~
Thanks for your detail comments! On 2022/11/17 17:44, Richard Henderson wrote: > On 11/16/22 23:03, Weiwei Li wrote: >> Add encode, trans* functions for Zcmp instructions >> >> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn> >> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn> >> --- >> target/riscv/insn16.decode | 18 ++ >> target/riscv/insn_trans/trans_rvzce.c.inc | 242 +++++++++++++++++++++- >> target/riscv/translate.c | 5 + >> 3 files changed, 264 insertions(+), 1 deletion(-) > > Better, though... > >> +static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a) >> +{ >> + /* rlist 0 to 3 are reserved for future EABI variant */ >> + if (a->zcmp_rlist < 4) { >> + return false; >> + } >> + >> + /* rlist <= 6 when RV32E/RV64E */ >> + if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) { >> + return false; >> + } >> + >> + return true; >> +} > > This could be merged into... > >> + >> +#define X_S0 8 >> +#define X_S1 9 >> +#define X_Sn 16 >> + >> +static inline void update_push_pop_list(target_ulong rlist, bool >> *xreg_list) > > ... here. > > For instance, one way is to return false when the list is invalid. > Better is to return a uint32_t bitmap of the registers in the list, > with 0 indicating invalid. > > Nit 1: Remove the inline. > Nit 2: A better name might be decode_push_pop_list. OK. I'll update this in next version. > >> +static inline target_ulong caculate_stack_adj(int bytes, >> target_ulong rlist, >> + target_ulong spimm) >> +{ >> + target_ulong stack_adj_base = 0; >> + switch (rlist) { >> + case 15: >> + stack_adj_base = bytes == 4 ? 64 : 112; >> + break; >> + case 14: >> + stack_adj_base = bytes == 4 ? 48 : 96; >> + break; >> + case 13: >> + case 12: >> + stack_adj_base = bytes == 4 ? 48 : 80; >> + break; >> + case 11: >> + case 10: >> + stack_adj_base = bytes == 4 ? 32 : 64; >> + break; >> + case 9: >> + case 8: >> + stack_adj_base = bytes == 4 ? 32 : 48; >> + break; >> + case 7: >> + case 6: >> + stack_adj_base = bytes == 4 ? 16 : 32; >> + break; >> + case 5: >> + case 4: >> + stack_adj_base = 16; >> + break; >> + } > > I really dislike this, as it replicates the decoding done just above. > I think this ought to be simply: > > ROUND_UP(ctpop32(reg_bitmap) * reg_size, 16) + spimm Yeah. This is more simply. I'll update this. > > >> +static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool >> ret_val) >> +{ >> + REQUIRE_ZCMP(ctx); >> + >> + if (!gen_zcmp_check(ctx, a)) { >> + return false; >> + } >> + >> + bool xreg_list[32] = {false}; >> + int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8; > > Better with > > MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ; > int reg_size = memop_size(memop); OK. > >> + switch (bytes) { >> + case 4: >> + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32); >> + break; >> + case 8: >> + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64); >> + break; >> + default: >> + break; >> + } > > These are incorrect in that they do not indicate the target endianness. > Better to merge the two using the common memop computed above: > > tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, memop); Yeah. I didn't take the target endianness into consideration. I'll fix this. >> +static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a) >> +{ >> + REQUIRE_ZCMP(ctx); >> + >> + TCGv a0 = get_gpr(ctx, xA0, EXT_NONE); >> + TCGv a1 = get_gpr(ctx, xA1, EXT_NONE); >> + >> + gen_set_gpr(ctx, a->rs1, a0); >> + gen_set_gpr(ctx, a->rs2, a1); > > rs1 must not equal rs2. > > Yeah. I lost this check. I'll add it. Regards, Weiwei Li > r~
diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode index 47603ec1e0..4654c23052 100644 --- a/target/riscv/insn16.decode +++ b/target/riscv/insn16.decode @@ -21,6 +21,8 @@ %rs1_3 7:3 !function=ex_rvc_register %rs2_3 2:3 !function=ex_rvc_register %rs2_5 2:5 +%sreg1 7:3 !function=ex_sreg_register +%sreg2 2:3 !function=ex_sreg_register # Immediates: %imm_ci 12:s1 2:5 @@ -45,6 +47,8 @@ %zcb_b_uimm 5:1 6:1 %zcb_h_uimm 5:1 !function=ex_shift_1 +%zcmp_spimm 2:2 !function=ex_shift_4 +%zcmp_rlist 4:4 # Argument sets imported from insn32.decode: &empty !extern @@ -56,7 +60,9 @@ &u imm rd !extern &shift shamt rs1 rd !extern &r2 rd rs1 !extern +&r2_s rs1 rs2 !extern +&zcmp zcmp_rlist zcmp_spimm # Formats 16: @cr .... ..... ..... .. &r rs2=%rs2_5 rs1=%rd %rd @@ -98,6 +104,8 @@ @zcb_lh ... . .. ... .. ... .. &i imm=%zcb_h_uimm rs1=%rs1_3 rd=%rs2_3 @zcb_sb ... . .. ... .. ... .. &s imm=%zcb_b_uimm rs1=%rs1_3 rs2=%rs2_3 @zcb_sh ... . .. ... .. ... .. &s imm=%zcb_h_uimm rs1=%rs1_3 rs2=%rs2_3 +@zcmp ... ... ........ .. &zcmp %zcmp_rlist %zcmp_spimm +@cm_mv ... ... ... .. ... .. &r2_s rs2=%sreg2 rs1=%sreg1 # *** RV32/64C Standard Extension (Quadrant 0) *** { @@ -177,6 +185,16 @@ slli 000 . ..... ..... 10 @c_shift2 { sq 101 ... ... .. ... 10 @c_sqsp c_fsd 101 ...... ..... 10 @c_sdsp + + # *** RV64 and RV32 Zcmp Extension *** + [ + cm_push 101 11000 .... .. 10 @zcmp + cm_pop 101 11010 .... .. 10 @zcmp + cm_popret 101 11110 .... .. 10 @zcmp + cm_popretz 101 11100 .... .. 10 @zcmp + cm_mva01s 101 011 ... 11 ... 10 @cm_mv + cm_mvsa01 101 011 ... 01 ... 10 @cm_mv + ] } sw 110 . ..... ..... 10 @c_swsp diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc b/target/riscv/insn_trans/trans_rvzce.c.inc index de96c4afaf..f45224e388 100644 --- a/target/riscv/insn_trans/trans_rvzce.c.inc +++ b/target/riscv/insn_trans/trans_rvzce.c.inc @@ -1,5 +1,5 @@ /* - * RISC-V translation routines for the Zcb Standard Extension. + * RISC-V translation routines for the Zc[b,mp] Standard Extension. * * Copyright (c) 2021-2022 PLCT Lab * @@ -21,6 +21,11 @@ return false; \ } while (0) +#define REQUIRE_ZCMP(ctx) do { \ + if (!ctx->cfg_ptr->ext_zcmp) \ + return false; \ +} while (0) + static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a) { REQUIRE_ZCB(ctx); @@ -98,3 +103,238 @@ static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a) REQUIRE_ZCB(ctx); return gen_store(ctx, a, MO_UW); } + +static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a) +{ + /* rlist 0 to 3 are reserved for future EABI variant */ + if (a->zcmp_rlist < 4) { + return false; + } + + /* rlist <= 6 when RV32E/RV64E */ + if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) { + return false; + } + + return true; +} + +#define X_S0 8 +#define X_S1 9 +#define X_Sn 16 + +static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list) +{ + switch (rlist) { + case 15: + xreg_list[X_Sn + 11] = true; + xreg_list[X_Sn + 10] = true; + /* FALL THROUGH */ + case 14: + xreg_list[X_Sn + 9] = true; + /* FALL THROUGH */ + case 13: + xreg_list[X_Sn + 8] = true; + /* FALL THROUGH */ + case 12: + xreg_list[X_Sn + 7] = true; + /* FALL THROUGH */ + case 11: + xreg_list[X_Sn + 6] = true; + /* FALL THROUGH */ + case 10: + xreg_list[X_Sn + 5] = true; + /* FALL THROUGH */ + case 9: + xreg_list[X_Sn + 4] = true; + /* FALL THROUGH */ + case 8: + xreg_list[X_Sn + 3] = true; + /* FALL THROUGH */ + case 7: + xreg_list[X_Sn + 2] = true; + /* FALL THROUGH */ + case 6: + xreg_list[X_S1] = true; + /* FALL THROUGH */ + case 5: + xreg_list[X_S0] = true; + /* FALL THROUGH */ + case 4: + xreg_list[xRA] = true; + break; + } +} + +static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist, + target_ulong spimm) +{ + target_ulong stack_adj_base = 0; + switch (rlist) { + case 15: + stack_adj_base = bytes == 4 ? 64 : 112; + break; + case 14: + stack_adj_base = bytes == 4 ? 48 : 96; + break; + case 13: + case 12: + stack_adj_base = bytes == 4 ? 48 : 80; + break; + case 11: + case 10: + stack_adj_base = bytes == 4 ? 32 : 64; + break; + case 9: + case 8: + stack_adj_base = bytes == 4 ? 32 : 48; + break; + case 7: + case 6: + stack_adj_base = bytes == 4 ? 16 : 32; + break; + case 5: + case 4: + stack_adj_base = 16; + break; + } + + return stack_adj_base + spimm; +} + +static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val) +{ + REQUIRE_ZCMP(ctx); + + if (!gen_zcmp_check(ctx, a)) { + return false; + } + + bool xreg_list[32] = {false}; + int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8; + target_ulong stack_adj = caculate_stack_adj(bytes, a->zcmp_rlist, + a->zcmp_spimm); + TCGv sp = dest_gpr(ctx, xSP); + TCGv addr = tcg_temp_new(); + int i; + + update_push_pop_list(a->zcmp_rlist, xreg_list); + tcg_gen_addi_tl(addr, sp, stack_adj - bytes); + + for (i = X_Sn + 11; i >= 0; i--) { + if (xreg_list[i]) { + TCGv dest = dest_gpr(ctx, i); + switch (bytes) { + case 4: + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32); + break; + case 8: + tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64); + break; + default: + break; + } + gen_set_gpr(ctx, i, dest); + tcg_gen_subi_tl(addr, addr, bytes); + } + } + + tcg_gen_addi_tl(sp, sp, stack_adj); + gen_set_gpr(ctx, xSP, sp); + + if (ret_val) { + gen_set_gpr(ctx, xA0, ctx->zero); + } + + if (ret) { + TCGv ret_addr = get_gpr(ctx, xRA, EXT_NONE); + gen_set_pc(ctx, ret_addr); + tcg_gen_lookup_and_goto_ptr(); + ctx->base.is_jmp = DISAS_NORETURN; + } + + tcg_temp_free(addr); + return true; +} + +static bool trans_cm_push(DisasContext *ctx, arg_cm_push *a) +{ + REQUIRE_ZCMP(ctx); + + if (!gen_zcmp_check(ctx, a)) { + return false; + } + + bool xreg_list[32] = {false}; + int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8; + target_ulong stack_adj = caculate_stack_adj(bytes, a->zcmp_rlist, + a->zcmp_spimm); + TCGv sp = dest_gpr(ctx, xSP); + TCGv addr = tcg_temp_new(); + int i; + + update_push_pop_list(a->zcmp_rlist, xreg_list); + tcg_gen_subi_tl(addr, sp, bytes); + + for (i = X_Sn + 11; i >= 0; i--) { + if (xreg_list[i]) { + TCGv val = get_gpr(ctx, i, EXT_NONE); + switch (bytes) { + case 4: + tcg_gen_qemu_st_tl(val, addr, ctx->mem_idx, MO_32); + break; + case 8: + tcg_gen_qemu_st_tl(val, addr, ctx->mem_idx, MO_64); + break; + default: + break; + } + tcg_gen_subi_tl(addr, addr, bytes); + } + } + + tcg_gen_subi_tl(sp, sp, stack_adj); + gen_set_gpr(ctx, xSP, sp); + return true; +} + +static bool trans_cm_pop(DisasContext *ctx, arg_cm_pop *a) +{ + return gen_pop(ctx, a, false, false); +} + +static bool trans_cm_popret(DisasContext *ctx, arg_cm_popret *a) +{ + return gen_pop(ctx, a, true, false); +} + +static bool trans_cm_popretz(DisasContext *ctx, arg_cm_popret *a) +{ + return gen_pop(ctx, a, true, true); +} + +static bool trans_cm_mva01s(DisasContext *ctx, arg_cm_mva01s *a) +{ + REQUIRE_ZCMP(ctx); + + TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE); + TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); + + gen_set_gpr(ctx, xA0, src1); + gen_set_gpr(ctx, xA1, src2); + + return true; +} + +static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a) +{ + REQUIRE_ZCMP(ctx); + + TCGv a0 = get_gpr(ctx, xA0, EXT_NONE); + TCGv a1 = get_gpr(ctx, xA1, EXT_NONE); + + gen_set_gpr(ctx, a->rs1, a0); + gen_set_gpr(ctx, a->rs2, a1); + + return true; +} diff --git a/target/riscv/translate.c b/target/riscv/translate.c index ca01da3309..1b2515650f 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -738,6 +738,11 @@ static int ex_rvc_register(DisasContext *ctx, int reg) return 8 + reg; } +static int ex_sreg_register(DisasContext *ctx, int reg) +{ + return reg < 2 ? reg + 8 : reg + 16; +} + static int ex_rvc_shiftli(DisasContext *ctx, int imm) { /* For RV128 a shamt of 0 means a shift by 64. */