[v3,6/9] target/riscv: add support for Zcmp extension

Message ID 20221117070316.58447-7-liweiwei@iscas.ac.cn (mailing list archive)
State New, archived
Series: support subsets of code size reduction extension

Commit Message

Weiwei Li Nov. 17, 2022, 7:03 a.m. UTC
Add encode, trans* functions for Zcmp instructions

Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
---
 target/riscv/insn16.decode                |  18 ++
 target/riscv/insn_trans/trans_rvzce.c.inc | 242 +++++++++++++++++++++-
 target/riscv/translate.c                  |   5 +
 3 files changed, 264 insertions(+), 1 deletion(-)

Comments

Richard Henderson Nov. 17, 2022, 9:44 a.m. UTC | #1
On 11/16/22 23:03, Weiwei Li wrote:
> Add encode, trans* functions for Zcmp instructions
> 
> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
> ---
>   target/riscv/insn16.decode                |  18 ++
>   target/riscv/insn_trans/trans_rvzce.c.inc | 242 +++++++++++++++++++++-
>   target/riscv/translate.c                  |   5 +
>   3 files changed, 264 insertions(+), 1 deletion(-)

Better, though...

> +static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a)
> +{
> +    /* rlist 0 to 3 are reserved for future EABI variant */
> +    if (a->zcmp_rlist < 4) {
> +        return false;
> +    }
> +
> +    /* rlist <= 6 when RV32E/RV64E */
> +    if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) {
> +        return false;
> +    }
> +
> +    return true;
> +}

This could be merged into...

> +
> +#define X_S0    8
> +#define X_S1    9
> +#define X_Sn    16
> +
> +static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list)

... here.

For instance, one way is to return false when the list is invalid.
Better is to return a uint32_t bitmap of the registers in the list, with 0 indicating invalid.

Nit 1: Remove the inline.
Nit 2: A better name might be decode_push_pop_list.
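
For illustration, a rough sketch of such a combined helper (not code from this
series; it reuses xRA and the X_S0/X_S1/X_Sn defines from the patch below, and
returns 0 for the reserved/invalid encodings) might look like:

    static uint32_t decode_push_pop_list(DisasContext *ctx, int rlist)
    {
        /* rlist 0 to 3 are reserved for a future EABI variant */
        if (rlist < 4) {
            return 0;
        }
        /* rlist <= 6 when RV32E/RV64E */
        if (ctx->cfg_ptr->ext_e && rlist > 6) {
            return 0;
        }

        uint32_t reg_bitmap = 1u << xRA;    /* ra is always in the list */
        if (rlist >= 5) {
            reg_bitmap |= 1u << X_S0;
        }
        if (rlist >= 6) {
            reg_bitmap |= 1u << X_S1;
        }
        if (rlist >= 7) {
            /* rlist 7..14 add s2..s9; rlist 15 adds s2..s11 */
            int nregs = rlist == 15 ? 10 : rlist - 6;
            for (int i = 0; i < nregs; i++) {
                reg_bitmap |= 1u << (X_Sn + 2 + i);
            }
        }
        return reg_bitmap;
    }

A zero return then doubles as the "invalid encoding" signal at the call sites.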

> +static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist,
> +                                              target_ulong spimm)
> +{
> +    target_ulong stack_adj_base = 0;
> +    switch (rlist) {
> +    case 15:
> +        stack_adj_base = bytes == 4 ? 64 : 112;
> +        break;
> +    case 14:
> +        stack_adj_base = bytes == 4 ? 48 : 96;
> +        break;
> +    case 13:
> +    case 12:
> +        stack_adj_base = bytes == 4 ? 48 : 80;
> +        break;
> +    case 11:
> +    case 10:
> +        stack_adj_base = bytes == 4 ? 32 : 64;
> +        break;
> +    case 9:
> +    case 8:
> +        stack_adj_base = bytes == 4 ? 32 : 48;
> +        break;
> +    case 7:
> +    case 6:
> +        stack_adj_base = bytes == 4 ? 16 : 32;
> +        break;
> +    case 5:
> +    case 4:
> +        stack_adj_base = 16;
> +        break;
> +    }

I really dislike this, as it replicates the decoding done just above.
I think this ought to be simply:

     ROUND_UP(ctpop32(reg_bitmap) * reg_size, 16) + spimm
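
(As a cross-check against the table being replaced: rlist 15 on RV64 saves ra
plus s0-s11, i.e. 13 registers, and ROUND_UP(13 * 8, 16) = 112; rlist 13 saves
10 registers and ROUND_UP(10 * 8, 16) = 80, both matching the switch above.)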


> +static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val)
> +{
> +    REQUIRE_ZCMP(ctx);
> +
> +    if (!gen_zcmp_check(ctx, a)) {
> +        return false;
> +    }
> +
> +    bool xreg_list[32] = {false};
> +    int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8;

Better with

     MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ;
     int reg_size = memop_size(memop);
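
A hypothetical fragment of gen_pop showing how these feed the stack-adjust
formula above (assuming the decode_push_pop_list sketch earlier in this
review):

    uint32_t reg_bitmap = decode_push_pop_list(ctx, a->zcmp_rlist);
    if (reg_bitmap == 0) {
        return false;
    }

    MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ;
    int reg_size = memop_size(memop);
    target_ulong stack_adj = ROUND_UP(ctpop32(reg_bitmap) * reg_size, 16)
                             + a->zcmp_spimm;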

> +            switch (bytes) {
> +            case 4:
> +                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32);
> +                break;
> +            case 8:
> +                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64);
> +                break;
> +            default:
> +                break;
> +            }

These are incorrect in that they do not indicate the target endianness.
Better to merge the two using the common memop computed above:

     tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, memop);

> +static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a)
> +{
> +    REQUIRE_ZCMP(ctx);
> +
> +    TCGv a0 = get_gpr(ctx, xA0, EXT_NONE);
> +    TCGv a1 = get_gpr(ctx, xA1, EXT_NONE);
> +
> +    gen_set_gpr(ctx, a->rs1, a0);
> +    gen_set_gpr(ctx, a->rs2, a1);

rs1 must not equal rs2.
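
A minimal way to enforce that (hypothetical; it only adds the check to the
trans_cm_mvsa01 shown in the patch) would be:

    static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a)
    {
        REQUIRE_ZCMP(ctx);

        /* cm.mvsa01 requires two distinct sreg destinations */
        if (a->rs1 == a->rs2) {
            return false;
        }

        TCGv a0 = get_gpr(ctx, xA0, EXT_NONE);
        TCGv a1 = get_gpr(ctx, xA1, EXT_NONE);

        gen_set_gpr(ctx, a->rs1, a0);
        gen_set_gpr(ctx, a->rs2, a1);
        return true;
    }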


r~
Weiwei Li Nov. 17, 2022, 11:41 a.m. UTC | #2
Thanks for your detailed comments!

On 2022/11/17 17:44, Richard Henderson wrote:
> On 11/16/22 23:03, Weiwei Li wrote:
>> Add encode, trans* functions for Zcmp instructions
>>
>> Signed-off-by: Weiwei Li <liweiwei@iscas.ac.cn>
>> Signed-off-by: Junqiang Wang <wangjunqiang@iscas.ac.cn>
>> ---
>>   target/riscv/insn16.decode                |  18 ++
>>   target/riscv/insn_trans/trans_rvzce.c.inc | 242 +++++++++++++++++++++-
>>   target/riscv/translate.c                  |   5 +
>>   3 files changed, 264 insertions(+), 1 deletion(-)
>
> Better, though...
>
>> +static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a)
>> +{
>> +    /* rlist 0 to 3 are reserved for future EABI variant */
>> +    if (a->zcmp_rlist < 4) {
>> +        return false;
>> +    }
>> +
>> +    /* rlist <= 6 when RV32E/RV64E */
>> +    if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) {
>> +        return false;
>> +    }
>> +
>> +    return true;
>> +}
>
> This could be merged into...
>
>> +
>> +#define X_S0    8
>> +#define X_S1    9
>> +#define X_Sn    16
>> +
>> +static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list)
>
> ... here.
>
> For instance, one way is to return false when the list is invalid.
> Better is to return a uint32_t bitmap of the registers in the list, 
> with 0 indicating invalid.
>
> Nit 1: Remove the inline.
> Nit 2: A better name might be decode_push_pop_list.
OK. I'll update this in the next version.
>
>> +static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist,
>> +                                              target_ulong spimm)
>> +{
>> +    target_ulong stack_adj_base = 0;
>> +    switch (rlist) {
>> +    case 15:
>> +        stack_adj_base = bytes == 4 ? 64 : 112;
>> +        break;
>> +    case 14:
>> +        stack_adj_base = bytes == 4 ? 48 : 96;
>> +        break;
>> +    case 13:
>> +    case 12:
>> +        stack_adj_base = bytes == 4 ? 48 : 80;
>> +        break;
>> +    case 11:
>> +    case 10:
>> +        stack_adj_base = bytes == 4 ? 32 : 64;
>> +        break;
>> +    case 9:
>> +    case 8:
>> +        stack_adj_base = bytes == 4 ? 32 : 48;
>> +        break;
>> +    case 7:
>> +    case 6:
>> +        stack_adj_base = bytes == 4 ? 16 : 32;
>> +        break;
>> +    case 5:
>> +    case 4:
>> +        stack_adj_base = 16;
>> +        break;
>> +    }
>
> I really dislike this, as it replicates the decoding done just above.
> I think this ought to be simply:
>
>     ROUND_UP(ctpop32(reg_bitmap) * reg_size, 16) + spimm
Yeah. This is simpler. I'll update this.
>
>
>> +static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val)
>> +{
>> +    REQUIRE_ZCMP(ctx);
>> +
>> +    if (!gen_zcmp_check(ctx, a)) {
>> +        return false;
>> +    }
>> +
>> +    bool xreg_list[32] = {false};
>> +    int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8;
>
> Better with
>
>     MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ;
>     int reg_size = memop_size(memop);
OK.
>
>> +            switch (bytes) {
>> +            case 4:
>> +                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32);
>> +                break;
>> +            case 8:
>> +                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64);
>> +                break;
>> +            default:
>> +                break;
>> +            }
>
> These are incorrect in that they do not indicate the target endianness.
> Better to merge the two using the common memop computed above:
>
>     tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, memop);
Yeah. I didn't take the target endianness into consideration. I'll fix this.
>> +static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a)
>> +{
>> +    REQUIRE_ZCMP(ctx);
>> +
>> +    TCGv a0 = get_gpr(ctx, xA0, EXT_NONE);
>> +    TCGv a1 = get_gpr(ctx, xA1, EXT_NONE);
>> +
>> +    gen_set_gpr(ctx, a->rs1, a0);
>> +    gen_set_gpr(ctx, a->rs2, a1);
>
> rs1 must not equal rs2.
>
>
Yeah. I missed this check. I'll add it.

Regards,

Weiwei Li

> r~

Patch

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index 47603ec1e0..4654c23052 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -21,6 +21,8 @@ 
 %rs1_3     7:3                !function=ex_rvc_register
 %rs2_3     2:3                !function=ex_rvc_register
 %rs2_5     2:5
+%sreg1     7:3                !function=ex_sreg_register
+%sreg2     2:3                !function=ex_sreg_register
 
 # Immediates:
 %imm_ci        12:s1 2:5
@@ -45,6 +47,8 @@ 
 
 %zcb_b_uimm  5:1 6:1
 %zcb_h_uimm  5:1                     !function=ex_shift_1
+%zcmp_spimm  2:2                     !function=ex_shift_4
+%zcmp_rlist  4:4
 
 # Argument sets imported from insn32.decode:
 &empty                  !extern
@@ -56,7 +60,9 @@ 
 &u         imm rd       !extern
 &shift     shamt rs1 rd !extern
 &r2        rd rs1       !extern
+&r2_s      rs1 rs2      !extern
 
+&zcmp      zcmp_rlist zcmp_spimm
 
 # Formats 16:
 @cr        ....  ..... .....  .. &r      rs2=%rs2_5       rs1=%rd     %rd
@@ -98,6 +104,8 @@ 
 @zcb_lh       ... . .. ... .. ... ..  &i  imm=%zcb_h_uimm  rs1=%rs1_3 rd=%rs2_3
 @zcb_sb       ... . .. ... .. ... ..  &s  imm=%zcb_b_uimm  rs1=%rs1_3 rs2=%rs2_3
 @zcb_sh       ... . .. ... .. ... ..  &s  imm=%zcb_h_uimm  rs1=%rs1_3 rs2=%rs2_3
+@zcmp         ... ...  ........   ..  &zcmp  %zcmp_rlist   %zcmp_spimm
+@cm_mv        ... ...  ... .. ... ..  &r2_s  rs2=%sreg2    rs1=%sreg1
 
 # *** RV32/64C Standard Extension (Quadrant 0) ***
 {
@@ -177,6 +185,16 @@  slli              000 .  .....  ..... 10 @c_shift2
 {
   sq              101  ... ... .. ... 10 @c_sqsp
   c_fsd           101   ......  ..... 10 @c_sdsp
+
+  # *** RV64 and RV32 Zcmp Extension ***
+  [
+    cm_push         101  11000  .... .. 10 @zcmp
+    cm_pop          101  11010  .... .. 10 @zcmp
+    cm_popret       101  11110  .... .. 10 @zcmp
+    cm_popretz      101  11100  .... .. 10 @zcmp
+    cm_mva01s       101  011 ... 11 ... 10 @cm_mv
+    cm_mvsa01       101  011 ... 01 ... 10 @cm_mv
+  ]
 }
 sw                110 .  .....  ..... 10 @c_swsp
 
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc b/target/riscv/insn_trans/trans_rvzce.c.inc
index de96c4afaf..f45224e388 100644
--- a/target/riscv/insn_trans/trans_rvzce.c.inc
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -1,5 +1,5 @@ 
 /*
- * RISC-V translation routines for the Zcb Standard Extension.
+ * RISC-V translation routines for the Zc[b,mp] Standard Extension.
  *
  * Copyright (c) 2021-2022 PLCT Lab
  *
@@ -21,6 +21,11 @@ 
         return false;           \
 } while (0)
 
+#define REQUIRE_ZCMP(ctx) do {   \
+    if (!ctx->cfg_ptr->ext_zcmp) \
+        return false;            \
+} while (0)
+
 static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a)
 {
     REQUIRE_ZCB(ctx);
@@ -98,3 +103,238 @@  static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a)
     REQUIRE_ZCB(ctx);
     return gen_store(ctx, a, MO_UW);
 }
+
+static bool gen_zcmp_check(DisasContext *ctx, arg_zcmp *a)
+{
+    /* rlist 0 to 3 are reserved for future EABI variant */
+    if (a->zcmp_rlist < 4) {
+        return false;
+    }
+
+    /* rlist <= 6 when RV32E/RV64E */
+    if (ctx->cfg_ptr->ext_e && a->zcmp_rlist > 6) {
+        return false;
+    }
+
+    return true;
+}
+
+#define X_S0    8
+#define X_S1    9
+#define X_Sn    16
+
+static inline void update_push_pop_list(target_ulong rlist, bool *xreg_list)
+{
+    switch (rlist) {
+    case 15:
+        xreg_list[X_Sn + 11] = true;
+        xreg_list[X_Sn + 10] = true;
+        /* FALL THROUGH */
+    case 14:
+        xreg_list[X_Sn + 9] = true;
+        /* FALL THROUGH */
+    case 13:
+        xreg_list[X_Sn + 8] = true;
+        /* FALL THROUGH */
+    case 12:
+        xreg_list[X_Sn + 7] = true;
+        /* FALL THROUGH */
+    case 11:
+        xreg_list[X_Sn + 6] = true;
+        /* FALL THROUGH */
+    case 10:
+        xreg_list[X_Sn + 5] = true;
+        /* FALL THROUGH */
+    case 9:
+        xreg_list[X_Sn + 4] = true;
+        /* FALL THROUGH */
+    case 8:
+        xreg_list[X_Sn + 3] = true;
+        /* FALL THROUGH */
+    case 7:
+        xreg_list[X_Sn + 2] = true;
+        /* FALL THROUGH */
+    case 6:
+        xreg_list[X_S1] = true;
+        /* FALL THROUGH */
+    case 5:
+        xreg_list[X_S0] = true;
+        /* FALL THROUGH */
+    case 4:
+        xreg_list[xRA] = true;
+        break;
+    }
+}
+
+static inline target_ulong caculate_stack_adj(int bytes, target_ulong rlist,
+                                              target_ulong spimm)
+{
+    target_ulong stack_adj_base = 0;
+    switch (rlist) {
+    case 15:
+        stack_adj_base = bytes == 4 ? 64 : 112;
+        break;
+    case 14:
+        stack_adj_base = bytes == 4 ? 48 : 96;
+        break;
+    case 13:
+    case 12:
+        stack_adj_base = bytes == 4 ? 48 : 80;
+        break;
+    case 11:
+    case 10:
+        stack_adj_base = bytes == 4 ? 32 : 64;
+        break;
+    case 9:
+    case 8:
+        stack_adj_base = bytes == 4 ? 32 : 48;
+        break;
+    case 7:
+    case 6:
+        stack_adj_base = bytes == 4 ? 16 : 32;
+        break;
+    case 5:
+    case 4:
+        stack_adj_base = 16;
+        break;
+    }
+
+    return stack_adj_base + spimm;
+}
+
+static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val)
+{
+    REQUIRE_ZCMP(ctx);
+
+    if (!gen_zcmp_check(ctx, a)) {
+        return false;
+    }
+
+    bool xreg_list[32] = {false};
+    int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8;
+    target_ulong stack_adj = caculate_stack_adj(bytes, a->zcmp_rlist,
+                                                a->zcmp_spimm);
+    TCGv sp = dest_gpr(ctx, xSP);
+    TCGv addr = tcg_temp_new();
+    int i;
+
+    update_push_pop_list(a->zcmp_rlist, xreg_list);
+    tcg_gen_addi_tl(addr, sp, stack_adj - bytes);
+
+    for (i = X_Sn + 11; i >= 0; i--) {
+        if (xreg_list[i]) {
+            TCGv dest = dest_gpr(ctx, i);
+            switch (bytes) {
+            case 4:
+                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_32);
+                break;
+            case 8:
+                tcg_gen_qemu_ld_tl(dest, addr, ctx->mem_idx, MO_64);
+                break;
+            default:
+                break;
+            }
+            gen_set_gpr(ctx, i, dest);
+            tcg_gen_subi_tl(addr, addr, bytes);
+        }
+    }
+
+    tcg_gen_addi_tl(sp, sp, stack_adj);
+    gen_set_gpr(ctx, xSP, sp);
+
+    if (ret_val) {
+        gen_set_gpr(ctx, xA0, ctx->zero);
+    }
+
+    if (ret) {
+        TCGv ret_addr = get_gpr(ctx, xRA, EXT_NONE);
+        gen_set_pc(ctx, ret_addr);
+        tcg_gen_lookup_and_goto_ptr();
+        ctx->base.is_jmp = DISAS_NORETURN;
+    }
+
+    tcg_temp_free(addr);
+    return true;
+}
+
+static bool trans_cm_push(DisasContext *ctx, arg_cm_push *a)
+{
+    REQUIRE_ZCMP(ctx);
+
+    if (!gen_zcmp_check(ctx, a)) {
+        return false;
+    }
+
+    bool xreg_list[32] = {false};
+    int bytes = get_ol(ctx) == MXL_RV32 ? 4 : 8;
+    target_ulong stack_adj = caculate_stack_adj(bytes, a->zcmp_rlist,
+                                                a->zcmp_spimm);
+    TCGv sp = dest_gpr(ctx, xSP);
+    TCGv addr = tcg_temp_new();
+    int i;
+
+    update_push_pop_list(a->zcmp_rlist, xreg_list);
+    tcg_gen_subi_tl(addr, sp, bytes);
+
+    for (i = X_Sn + 11; i >= 0; i--) {
+        if (xreg_list[i]) {
+            TCGv val = get_gpr(ctx, i, EXT_NONE);
+            switch (bytes) {
+            case 4:
+                tcg_gen_qemu_st_tl(val, addr, ctx->mem_idx, MO_32);
+                break;
+            case 8:
+                tcg_gen_qemu_st_tl(val, addr, ctx->mem_idx, MO_64);
+                break;
+            default:
+                break;
+            }
+            tcg_gen_subi_tl(addr, addr, bytes);
+        }
+    }
+
+    tcg_gen_subi_tl(sp, sp, stack_adj);
+    gen_set_gpr(ctx, xSP, sp);
+    return true;
+}
+
+static bool trans_cm_pop(DisasContext *ctx, arg_cm_pop *a)
+{
+    return gen_pop(ctx, a, false, false);
+}
+
+static bool trans_cm_popret(DisasContext *ctx, arg_cm_popret *a)
+{
+    return gen_pop(ctx, a, true, false);
+}
+
+static bool trans_cm_popretz(DisasContext *ctx, arg_cm_popret *a)
+{
+    return gen_pop(ctx, a, true, true);
+}
+
+static bool trans_cm_mva01s(DisasContext *ctx, arg_cm_mva01s *a)
+{
+    REQUIRE_ZCMP(ctx);
+
+    TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+    TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+    gen_set_gpr(ctx, xA0, src1);
+    gen_set_gpr(ctx, xA1, src2);
+
+    return true;
+}
+
+static bool trans_cm_mvsa01(DisasContext *ctx, arg_cm_mvsa01 *a)
+{
+    REQUIRE_ZCMP(ctx);
+
+    TCGv a0 = get_gpr(ctx, xA0, EXT_NONE);
+    TCGv a1 = get_gpr(ctx, xA1, EXT_NONE);
+
+    gen_set_gpr(ctx, a->rs1, a0);
+    gen_set_gpr(ctx, a->rs2, a1);
+
+    return true;
+}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index ca01da3309..1b2515650f 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -738,6 +738,11 @@  static int ex_rvc_register(DisasContext *ctx, int reg)
     return 8 + reg;
 }
 
+static int ex_sreg_register(DisasContext *ctx, int reg)
+{
+    return reg < 2 ? reg + 8 : reg + 16;
+}
+
 static int ex_rvc_shiftli(DisasContext *ctx, int imm)
 {
     /* For RV128 a shamt of 0 means a shift by 64. */