Message ID | 20210830111511.1905048-10-philipp.tomsich@vrull.eu (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | target/riscv: Update QEmu for Zb[abcs] 1.0.0 | expand |
On Mon, Aug 30, 2021 at 9:19 PM Philipp Tomsich <philipp.tomsich@vrull.eu> wrote: > > The 1.0.0 version of Zbb does not contain gorc/gorci. Instead, a > orc.b instruction (equivalent to the orc.b pseudo-instruction built on > gorci from pre-0.93 draft-B) is available, mainly targeting > string-processing workloads. > > This commit adds the new orc.b instruction and removed gorc/gorci. > > Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Alistair > > --- > > Changes in v7: > - Free TCG temporary in gen_orc_b(). > > Changes in v6: > - Fixed orc.b (now passes SPEC w/ optimized string functions) by > adding the missing final negation. > > Changes in v4: > - Change orc.b to implementation suggested by Richard Henderson > > Changes in v3: > - Moved orc.b and gorc/gorci changes into separate commit. > - Using the simpler orc.b implementation suggested by Richard Henderson > > target/riscv/bitmanip_helper.c | 26 ---------------- > target/riscv/helper.h | 2 -- > target/riscv/insn32.decode | 6 +--- > target/riscv/insn_trans/trans_rvb.c.inc | 40 ++++++++++++------------- > target/riscv/translate.c | 6 ---- > 5 files changed, 21 insertions(+), 59 deletions(-) > > diff --git a/target/riscv/bitmanip_helper.c b/target/riscv/bitmanip_helper.c > index 73be5a81c7..bb48388fcd 100644 > --- a/target/riscv/bitmanip_helper.c > +++ b/target/riscv/bitmanip_helper.c > @@ -64,32 +64,6 @@ target_ulong HELPER(grevw)(target_ulong rs1, target_ulong rs2) > return do_grev(rs1, rs2, 32); > } > > -static target_ulong do_gorc(target_ulong rs1, > - target_ulong rs2, > - int bits) > -{ > - target_ulong x = rs1; > - int i, shift; > - > - for (i = 0, shift = 1; shift < bits; i++, shift <<= 1) { > - if (rs2 & shift) { > - x |= do_swap(x, adjacent_masks[i], shift); > - } > - } > - > - return x; > -} > - > -target_ulong HELPER(gorc)(target_ulong rs1, target_ulong rs2) > -{ > - 
return do_gorc(rs1, rs2, TARGET_LONG_BITS); > -} > - > -target_ulong HELPER(gorcw)(target_ulong rs1, target_ulong rs2) > -{ > - return do_gorc(rs1, rs2, 32); > -} > - > target_ulong HELPER(clmul)(target_ulong rs1, target_ulong rs2) > { > target_ulong result = 0; > diff --git a/target/riscv/helper.h b/target/riscv/helper.h > index c559c860a7..80561e8866 100644 > --- a/target/riscv/helper.h > +++ b/target/riscv/helper.h > @@ -61,8 +61,6 @@ DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64) > /* Bitmanip */ > DEF_HELPER_FLAGS_2(grev, TCG_CALL_NO_RWG_SE, tl, tl, tl) > DEF_HELPER_FLAGS_2(grevw, TCG_CALL_NO_RWG_SE, tl, tl, tl) > -DEF_HELPER_FLAGS_2(gorc, TCG_CALL_NO_RWG_SE, tl, tl, tl) > -DEF_HELPER_FLAGS_2(gorcw, TCG_CALL_NO_RWG_SE, tl, tl, tl) > DEF_HELPER_FLAGS_2(clmul, TCG_CALL_NO_RWG_SE, tl, tl, tl) > DEF_HELPER_FLAGS_2(clmulr, TCG_CALL_NO_RWG_SE, tl, tl, tl) > > diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode > index faa56836d8..8bcb602455 100644 > --- a/target/riscv/insn32.decode > +++ b/target/riscv/insn32.decode > @@ -680,6 +680,7 @@ max 0000101 .......... 110 ..... 0110011 @r > maxu 0000101 .......... 111 ..... 0110011 @r > min 0000101 .......... 100 ..... 0110011 @r > minu 0000101 .......... 101 ..... 0110011 @r > +orc_b 001010 000111 ..... 101 ..... 0010011 @r2 > orn 0100000 .......... 110 ..... 0110011 @r > rol 0110000 .......... 001 ..... 0110011 @r > ror 0110000 .......... 101 ..... 0110011 @r > @@ -701,19 +702,14 @@ pack 0000100 .......... 100 ..... 0110011 @r > packu 0100100 .......... 100 ..... 0110011 @r > packh 0000100 .......... 111 ..... 0110011 @r > grev 0110100 .......... 101 ..... 0110011 @r > -gorc 0010100 .......... 101 ..... 0110011 @r > - > grevi 01101. ........... 101 ..... 0010011 @sh > -gorci 00101. ........... 101 ..... 0010011 @sh > > # *** RV64B Standard Extension (in addition to RV32B) *** > packw 0000100 .......... 100 ..... 0111011 @r > packuw 0100100 .......... 100 ..... 
0111011 @r > grevw 0110100 .......... 101 ..... 0111011 @r > -gorcw 0010100 .......... 101 ..... 0111011 @r > > greviw 0110100 .......... 101 ..... 0011011 @sh5 > -gorciw 0010100 .......... 101 ..... 0011011 @sh5 > > # *** RV32 Zbc Standard Extension *** > clmul 0000101 .......... 001 ..... 0110011 @r > diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc > index 03b3724c96..cb4aa168fb 100644 > --- a/target/riscv/insn_trans/trans_rvb.c.inc > +++ b/target/riscv/insn_trans/trans_rvb.c.inc > @@ -215,18 +215,32 @@ static bool trans_grevi(DisasContext *ctx, arg_grevi *a) > return gen_grevi(ctx, a); > } > > -static bool trans_gorc(DisasContext *ctx, arg_gorc *a) > +static void gen_orc_b(TCGv ret, TCGv source1) > { > - REQUIRE_EXT(ctx, RVB); > - return gen_shift(ctx, a, gen_helper_gorc); > + TCGv tmp = tcg_temp_new(); > + > + /* Set msb in each byte if the byte was zero. */ > + tcg_gen_subi_tl(tmp, source1, dup_const(MO_8, 0x01)); > + tcg_gen_andc_tl(tmp, tmp, source1); > + tcg_gen_andi_tl(tmp, tmp, dup_const(MO_8, 0x80)); > + > + /* Replicate the msb of each byte across the byte. 
*/ > + tcg_gen_shri_tl(tmp, tmp, 7); > + tcg_gen_muli_tl(tmp, tmp, 0xff); > + > + /* Negate */ > + tcg_gen_not_tl(ret, tmp); > + > + tcg_temp_free(tmp); > } > > -static bool trans_gorci(DisasContext *ctx, arg_gorci *a) > +static bool trans_orc_b(DisasContext *ctx, arg_orc_b *a) > { > - REQUIRE_EXT(ctx, RVB); > - return gen_shifti(ctx, a, gen_helper_gorc); > + REQUIRE_ZBB(ctx); > + return gen_unary(ctx, a, &gen_orc_b); > } > > + > #define GEN_TRANS_SHADD(SHAMT) \ > static bool trans_sh##SHAMT##add(DisasContext *ctx, arg_sh##SHAMT##add *a) \ > { \ > @@ -308,20 +322,6 @@ static bool trans_greviw(DisasContext *ctx, arg_greviw *a) > return gen_shiftiw(ctx, a, gen_grevw); > } > > -static bool trans_gorcw(DisasContext *ctx, arg_gorcw *a) > -{ > - REQUIRE_64BIT(ctx); > - REQUIRE_EXT(ctx, RVB); > - return gen_shiftw(ctx, a, gen_gorcw); > -} > - > -static bool trans_gorciw(DisasContext *ctx, arg_gorciw *a) > -{ > - REQUIRE_64BIT(ctx); > - REQUIRE_EXT(ctx, RVB); > - return gen_shiftiw(ctx, a, gen_gorcw); > -} > - > #define GEN_TRANS_SHADD_UW(SHAMT) \ > static bool trans_sh##SHAMT##add_uw(DisasContext *ctx, \ > arg_sh##SHAMT##add_uw *a) \ > diff --git a/target/riscv/translate.c b/target/riscv/translate.c > index fc22ae82d0..5c099ff007 100644 > --- a/target/riscv/translate.c > +++ b/target/riscv/translate.c > @@ -710,12 +710,6 @@ static void gen_grevw(TCGv ret, TCGv arg1, TCGv arg2) > gen_helper_grev(ret, arg1, arg2); > } > > -static void gen_gorcw(TCGv ret, TCGv arg1, TCGv arg2) > -{ > - tcg_gen_ext32u_tl(arg1, arg1); > - gen_helper_gorcw(ret, arg1, arg2); > -} > - > #define GEN_SHADD_UW(SHAMT) \ > static void gen_sh##SHAMT##add_uw(TCGv ret, TCGv arg1, TCGv arg2) \ > { \ > -- > 2.25.1 > >
On 8/30/21 4:15 AM, Philipp Tomsich wrote: > + TCGv tmp = tcg_temp_new(); > + > + /* Set msb in each byte if the byte was zero. */ > + tcg_gen_subi_tl(tmp, source1, dup_const(MO_8, 0x01)); > + tcg_gen_andc_tl(tmp, tmp, source1); > + tcg_gen_andi_tl(tmp, tmp, dup_const(MO_8, 0x80)); > + > + /* Replicate the msb of each byte across the byte. */ > + tcg_gen_shri_tl(tmp, tmp, 7); > + tcg_gen_muli_tl(tmp, tmp, 0xff); > + > + /* Negate */ > + tcg_gen_not_tl(ret, tmp); It just occurred to me that we can swap the shift/andi and re-use the same constant, and we can fold in the negate with andc. TCGv ones = tcg_constant_tl(dup_const(MO_8, 1)); TCGv tmp = tcg_temp_new(); tcg_gen_sub_tl(tmp, src1, ones); tcg_gen_andc_tl(tmp, tmp, src1); tcg_gen_shri_tl(tmp, tmp, 7); tcg_gen_andc_tl(tmp, ones, tmp); tcg_gen_muli_tl(ret, tmp, 0xff); tcg_temp_free(tmp); r~
diff --git a/target/riscv/bitmanip_helper.c b/target/riscv/bitmanip_helper.c index 73be5a81c7..bb48388fcd 100644 --- a/target/riscv/bitmanip_helper.c +++ b/target/riscv/bitmanip_helper.c @@ -64,32 +64,6 @@ target_ulong HELPER(grevw)(target_ulong rs1, target_ulong rs2) return do_grev(rs1, rs2, 32); } -static target_ulong do_gorc(target_ulong rs1, - target_ulong rs2, - int bits) -{ - target_ulong x = rs1; - int i, shift; - - for (i = 0, shift = 1; shift < bits; i++, shift <<= 1) { - if (rs2 & shift) { - x |= do_swap(x, adjacent_masks[i], shift); - } - } - - return x; -} - -target_ulong HELPER(gorc)(target_ulong rs1, target_ulong rs2) -{ - return do_gorc(rs1, rs2, TARGET_LONG_BITS); -} - -target_ulong HELPER(gorcw)(target_ulong rs1, target_ulong rs2) -{ - return do_gorc(rs1, rs2, 32); -} - target_ulong HELPER(clmul)(target_ulong rs1, target_ulong rs2) { target_ulong result = 0; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index c559c860a7..80561e8866 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -61,8 +61,6 @@ DEF_HELPER_FLAGS_1(fclass_d, TCG_CALL_NO_RWG_SE, tl, i64) /* Bitmanip */ DEF_HELPER_FLAGS_2(grev, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(grevw, TCG_CALL_NO_RWG_SE, tl, tl, tl) -DEF_HELPER_FLAGS_2(gorc, TCG_CALL_NO_RWG_SE, tl, tl, tl) -DEF_HELPER_FLAGS_2(gorcw, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(clmul, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_FLAGS_2(clmulr, TCG_CALL_NO_RWG_SE, tl, tl, tl) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index faa56836d8..8bcb602455 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -680,6 +680,7 @@ max 0000101 .......... 110 ..... 0110011 @r maxu 0000101 .......... 111 ..... 0110011 @r min 0000101 .......... 100 ..... 0110011 @r minu 0000101 .......... 101 ..... 0110011 @r +orc_b 001010 000111 ..... 101 ..... 0010011 @r2 orn 0100000 .......... 110 ..... 0110011 @r rol 0110000 .......... 001 ..... 
0110011 @r ror 0110000 .......... 101 ..... 0110011 @r @@ -701,19 +702,14 @@ pack 0000100 .......... 100 ..... 0110011 @r packu 0100100 .......... 100 ..... 0110011 @r packh 0000100 .......... 111 ..... 0110011 @r grev 0110100 .......... 101 ..... 0110011 @r -gorc 0010100 .......... 101 ..... 0110011 @r - grevi 01101. ........... 101 ..... 0010011 @sh -gorci 00101. ........... 101 ..... 0010011 @sh # *** RV64B Standard Extension (in addition to RV32B) *** packw 0000100 .......... 100 ..... 0111011 @r packuw 0100100 .......... 100 ..... 0111011 @r grevw 0110100 .......... 101 ..... 0111011 @r -gorcw 0010100 .......... 101 ..... 0111011 @r greviw 0110100 .......... 101 ..... 0011011 @sh5 -gorciw 0010100 .......... 101 ..... 0011011 @sh5 # *** RV32 Zbc Standard Extension *** clmul 0000101 .......... 001 ..... 0110011 @r diff --git a/target/riscv/insn_trans/trans_rvb.c.inc b/target/riscv/insn_trans/trans_rvb.c.inc index 03b3724c96..cb4aa168fb 100644 --- a/target/riscv/insn_trans/trans_rvb.c.inc +++ b/target/riscv/insn_trans/trans_rvb.c.inc @@ -215,18 +215,32 @@ static bool trans_grevi(DisasContext *ctx, arg_grevi *a) return gen_grevi(ctx, a); } -static bool trans_gorc(DisasContext *ctx, arg_gorc *a) +static void gen_orc_b(TCGv ret, TCGv source1) { - REQUIRE_EXT(ctx, RVB); - return gen_shift(ctx, a, gen_helper_gorc); + TCGv tmp = tcg_temp_new(); + + /* Set msb in each byte if the byte was zero. */ + tcg_gen_subi_tl(tmp, source1, dup_const(MO_8, 0x01)); + tcg_gen_andc_tl(tmp, tmp, source1); + tcg_gen_andi_tl(tmp, tmp, dup_const(MO_8, 0x80)); + + /* Replicate the msb of each byte across the byte. 
*/ + tcg_gen_shri_tl(tmp, tmp, 7); + tcg_gen_muli_tl(tmp, tmp, 0xff); + + /* Negate */ + tcg_gen_not_tl(ret, tmp); + + tcg_temp_free(tmp); } -static bool trans_gorci(DisasContext *ctx, arg_gorci *a) +static bool trans_orc_b(DisasContext *ctx, arg_orc_b *a) { - REQUIRE_EXT(ctx, RVB); - return gen_shifti(ctx, a, gen_helper_gorc); + REQUIRE_ZBB(ctx); + return gen_unary(ctx, a, &gen_orc_b); } + #define GEN_TRANS_SHADD(SHAMT) \ static bool trans_sh##SHAMT##add(DisasContext *ctx, arg_sh##SHAMT##add *a) \ { \ @@ -308,20 +322,6 @@ static bool trans_greviw(DisasContext *ctx, arg_greviw *a) return gen_shiftiw(ctx, a, gen_grevw); } -static bool trans_gorcw(DisasContext *ctx, arg_gorcw *a) -{ - REQUIRE_64BIT(ctx); - REQUIRE_EXT(ctx, RVB); - return gen_shiftw(ctx, a, gen_gorcw); -} - -static bool trans_gorciw(DisasContext *ctx, arg_gorciw *a) -{ - REQUIRE_64BIT(ctx); - REQUIRE_EXT(ctx, RVB); - return gen_shiftiw(ctx, a, gen_gorcw); -} - #define GEN_TRANS_SHADD_UW(SHAMT) \ static bool trans_sh##SHAMT##add_uw(DisasContext *ctx, \ arg_sh##SHAMT##add_uw *a) \ diff --git a/target/riscv/translate.c b/target/riscv/translate.c index fc22ae82d0..5c099ff007 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -710,12 +710,6 @@ static void gen_grevw(TCGv ret, TCGv arg1, TCGv arg2) gen_helper_grev(ret, arg1, arg2); } -static void gen_gorcw(TCGv ret, TCGv arg1, TCGv arg2) -{ - tcg_gen_ext32u_tl(arg1, arg1); - gen_helper_gorcw(ret, arg1, arg2); -} - #define GEN_SHADD_UW(SHAMT) \ static void gen_sh##SHAMT##add_uw(TCGv ret, TCGv arg1, TCGv arg2) \ { \