Message ID | 20170512182132.jdw4g2pd5gvf2dti@aurel32.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05/12/2017 11:21 AM, Aurelien Jarno wrote: > + uint64_t mask1 = sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff; > + uint64_t mask2 = sf ? 0xff00ff00ff00ff00ull : 0xff00ff00; > + > + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); > + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, mask1); > + tcg_gen_shli_i64(tcg_rd, tcg_rn, 8); > + tcg_gen_andi_i64(tcg_rd, tcg_rd, mask2); It would probably be better to use a single mask, since they're not free to instantiate in a register. So e.g. TCGv mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); tcg_gen_and_i64(tcg_rd, tcg_rn, mask); tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); r~
On 2017-05-12 12:05, Richard Henderson wrote: > On 05/12/2017 11:21 AM, Aurelien Jarno wrote: > > + uint64_t mask1 = sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff; > > + uint64_t mask2 = sf ? 0xff00ff00ff00ff00ull : 0xff00ff00; > > + > > + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); > > + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, mask1); > > + tcg_gen_shli_i64(tcg_rd, tcg_rn, 8); > > + tcg_gen_andi_i64(tcg_rd, tcg_rd, mask2); > > It would probably be better to use a single mask, since they're not free to > instantiate in a register. So e.g. > > TCGv mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); > tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); > tcg_gen_and_i64(tcg_rd, tcg_rn, mask); > tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); > tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); Indeed, that improves things a bit for sf=1. For sf=0, though, the constant is never loaded into a register; it is passed to the `and` instructions as an immediate.
On 05/12/2017 12:22 PM, Aurelien Jarno wrote: > On 2017-05-12 12:05, Richard Henderson wrote: >> On 05/12/2017 11:21 AM, Aurelien Jarno wrote: >>> + uint64_t mask1 = sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff; >>> + uint64_t mask2 = sf ? 0xff00ff00ff00ff00ull : 0xff00ff00; >>> + >>> + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); >>> + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, mask1); >>> + tcg_gen_shli_i64(tcg_rd, tcg_rn, 8); >>> + tcg_gen_andi_i64(tcg_rd, tcg_rd, mask2); >> >> It would probably be better to use a single mask, since they're not free to >> instantiate in a register. So e.g. >> >> TCGv mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); >> tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); >> tcg_gen_and_i64(tcg_rd, tcg_rn, mask); >> tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); >> tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); > > Indeed that improves things a bit for sf=1. For sf=0 though the > constant is never loaded into a register, it is passed to the and > instructions as an immediate. > For x86 (and sometimes s390) it isn't, but it certainly would be for all other hosts. r~
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 24de30d92c..ccb276417b 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -4034,25 +4034,14 @@ static void handle_rev16(DisasContext *s, unsigned int sf, TCGv_i64 tcg_rd = cpu_reg(s, rd); TCGv_i64 tcg_tmp = tcg_temp_new_i64(); TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); - - tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff); - tcg_gen_bswap16_i64(tcg_rd, tcg_tmp); - - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16); - tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16); - - if (sf) { - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32); - tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16); - - tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48); - tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp); - tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16); - } + uint64_t mask1 = sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff; + uint64_t mask2 = sf ? 0xff00ff00ff00ff00ull : 0xff00ff00; + + tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); + tcg_gen_andi_i64(tcg_tmp, tcg_tmp, mask1); + tcg_gen_shli_i64(tcg_rd, tcg_rn, 8); + tcg_gen_andi_i64(tcg_rd, tcg_rd, mask2); + tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); tcg_temp_free_i64(tcg_tmp); }