Message ID | 1473662506-27441-17-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Sep 12, 2016 at 12:11:45PM +0530, Nikunj A Dadhania wrote: > Manipulate data and store 8bytes instead of 4bytes. > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++-------------- > 1 file changed, 13 insertions(+), 14 deletions(-) > > diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c > index caa6660..f2fc5f9 100644 > --- a/target-ppc/translate/vsx-impl.inc.c > +++ b/target-ppc/translate/vsx-impl.inc.c > @@ -205,7 +205,8 @@ static void gen_stxvd2x(DisasContext *ctx) > > static void gen_stxvw4x(DisasContext *ctx) > { > - TCGv_i64 tmp; > + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); > + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); > TCGv EA; > if (unlikely(!ctx->vsx_enabled)) { > gen_exception(ctx, POWERPC_EXCP_VSXU); > @@ -214,21 +215,19 @@ static void gen_stxvw4x(DisasContext *ctx) > gen_set_access_type(ctx, ACCESS_INT); > EA = tcg_temp_new(); > gen_addr_reg_index(ctx, EA); > - tmp = tcg_temp_new_i64(); > - > - tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32); > - gen_qemu_st32_i64(ctx, tmp, EA); > - tcg_gen_addi_tl(EA, EA, 4); > - gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); > - > - tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32); > - tcg_gen_addi_tl(EA, EA, 4); > - gen_qemu_st32_i64(ctx, tmp, EA); > - tcg_gen_addi_tl(EA, EA, 4); > - gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); > > + if (ctx->le_mode) { > + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); This looks wrong again. The BE store will store the two 32-bit halves in the right order, but nothing swaps the bytes within those halves back to LE. > + tcg_gen_addi_tl(EA, EA, 8); > + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); > + } else { > + gen_helper_bswap32x2(xsh, xsh); > + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_LEQ); Whereas the LE store here will also get the bytes within each 32-bit word in the wrong order for a BE guest. 
(bswap32x2 possibly should be fixing that, but doesn't). > + tcg_gen_addi_tl(EA, EA, 8); > + gen_helper_bswap32x2(xsl, xsl); > + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_LEQ); > + } > tcg_temp_free(EA); > - tcg_temp_free_i64(tmp); > } > > #define MV_VSRW(name, tcgop1, tcgop2, target, source) \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c index caa6660..f2fc5f9 100644 --- a/target-ppc/translate/vsx-impl.inc.c +++ b/target-ppc/translate/vsx-impl.inc.c @@ -205,7 +205,8 @@ static void gen_stxvd2x(DisasContext *ctx) static void gen_stxvw4x(DisasContext *ctx) { - TCGv_i64 tmp; + TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode)); + TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode)); TCGv EA; if (unlikely(!ctx->vsx_enabled)) { gen_exception(ctx, POWERPC_EXCP_VSXU); @@ -214,21 +215,19 @@ static void gen_stxvw4x(DisasContext *ctx) gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); gen_addr_reg_index(ctx, EA); - tmp = tcg_temp_new_i64(); - - tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); - - tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, tmp, EA); - tcg_gen_addi_tl(EA, EA, 4); - gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); + if (ctx->le_mode) { + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ); + tcg_gen_addi_tl(EA, EA, 8); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ); + } else { + gen_helper_bswap32x2(xsh, xsh); + tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_LEQ); + tcg_gen_addi_tl(EA, EA, 8); + gen_helper_bswap32x2(xsl, xsl); + tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_LEQ); + } tcg_temp_free(EA); - tcg_temp_free_i64(tmp); } #define MV_VSRW(name, tcgop1, tcgop2, target, source) \
Manipulate data and store 8bytes instead of 4bytes. Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-)