
[RESEND,v2,16/17] target-ppc: improve stxvw4x implementation

Message ID 1473662506-27441-17-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived

Commit Message

Nikunj A. Dadhania Sept. 12, 2016, 6:41 a.m. UTC
Manipulate data and store 8 bytes instead of 4 bytes.

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

Comments

David Gibson Sept. 15, 2016, 1:44 a.m. UTC | #1
On Mon, Sep 12, 2016 at 12:11:45PM +0530, Nikunj A Dadhania wrote:
> Manipulate data and store 8 bytes instead of 4 bytes.
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++--------------
>  1 file changed, 13 insertions(+), 14 deletions(-)
> 
> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> index caa6660..f2fc5f9 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -205,7 +205,8 @@ static void gen_stxvd2x(DisasContext *ctx)
>  
>  static void gen_stxvw4x(DisasContext *ctx)
>  {
> -    TCGv_i64 tmp;
> +    TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode));
> +    TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode));
>      TCGv EA;
>      if (unlikely(!ctx->vsx_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_VSXU);
> @@ -214,21 +215,19 @@ static void gen_stxvw4x(DisasContext *ctx)
>      gen_set_access_type(ctx, ACCESS_INT);
>      EA = tcg_temp_new();
>      gen_addr_reg_index(ctx, EA);
> -    tmp = tcg_temp_new_i64();
> -
> -    tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32);
> -    gen_qemu_st32_i64(ctx, tmp, EA);
> -    tcg_gen_addi_tl(EA, EA, 4);
> -    gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA);
> -
> -    tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32);
> -    tcg_gen_addi_tl(EA, EA, 4);
> -    gen_qemu_st32_i64(ctx, tmp, EA);
> -    tcg_gen_addi_tl(EA, EA, 4);
> -    gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA);
>  
> +    if (ctx->le_mode) {
> +        tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ);

This looks wrong again.  The BE store will store the two 32-bit halves
in the right order, but nothing swaps the bytes within those halves
back to LE.

> +        tcg_gen_addi_tl(EA, EA, 8);
> +        tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ);
> +    } else {
> +        gen_helper_bswap32x2(xsh, xsh);
> +        tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_LEQ);

Whereas the LE store here will also get the bytes within each 32-bit
word in the wrong order for a BE guest. (bswap32x2 possibly should be
fixing that, but doesn't).

> +        tcg_gen_addi_tl(EA, EA, 8);
> +        gen_helper_bswap32x2(xsl, xsl);
> +        tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_LEQ);
> +    }
>      tcg_temp_free(EA);
> -    tcg_temp_free_i64(tmp);
>  }
>  
>  #define MV_VSRW(name, tcgop1, tcgop2, target, source)           \
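
To make the byte-ordering point in the review concrete, here is a small standalone C illustration (an editorial addition, not part of the email thread and not QEMU code; the value 0x0011223344556677 is made up). It models what the patch's MO_BEQ doubleword store leaves in guest memory and what a little-endian guest then reads back:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* xsh holds two 32-bit words: W0 = 0x00112233, W1 = 0x44556677. */
    uint64_t xsh = 0x0011223344556677ULL;
    uint8_t mem[8];

    /* Model the MO_BEQ store from the patch: most significant byte first,
     * so memory holds 00 11 22 33 44 55 66 77. */
    for (int i = 0; i < 8; i++) {
        mem[i] = (uint8_t)(xsh >> (56 - 8 * i));
    }

    /* Model a little-endian guest loading the 32-bit word at offset 0:
     * least significant byte first. */
    uint32_t w0 = (uint32_t)mem[0] | ((uint32_t)mem[1] << 8) |
                  ((uint32_t)mem[2] << 16) | ((uint32_t)mem[3] << 24);

    /* Prints 0x33221100 rather than the expected 0x00112233: the word is in
     * the right place, but its bytes were never swapped back to LE order.
     * The code being replaced produced 33 22 11 00 77 66 55 44 in LE mode,
     * i.e. each word byte-reversed with the word order unchanged. */
    printf("LE guest sees W0 = 0x%08x\n", w0);
    return 0;
}
```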

Patch

diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index caa6660..f2fc5f9 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -205,7 +205,8 @@ static void gen_stxvd2x(DisasContext *ctx)
 
 static void gen_stxvw4x(DisasContext *ctx)
 {
-    TCGv_i64 tmp;
+    TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode));
+    TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode));
     TCGv EA;
     if (unlikely(!ctx->vsx_enabled)) {
         gen_exception(ctx, POWERPC_EXCP_VSXU);
@@ -214,21 +215,19 @@ static void gen_stxvw4x(DisasContext *ctx)
     gen_set_access_type(ctx, ACCESS_INT);
     EA = tcg_temp_new();
     gen_addr_reg_index(ctx, EA);
-    tmp = tcg_temp_new_i64();
-
-    tcg_gen_shri_i64(tmp, cpu_vsrh(xS(ctx->opcode)), 32);
-    gen_qemu_st32_i64(ctx, tmp, EA);
-    tcg_gen_addi_tl(EA, EA, 4);
-    gen_qemu_st32_i64(ctx, cpu_vsrh(xS(ctx->opcode)), EA);
-
-    tcg_gen_shri_i64(tmp, cpu_vsrl(xS(ctx->opcode)), 32);
-    tcg_gen_addi_tl(EA, EA, 4);
-    gen_qemu_st32_i64(ctx, tmp, EA);
-    tcg_gen_addi_tl(EA, EA, 4);
-    gen_qemu_st32_i64(ctx, cpu_vsrl(xS(ctx->opcode)), EA);
 
+    if (ctx->le_mode) {
+        tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ);
+        tcg_gen_addi_tl(EA, EA, 8);
+        tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ);
+    } else {
+        gen_helper_bswap32x2(xsh, xsh);
+        tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_LEQ);
+        tcg_gen_addi_tl(EA, EA, 8);
+        gen_helper_bswap32x2(xsl, xsl);
+        tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_LEQ);
+    }
     tcg_temp_free(EA);
-    tcg_temp_free_i64(tmp);
 }
 
 #define MV_VSRW(name, tcgop1, tcgop2, target, source)           \
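
For reference, below is one way the little-endian path could be reworked to address the review: keep the two 8-byte stores, but swap the 32-bit halves of each doubleword with tcg_gen_shri_i64/tcg_gen_deposit_i64 and store the result little-endian, so each word lands byte-reversed in memory while the word order matches the BE case. This is an editorial sketch against the code shown above, not the author's actual follow-up; the temporaries t0/t1 are illustrative names, and the surrounding helpers (cpu_vsrh, gen_addr_reg_index, etc.) are used exactly as in the quoted patch.

```c
static void gen_stxvw4x(DisasContext *ctx)
{
    TCGv_i64 xsh = cpu_vsrh(xS(ctx->opcode));
    TCGv_i64 xsl = cpu_vsrl(xS(ctx->opcode));
    TCGv EA;

    if (unlikely(!ctx->vsx_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VSXU);
        return;
    }
    gen_set_access_type(ctx, ACCESS_INT);
    EA = tcg_temp_new();
    gen_addr_reg_index(ctx, EA);
    if (ctx->le_mode) {
        TCGv_i64 t0 = tcg_temp_new_i64();
        TCGv_i64 t1 = tcg_temp_new_i64();

        /* Swap the two 32-bit words of each doubleword, then store the
         * result little-endian: each word ends up byte-reversed in memory
         * while the word order stays the same as in the BE case. */
        tcg_gen_shri_i64(t0, xsh, 32);
        tcg_gen_deposit_i64(t1, t0, xsh, 32, 32);
        tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ);
        tcg_gen_addi_tl(EA, EA, 8);
        tcg_gen_shri_i64(t0, xsl, 32);
        tcg_gen_deposit_i64(t1, t0, xsl, 32, 32);
        tcg_gen_qemu_st_i64(t1, EA, ctx->mem_idx, MO_LEQ);
        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
    } else {
        /* Big-endian guest: plain BE doubleword stores already give the
         * right byte and word order. */
        tcg_gen_qemu_st_i64(xsh, EA, ctx->mem_idx, MO_BEQ);
        tcg_gen_addi_tl(EA, EA, 8);
        tcg_gen_qemu_st_i64(xsl, EA, ctx->mem_idx, MO_BEQ);
    }
    tcg_temp_free(EA);
}
```

This keeps the intent of the patch (two 8-byte accesses instead of four 4-byte ones) while reproducing the byte layout of the code being removed in both endian modes, without relying on the series' bswap32x2 helper.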