diff mbox

[v1,3/3] target-ppc: implement xxbr[qdwh] instruction

Message ID 1476248933-25562-4-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Oct. 12, 2016, 5:08 a.m. UTC
Add required helpers (GEN_XX2FORM_EO) for supporting this instruction.

xxbrh: VSX Vector Byte-Reverse Halfword
xxbrw: VSX Vector Byte-Reverse Word
xxbrd: VSX Vector Byte-Reverse Doubleword
xxbrq: VSX Vector Byte-Reverse Quadword

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/translate.c              | 32 +++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 77 +++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  8 ++++
 3 files changed, 117 insertions(+)

Comments

David Gibson Oct. 13, 2016, 12:21 a.m. UTC | #1
On Wed, Oct 12, 2016 at 10:38:53AM +0530, Nikunj A Dadhania wrote:
> Add required helpers (GEN_XX2FORM_EO) for supporting this instruction.
> 
> xxbrh: VSX Vector Byte-Reverse Halfword
> xxbrw: VSX Vector Byte-Reverse Word
> xxbrd: VSX Vector Byte-Reverse Doubleword
> xxbrq: VSX Vector Byte-Reverse Quadword
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/translate.c              | 32 +++++++++++++++
>  target-ppc/translate/vsx-impl.inc.c | 77 +++++++++++++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  |  8 ++++
>  3 files changed, 117 insertions(+)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index dab8f19..94989b2 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -376,6 +376,9 @@ GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
>  #define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)     \
>  GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
>  
> +#define GEN_HANDLER2_E_2(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2) \
> +GEN_OPCODE4(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2)
> +
>  typedef struct opcode_t {
>      unsigned char opc1, opc2, opc3, opc4;
>  #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
> @@ -662,6 +665,21 @@ EXTRACT_HELPER(IMM8, 11, 8);
>      },                                                                        \
>      .oname = stringify(name),                                                 \
>  }
> +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)        \
> +{                                                                             \
> +    .opc1 = op1,                                                              \
> +    .opc2 = op2,                                                              \
> +    .opc3 = op3,                                                              \
> +    .opc4 = op4,                                                              \
> +    .handler = {                                                              \
> +        .inval1  = invl,                                                      \
> +        .type = _typ,                                                         \
> +        .type2 = _typ2,                                                       \
> +        .handler = &gen_##name,                                               \
> +        .oname = onam,                                                        \
> +    },                                                                        \
> +    .oname = onam,                                                            \
> +}
>  #else
>  #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)                    \
>  {                                                                             \
> @@ -720,6 +738,20 @@ EXTRACT_HELPER(IMM8, 11, 8);
>      },                                                                        \
>      .oname = stringify(name),                                                 \
>  }
> +#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)        \
> +{                                                                             \
> +    .opc1 = op1,                                                              \
> +    .opc2 = op2,                                                              \
> +    .opc3 = op3,                                                              \
> +    .opc4 = op4,                                                              \
> +    .handler = {                                                              \
> +        .inval1  = invl,                                                      \
> +        .type = _typ,                                                         \
> +        .type2 = _typ2,                                                       \
> +        .handler = &gen_##name,                                               \
> +    },                                                                        \
> +    .oname = onam,                                                            \
> +}
>  #endif
>  
>  /* SPR load/store helpers */
> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> index 23ec1e1..52af5c1 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -132,6 +132,22 @@ static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl,
>      tcg_temp_free_i64(mask);
>  }
>  
> +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
> +                          TCGv_i64 inh, TCGv_i64 inl)
> +{
> +    TCGv_i64 hi = tcg_temp_new_i64();
> +    TCGv_i64 lo = tcg_temp_new_i64();
> +
> +    tcg_gen_bswap64_i64(hi, inh);
> +    tcg_gen_bswap64_i64(lo, inl);
> +    tcg_gen_shri_i64(outh, hi, 32);
> +    tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
> +    tcg_gen_shri_i64(outl, lo, 32);
> +    tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
> +
> +    tcg_temp_free_i64(hi);
> +    tcg_temp_free_i64(lo);
> +}

Is there actually any advantage to having this 128-bit operation
working on two 64-bit "register"s, as opposed to having a bswap32x2
that operates on a single 64-bit register amd calling it twice?

>  static void gen_lxvh8x(DisasContext *ctx)
>  {
>      TCGv EA;
> @@ -717,6 +733,67 @@ GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
>  GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
>  
> +static void gen_xxbrd(DisasContext *ctx)
> +{
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> +    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    tcg_gen_bswap64_i64(xth, xbh);
> +    tcg_gen_bswap64_i64(xtl, xbl);
> +}
> +
> +static void gen_xxbrh(DisasContext *ctx)
> +{
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> +    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    gen_bswap16x8(xth, xtl, xbh, xbl);

Likewise for the 16x8 version, I guess, although that would mean
changing the existing users.

> +}
> +
> +static void gen_xxbrq(DisasContext *ctx)
> +{
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> +    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +    TCGv_i64 t0 = tcg_temp_new_i64();
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    tcg_gen_bswap64_i64(t0, xbl);
> +    tcg_gen_bswap64_i64(xtl, xbh);
> +    tcg_gen_bswap64_i64(xth, t0);

This looks wrong.  You swap xbl as you move it to t0, then swap it
again as you put it back into xth.  So it looks like you'll translate
           0011223344556677 8899AABBCCDDEEFF
to
           8899AABBCCDDEEFF 7766554433221100
whereas it should become
	   FFEEDDCCBBAA9977 7766554433221100

> +    tcg_temp_free_i64(t0);
> +}
> +
> +static void gen_xxbrw(DisasContext *ctx)
> +{
> +    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
> +    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
> +    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
> +    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
> +
> +    if (unlikely(!ctx->vsx_enabled)) {
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);
> +        return;
> +    }
> +    gen_bswap32x4(xth, xtl, xbh, xbl);
> +}
> +
>  #define VSX_LOGICAL(name, tcg_op)                                    \
>  static void glue(gen_, name)(DisasContext * ctx)                     \
>      {                                                                \
> diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
> index 10eb4b9..af0d27e 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -39,6 +39,10 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
>  
> +#define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)                          \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \
> +GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
> +
>  #define GEN_XX3FORM(name, opc2, opc3, fl2)                           \
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
> @@ -222,6 +226,10 @@ GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
>  GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
>  GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
>  GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
> +GEN_XX2FORM_EO(xxbrh, 0x16, 0x1D, 0x07, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrw, 0x16, 0x1D, 0x0F, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrd, 0x16, 0x1D, 0x17, PPC2_ISA300),
> +GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300),
>  
>  #define VSX_LOGICAL(name, opc2, opc3, fl2) \
>  GEN_XX3FORM(name, opc2, opc3, fl2)
Richard Henderson Oct. 13, 2016, 6:14 p.m. UTC | #2
On 10/12/2016 07:21 PM, David Gibson wrote:
>> +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
>> +                          TCGv_i64 inh, TCGv_i64 inl)
>> +{
>> +    TCGv_i64 hi = tcg_temp_new_i64();
>> +    TCGv_i64 lo = tcg_temp_new_i64();
>> +
>> +    tcg_gen_bswap64_i64(hi, inh);
>> +    tcg_gen_bswap64_i64(lo, inl);
>> +    tcg_gen_shri_i64(outh, hi, 32);
>> +    tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
>> +    tcg_gen_shri_i64(outl, lo, 32);
>> +    tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
>> +
>> +    tcg_temp_free_i64(hi);
>> +    tcg_temp_free_i64(lo);
>> +}
>
> Is there actually any advantage to having this 128-bit operation
> working on two 64-bit "register"s, as opposed to having a bswap32x2
> that operates on a single 64-bit register amd calling it twice?

For this one, no particular advantage.

>> +    gen_bswap16x8(xth, xtl, xbh, xbl);
>
> Likewise for the 16x8 version, I guess, although that would mean
> changing the existing users.

For this one, we have to build a 64-bit constant, 0x00ff00ff00ff00ff.  On some 
hosts that's up to 6 insns.  Being about to reuse that for both swaps is useful.

>> +    tcg_gen_bswap64_i64(t0, xbl);
>> +    tcg_gen_bswap64_i64(xtl, xbh);
>> +    tcg_gen_bswap64_i64(xth, t0);
>
> This looks wrong.  You swap xbl as you move it to t0, then swap it
> again as you put it back into xth.  So it looks like you'll translate
>            0011223344556677 8899AABBCCDDEEFF
> to
>            8899AABBCCDDEEFF 7766554433221100
> whereas it should become
> 	   FFEEDDCCBBAA9977 7766554433221100

Indeed, the third line should be a move, not a swap.


r~
David Gibson Oct. 13, 2016, 10:31 p.m. UTC | #3
On Thu, Oct 13, 2016 at 01:14:35PM -0500, Richard Henderson wrote:
> On 10/12/2016 07:21 PM, David Gibson wrote:
> > > +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
> > > +                          TCGv_i64 inh, TCGv_i64 inl)
> > > +{
> > > +    TCGv_i64 hi = tcg_temp_new_i64();
> > > +    TCGv_i64 lo = tcg_temp_new_i64();
> > > +
> > > +    tcg_gen_bswap64_i64(hi, inh);
> > > +    tcg_gen_bswap64_i64(lo, inl);
> > > +    tcg_gen_shri_i64(outh, hi, 32);
> > > +    tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
> > > +    tcg_gen_shri_i64(outl, lo, 32);
> > > +    tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
> > > +
> > > +    tcg_temp_free_i64(hi);
> > > +    tcg_temp_free_i64(lo);
> > > +}
> > 
> > Is there actually any advantage to having this 128-bit operation
> > working on two 64-bit "register"s, as opposed to having a bswap32x2
> > that operates on a single 64-bit register amd calling it twice?
> 
> For this one, no particular advantage.
> 
> > > +    gen_bswap16x8(xth, xtl, xbh, xbl);
> > 
> > Likewise for the 16x8 version, I guess, although that would mean
> > changing the existing users.
> 
> For this one, we have to build a 64-bit constant, 0x00ff00ff00ff00ff.  On
> some hosts that's up to 6 insns.  Being about to reuse that for both swaps
> is useful.

Ah, good point.

> > > +    tcg_gen_bswap64_i64(t0, xbl);
> > > +    tcg_gen_bswap64_i64(xtl, xbh);
> > > +    tcg_gen_bswap64_i64(xth, t0);
> > 
> > This looks wrong.  You swap xbl as you move it to t0, then swap it
> > again as you put it back into xth.  So it looks like you'll translate
> >            0011223344556677 8899AABBCCDDEEFF
> > to
> >            8899AABBCCDDEEFF 7766554433221100
> > whereas it should become
> > 	   FFEEDDCCBBAA9977 7766554433221100
> 
> Indeed, the third line should be a move, not a swap.
> 
> 
> r~
>
Nikunj A. Dadhania Oct. 19, 2016, 5:13 a.m. UTC | #4
Richard Henderson <rth@twiddle.net> writes:

> On 10/12/2016 07:21 PM, David Gibson wrote:
>>> +static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
>>> +                          TCGv_i64 inh, TCGv_i64 inl)
>>> +{
>>> +    TCGv_i64 hi = tcg_temp_new_i64();
>>> +    TCGv_i64 lo = tcg_temp_new_i64();
>>> +
>>> +    tcg_gen_bswap64_i64(hi, inh);
>>> +    tcg_gen_bswap64_i64(lo, inl);
>>> +    tcg_gen_shri_i64(outh, hi, 32);
>>> +    tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
>>> +    tcg_gen_shri_i64(outl, lo, 32);
>>> +    tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
>>> +
>>> +    tcg_temp_free_i64(hi);
>>> +    tcg_temp_free_i64(lo);
>>> +}
>>
>> Is there actually any advantage to having this 128-bit operation
>> working on two 64-bit "register"s, as opposed to having a bswap32x2
>> that operates on a single 64-bit register amd calling it twice?
>
> For this one, no particular advantage.
>
>>> +    gen_bswap16x8(xth, xtl, xbh, xbl);
>>
>> Likewise for the 16x8 version, I guess, although that would mean
>> changing the existing users.
>
> For this one, we have to build a 64-bit constant, 0x00ff00ff00ff00ff.  On some 
> hosts that's up to 6 insns.  Being about to reuse that for both swaps is useful.
>
>>> +    tcg_gen_bswap64_i64(t0, xbl);
>>> +    tcg_gen_bswap64_i64(xtl, xbh);
>>> +    tcg_gen_bswap64_i64(xth, t0);
>>
>> This looks wrong.  You swap xbl as you move it to t0, then swap it
>> again as you put it back into xth.  So it looks like you'll translate
>>            0011223344556677 8899AABBCCDDEEFF
>> to
>>            8899AABBCCDDEEFF 7766554433221100
>> whereas it should become
>> 	   FFEEDDCCBBAA9977 7766554433221100
>
> Indeed, the third line should be a move, not a swap.

Correct, will send updated patch.

Regards
Nikunj
diff mbox

Patch

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index dab8f19..94989b2 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -376,6 +376,9 @@  GEN_OPCODE2(name, onam, opc1, opc2, opc3, inval, type, type2)
 #define GEN_HANDLER_E_2(name, opc1, opc2, opc3, opc4, inval, type, type2)     \
 GEN_OPCODE3(name, opc1, opc2, opc3, opc4, inval, type, type2)
 
+#define GEN_HANDLER2_E_2(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2) \
+GEN_OPCODE4(name, onam, opc1, opc2, opc3, opc4, inval, typ, typ2)
+
 typedef struct opcode_t {
     unsigned char opc1, opc2, opc3, opc4;
 #if HOST_LONG_BITS == 64 /* Explicitly align to 64 bits */
@@ -662,6 +665,21 @@  EXTRACT_HELPER(IMM8, 11, 8);
     },                                                                        \
     .oname = stringify(name),                                                 \
 }
+#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)        \
+{                                                                             \
+    .opc1 = op1,                                                              \
+    .opc2 = op2,                                                              \
+    .opc3 = op3,                                                              \
+    .opc4 = op4,                                                              \
+    .handler = {                                                              \
+        .inval1  = invl,                                                      \
+        .type = _typ,                                                         \
+        .type2 = _typ2,                                                       \
+        .handler = &gen_##name,                                               \
+        .oname = onam,                                                        \
+    },                                                                        \
+    .oname = onam,                                                            \
+}
 #else
 #define GEN_OPCODE(name, op1, op2, op3, invl, _typ, _typ2)                    \
 {                                                                             \
@@ -720,6 +738,20 @@  EXTRACT_HELPER(IMM8, 11, 8);
     },                                                                        \
     .oname = stringify(name),                                                 \
 }
+#define GEN_OPCODE4(name, onam, op1, op2, op3, op4, invl, _typ, _typ2)        \
+{                                                                             \
+    .opc1 = op1,                                                              \
+    .opc2 = op2,                                                              \
+    .opc3 = op3,                                                              \
+    .opc4 = op4,                                                              \
+    .handler = {                                                              \
+        .inval1  = invl,                                                      \
+        .type = _typ,                                                         \
+        .type2 = _typ2,                                                       \
+        .handler = &gen_##name,                                               \
+    },                                                                        \
+    .oname = onam,                                                            \
+}
 #endif
 
 /* SPR load/store helpers */
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 23ec1e1..52af5c1 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -132,6 +132,22 @@  static void gen_bswap16x8(TCGv_i64 outh, TCGv_i64 outl,
     tcg_temp_free_i64(mask);
 }
 
+static void gen_bswap32x4(TCGv_i64 outh, TCGv_i64 outl,
+                          TCGv_i64 inh, TCGv_i64 inl)
+{
+    TCGv_i64 hi = tcg_temp_new_i64();
+    TCGv_i64 lo = tcg_temp_new_i64();
+
+    tcg_gen_bswap64_i64(hi, inh);
+    tcg_gen_bswap64_i64(lo, inl);
+    tcg_gen_shri_i64(outh, hi, 32);
+    tcg_gen_deposit_i64(outh, outh, hi, 32, 32);
+    tcg_gen_shri_i64(outl, lo, 32);
+    tcg_gen_deposit_i64(outl, outl, lo, 32, 32);
+
+    tcg_temp_free_i64(hi);
+    tcg_temp_free_i64(lo);
+}
 static void gen_lxvh8x(DisasContext *ctx)
 {
     TCGv EA;
@@ -717,6 +733,67 @@  GEN_VSX_HELPER_2(xvrspim, 0x12, 0x0B, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspip, 0x12, 0x0A, 0, PPC2_VSX)
 GEN_VSX_HELPER_2(xvrspiz, 0x12, 0x09, 0, PPC2_VSX)
 
+static void gen_xxbrd(DisasContext *ctx)
+{
+    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
+    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
+    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
+    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
+
+    if (unlikely(!ctx->vsx_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VSXU);
+        return;
+    }
+    tcg_gen_bswap64_i64(xth, xbh);
+    tcg_gen_bswap64_i64(xtl, xbl);
+}
+
+static void gen_xxbrh(DisasContext *ctx)
+{
+    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
+    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
+    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
+    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
+
+    if (unlikely(!ctx->vsx_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VSXU);
+        return;
+    }
+    gen_bswap16x8(xth, xtl, xbh, xbl);
+}
+
+static void gen_xxbrq(DisasContext *ctx)
+{
+    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
+    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
+    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
+    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
+    TCGv_i64 t0 = tcg_temp_new_i64();
+
+    if (unlikely(!ctx->vsx_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VSXU);
+        return;
+    }
+    tcg_gen_bswap64_i64(t0, xbl);
+    tcg_gen_bswap64_i64(xtl, xbh);
+    tcg_gen_bswap64_i64(xth, t0);
+    tcg_temp_free_i64(t0);
+}
+
+static void gen_xxbrw(DisasContext *ctx)
+{
+    TCGv_i64 xth = cpu_vsrh(xT(ctx->opcode));
+    TCGv_i64 xtl = cpu_vsrl(xT(ctx->opcode));
+    TCGv_i64 xbh = cpu_vsrh(xB(ctx->opcode));
+    TCGv_i64 xbl = cpu_vsrl(xB(ctx->opcode));
+
+    if (unlikely(!ctx->vsx_enabled)) {
+        gen_exception(ctx, POWERPC_EXCP_VSXU);
+        return;
+    }
+    gen_bswap32x4(xth, xtl, xbh, xbl);
+}
+
 #define VSX_LOGICAL(name, tcg_op)                                    \
 static void glue(gen_, name)(DisasContext * ctx)                     \
     {                                                                \
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 10eb4b9..af0d27e 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -39,6 +39,10 @@  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 
+#define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)                          \
+GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \
+GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
+
 #define GEN_XX3FORM(name, opc2, opc3, fl2)                           \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2), \
@@ -222,6 +226,10 @@  GEN_XX2FORM(xvrspic, 0x16, 0x0A, PPC2_VSX),
 GEN_XX2FORM(xvrspim, 0x12, 0x0B, PPC2_VSX),
 GEN_XX2FORM(xvrspip, 0x12, 0x0A, PPC2_VSX),
 GEN_XX2FORM(xvrspiz, 0x12, 0x09, PPC2_VSX),
+GEN_XX2FORM_EO(xxbrh, 0x16, 0x1D, 0x07, PPC2_ISA300),
+GEN_XX2FORM_EO(xxbrw, 0x16, 0x1D, 0x0F, PPC2_ISA300),
+GEN_XX2FORM_EO(xxbrd, 0x16, 0x1D, 0x17, PPC2_ISA300),
+GEN_XX2FORM_EO(xxbrq, 0x16, 0x1D, 0x1F, PPC2_ISA300),
 
 #define VSX_LOGICAL(name, opc2, opc3, fl2) \
 GEN_XX3FORM(name, opc2, opc3, fl2)