diff mbox

[v2,5/6] target-ppc: implement xxextractuw instruction

Message ID 1481285845-16415-6-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Dec. 9, 2016, 12:17 p.m. UTC
xxextractuw: VSX Vector Extract Unsigned Word

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 target-ppc/helper.h                 |  1 +
 target-ppc/int_helper.c             | 21 +++++++++++++++++++++
 target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
 target-ppc/translate/vsx-ops.inc.c  |  5 +++++
 4 files changed, 54 insertions(+)

Comments

David Gibson Dec. 12, 2016, 12:30 a.m. UTC | #1
On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
> xxextractuw: VSX Vector Extract Unsigned Word
> 
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  target-ppc/helper.h                 |  1 +
>  target-ppc/int_helper.c             | 21 +++++++++++++++++++++
>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
>  target-ppc/translate/vsx-ops.inc.c  |  5 +++++
>  4 files changed, 54 insertions(+)
> 
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 4707db4..8b30420 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -540,6 +540,7 @@ DEF_HELPER_2(xvrspip, void, env, i32)
>  DEF_HELPER_2(xvrspiz, void, env, i32)
>  DEF_HELPER_2(xxperm, void, env, i32)
>  DEF_HELPER_2(xxpermr, void, env, i32)
> +DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
>  
>  DEF_HELPER_2(efscfsi, i32, env, i32)
>  DEF_HELPER_2(efscfui, i32, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index 7989b1f..e3f66ac 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2033,6 +2033,27 @@ VEXTRACT(uw, u32)
>  VEXTRACT(d, u64)
>  #undef VEXTRACT
>  
> +void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
> +                        target_ulong xbn, uint32_t index)
> +{
> +    ppc_vsr_t xt, xb;
> +    size_t es = sizeof(uint32_t);
> +    uint32_t ext_index;
> +
> +    getVSR(xbn, &xb, env);
> +    memset(&xt, 0, sizeof(xt));
> +
> +#if defined(HOST_WORDS_BIGENDIAN)
> +    ext_index = index;
> +    memcpy(&xt.u8[8 - es], &xb.u8[ext_index], es);
> +#else
> +    ext_index = (16 - index) - es;
> +    memcpy(&xt.u8[8], &xb.u8[ext_index], es);

Hm.  So, IIUC, ext_index is the byte element - in IBM numbering - to
start copying from.  But I thought that when we have an LE host, the
IBM byte element ordering is reversed from the actual order in host
memory, so we'd need &xb.u8[16 - ext_index - es]

> +#endif
> +
> +    putVSR(xtn, &xt, env);
> +}
> +
>  #define VEXT_SIGNED(name, element, mask, cast, recast)              \
>  void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
>  {                                                                   \
> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> index 2a17c35..1c40a35 100644
> --- a/target-ppc/translate/vsx-impl.inc.c
> +++ b/target-ppc/translate/vsx-impl.inc.c
> @@ -1180,6 +1180,33 @@ static void gen_xxsldwi(DisasContext *ctx)
>      tcg_temp_free_i64(xtl);
>  }
>  
> +#define VSX_EXTRACT(name)                                       \
> +static void gen_##name(DisasContext *ctx)                       \
> +{                                                               \
> +    TCGv xt, xb;                                                \
> +    TCGv_i32 t0 = tcg_temp_new_i32();                           \
> +    uint8_t uimm = UIMM4(ctx->opcode);                          \
> +                                                                \
> +    if (unlikely(!ctx->vsx_enabled)) {                          \
> +        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
> +        return;                                                 \
> +    }                                                           \
> +    if (uimm > 12) {                                            \

Throughout the helper you use es == sizeof(uint32_t), but here you
hardcode the assumption of 4 bytes, which seems a bit inconsistent.

> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
> +        return;                                                 \

So, I know the architecture says it is undefined.  But since you're
testing for the bogus case anyway, why not turn this into an
exception?  That seems like it would be more helpful for debugging the
guest than just setting the result to zero.  Or is this done to match
actual hardware behaviour?

> +    }                                                           \
> +    xt = tcg_const_tl(xT(ctx->opcode));                         \
> +    xb = tcg_const_tl(xB(ctx->opcode));                         \
> +    tcg_gen_movi_i32(t0, uimm);                                 \
> +    gen_helper_##name(cpu_env, xt, xb, t0);                     \
> +    tcg_temp_free(xb);                                          \
> +    tcg_temp_free(xt);                                          \
> +    tcg_temp_free_i32(t0);                                      \
> +}
> +
> +VSX_EXTRACT(xxextractuw)
> +
>  #undef GEN_XX2FORM
>  #undef GEN_XX3FORM
>  #undef GEN_XX2IFORM
> diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
> index 46b95e3..473d925 100644
> --- a/target-ppc/translate/vsx-ops.inc.c
> +++ b/target-ppc/translate/vsx-ops.inc.c
> @@ -49,6 +49,10 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
>  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
>  
> +#define GEN_XX2FORM_EXT(name, opc2, opc3, fl2)                          \
> +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0x00100000, PPC_NONE, fl2), \
> +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0x00100000, PPC_NONE, fl2)
> +
>  #define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)                          \
>  GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \
>  GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
> @@ -280,6 +284,7 @@ GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300),
>  GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
>  GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
>  GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
> +GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300),
>  
>  #define GEN_XXSEL_ROW(opc3) \
>  GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \
Nikunj Dadhania Dec. 12, 2016, 4:01 a.m. UTC | #2
On 12 December 2016 at 06:00, David Gibson <david@gibson.dropbear.id.au> wrote:
> On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
>> xxextractuw: VSX Vector Extract Unsigned Word
>>
>> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> ---
>>  target-ppc/helper.h                 |  1 +
>>  target-ppc/int_helper.c             | 21 +++++++++++++++++++++
>>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
>>  target-ppc/translate/vsx-ops.inc.c  |  5 +++++
>>  4 files changed, 54 insertions(+)
>>
>> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
>> index 4707db4..8b30420 100644
>> --- a/target-ppc/helper.h
>> +++ b/target-ppc/helper.h
>> @@ -540,6 +540,7 @@ DEF_HELPER_2(xvrspip, void, env, i32)
>>  DEF_HELPER_2(xvrspiz, void, env, i32)
>>  DEF_HELPER_2(xxperm, void, env, i32)
>>  DEF_HELPER_2(xxpermr, void, env, i32)
>> +DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
>>
>>  DEF_HELPER_2(efscfsi, i32, env, i32)
>>  DEF_HELPER_2(efscfui, i32, env, i32)
>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index 7989b1f..e3f66ac 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -2033,6 +2033,27 @@ VEXTRACT(uw, u32)
>>  VEXTRACT(d, u64)
>>  #undef VEXTRACT
>>
>> +void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
>> +                        target_ulong xbn, uint32_t index)
>> +{
>> +    ppc_vsr_t xt, xb;
>> +    size_t es = sizeof(uint32_t);
>> +    uint32_t ext_index;
>> +
>> +    getVSR(xbn, &xb, env);
>> +    memset(&xt, 0, sizeof(xt));
>> +
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +    ext_index = index;
>> +    memcpy(&xt.u8[8 - es], &xb.u8[ext_index], es);
>> +#else
>> +    ext_index = (16 - index) - es;
>> +    memcpy(&xt.u8[8], &xb.u8[ext_index], es);
>
> Hm.  So, IIUC, ext_index is the byte element - in IBM numbering - to
> start copying from.  But I thought that when we have an LE host, the
> IBM byte element ordering is reversed from the actual order in host
> memory, so we'd need &xb.u8[16 - ext_index - es]

I don't follow; I am getting the index from the user. So in the case of a BE host:

ext_index = index;

LE Host:

ext_index = (16 - index) - es;

I am already doing that. Am I missing something?

>
>> +#endif
>> +
>> +    putVSR(xtn, &xt, env);
>> +}
>> +
>>  #define VEXT_SIGNED(name, element, mask, cast, recast)              \
>>  void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
>>  {                                                                   \
>> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
>> index 2a17c35..1c40a35 100644
>> --- a/target-ppc/translate/vsx-impl.inc.c
>> +++ b/target-ppc/translate/vsx-impl.inc.c
>> @@ -1180,6 +1180,33 @@ static void gen_xxsldwi(DisasContext *ctx)
>>      tcg_temp_free_i64(xtl);
>>  }
>>
>> +#define VSX_EXTRACT(name)                                       \
>> +static void gen_##name(DisasContext *ctx)                       \
>> +{                                                               \
>> +    TCGv xt, xb;                                                \
>> +    TCGv_i32 t0 = tcg_temp_new_i32();                           \
>> +    uint8_t uimm = UIMM4(ctx->opcode);                          \
>> +                                                                \
>> +    if (unlikely(!ctx->vsx_enabled)) {                          \
>> +        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
>> +        return;                                                 \
>> +    }                                                           \
>> +    if (uimm > 12) {                                            \
>
> Throughout the helper you use es == sizeof(uint32_t), but here you
> hardcode the assumption of 4 bytes, seems a bit inconsistent.
>
>> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
>> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
>> +        return;                                                 \
>
> So, I know the architecture says it is undefined.  But since you're
> testing for the bogus case anyway, why not turn this into an
> exception. That seems like it would be more helpful for debugging the
> guest than just setting the result to zero.  Or is this done to match
> actual hardware behaviour?

I haven't had a chance to run on the real hardware, but on the system
simulator it happily returns extracted content even if UIMM > 12.

Regards
Nikunj
David Gibson Dec. 12, 2016, 4:07 a.m. UTC | #3
On Mon, Dec 12, 2016 at 09:31:11AM +0530, Nikunj Dadhania wrote:
> On 12 December 2016 at 06:00, David Gibson <david@gibson.dropbear.id.au> wrote:
> > On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
> >> xxextractuw: VSX Vector Extract Unsigned Word
> >>
> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> ---
> >>  target-ppc/helper.h                 |  1 +
> >>  target-ppc/int_helper.c             | 21 +++++++++++++++++++++
> >>  target-ppc/translate/vsx-impl.inc.c | 27 +++++++++++++++++++++++++++
> >>  target-ppc/translate/vsx-ops.inc.c  |  5 +++++
> >>  4 files changed, 54 insertions(+)
> >>
> >> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> >> index 4707db4..8b30420 100644
> >> --- a/target-ppc/helper.h
> >> +++ b/target-ppc/helper.h
> >> @@ -540,6 +540,7 @@ DEF_HELPER_2(xvrspip, void, env, i32)
> >>  DEF_HELPER_2(xvrspiz, void, env, i32)
> >>  DEF_HELPER_2(xxperm, void, env, i32)
> >>  DEF_HELPER_2(xxpermr, void, env, i32)
> >> +DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
> >>
> >>  DEF_HELPER_2(efscfsi, i32, env, i32)
> >>  DEF_HELPER_2(efscfui, i32, env, i32)
> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> >> index 7989b1f..e3f66ac 100644
> >> --- a/target-ppc/int_helper.c
> >> +++ b/target-ppc/int_helper.c
> >> @@ -2033,6 +2033,27 @@ VEXTRACT(uw, u32)
> >>  VEXTRACT(d, u64)
> >>  #undef VEXTRACT
> >>
> >> +void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
> >> +                        target_ulong xbn, uint32_t index)
> >> +{
> >> +    ppc_vsr_t xt, xb;
> >> +    size_t es = sizeof(uint32_t);
> >> +    uint32_t ext_index;
> >> +
> >> +    getVSR(xbn, &xb, env);
> >> +    memset(&xt, 0, sizeof(xt));
> >> +
> >> +#if defined(HOST_WORDS_BIGENDIAN)
> >> +    ext_index = index;
> >> +    memcpy(&xt.u8[8 - es], &xb.u8[ext_index], es);
> >> +#else
> >> +    ext_index = (16 - index) - es;
> >> +    memcpy(&xt.u8[8], &xb.u8[ext_index], es);
> >
> > Hm.  So, IIUC, ext_index is the byte element - in IBM numbering - to
> > start copying from.  But I thought that when we have an LE host, the
> > IBM byte element ordering is reversed from the actual order in host
> > memory, so we'd need &xb.u8[16 - ext_index - es]
> 
> I am not getting you, I am getting index from user. So in case of BE host:
> 
> ext_index = index;
> 
> LE Host:
> 
> ext_index = (16 - index) - es;
> 
> I am already doing that. Am I missing something.

Duh, sorry, apparently I'm blind and missed that logic.

> >> +#endif
> >> +
> >> +    putVSR(xtn, &xt, env);
> >> +}
> >> +
> >>  #define VEXT_SIGNED(name, element, mask, cast, recast)              \
> >>  void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
> >>  {                                                                   \
> >> diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
> >> index 2a17c35..1c40a35 100644
> >> --- a/target-ppc/translate/vsx-impl.inc.c
> >> +++ b/target-ppc/translate/vsx-impl.inc.c
> >> @@ -1180,6 +1180,33 @@ static void gen_xxsldwi(DisasContext *ctx)
> >>      tcg_temp_free_i64(xtl);
> >>  }
> >>
> >> +#define VSX_EXTRACT(name)                                       \
> >> +static void gen_##name(DisasContext *ctx)                       \
> >> +{                                                               \
> >> +    TCGv xt, xb;                                                \
> >> +    TCGv_i32 t0 = tcg_temp_new_i32();                           \
> >> +    uint8_t uimm = UIMM4(ctx->opcode);                          \
> >> +                                                                \
> >> +    if (unlikely(!ctx->vsx_enabled)) {                          \
> >> +        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
> >> +        return;                                                 \
> >> +    }                                                           \
> >> +    if (uimm > 12) {                                            \
> >
> > Throughout the helper you use es == sizeof(uint32_t), but here you
> > hardcode the assumption of 4 bytes, seems a bit inconsistent.
> >
> >> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
> >> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
> >> +        return;                                                 \
> >
> > So, I know the architecture says it is undefined.  But since you're
> > testing for the bogus case anyway, why not turn this into an
> > exception. That seems like it would be more helpful for debugging the
> > guest than just setting the result to zero.  Or is this done to match
> > actual hardware behaviour?
> 
> I havent had a change to run on the real hardware, but on the system
> simulator, it happily
> returns extracted content even if UIMM > 12.

Hm.  Returns what exactly?
Nikunj A. Dadhania Dec. 14, 2016, 8:44 a.m. UTC | #4
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Mon, Dec 12, 2016 at 09:31:11AM +0530, Nikunj Dadhania wrote:
>> On 12 December 2016 at 06:00, David Gibson <david@gibson.dropbear.id.au> wrote:
>> > On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
>> >> xxextractuw: VSX Vector Extract Unsigned Word
>> >>
>> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
>> >> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
>> >> +        return;                                                 \
>> >
>> > So, I know the architecture says it is undefined.  But since you're
>> > testing for the bogus case anyway, why not turn this into an
>> > exception. That seems like it would be more helpful for debugging the
>> > guest than just setting the result to zero.  Or is this done to match
>> > actual hardware behaviour?
>> 
>> I havent had a change to run on the real hardware, but on the system
>> simulator, it happily
>> returns extracted content even if UIMM > 12.
>
> Hm.  Returns what exactly?

So for the LE case, extracting from 15 returns the following; basically
it's wrapping around to 0.

xxextractuw: 15 - ooTSET a si sihT
                  ________ihTo____

Regards,
Nikunj
David Gibson Dec. 16, 2016, 4:19 a.m. UTC | #5
On Wed, Dec 14, 2016 at 02:14:26PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > [ Unknown signature status ]
> > On Mon, Dec 12, 2016 at 09:31:11AM +0530, Nikunj Dadhania wrote:
> >> On 12 December 2016 at 06:00, David Gibson <david@gibson.dropbear.id.au> wrote:
> >> > On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
> >> >> xxextractuw: VSX Vector Extract Unsigned Word
> >> >>
> >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> >> >> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
> >> >> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
> >> >> +        return;                                                 \
> >> >
> >> > So, I know the architecture says it is undefined.  But since you're
> >> > testing for the bogus case anyway, why not turn this into an
> >> > exception. That seems like it would be more helpful for debugging the
> >> > guest than just setting the result to zero.  Or is this done to match
> >> > actual hardware behaviour?
> >> 
> >> I havent had a change to run on the real hardware, but on the system
> >> simulator, it happily
> >> returns extracted content even if UIMM > 12.
> >
> > Hm.  Returns what exactly?
> 
> So for LE case extracting from 15 returns following, basically its
> rounding up to 0.

> xxextractuw: 15 - ooTSET a si sihT
>                   ________ihTo____

Ok.  IIRC your implementation did not do this - it set the
"overflowed" bytes to 0 instead of wrapping round and taking them from
the other end of the input.  I think we should either match hardware
behaviour or simply trap here, rather than do something else.
Nikunj A. Dadhania Dec. 19, 2016, 4:25 a.m. UTC | #6
David Gibson <david@gibson.dropbear.id.au> writes:

> [ Unknown signature status ]
> On Wed, Dec 14, 2016 at 02:14:26PM +0530, Nikunj A Dadhania wrote:
>> David Gibson <david@gibson.dropbear.id.au> writes:
>> 
>> > [ Unknown signature status ]
>> > On Mon, Dec 12, 2016 at 09:31:11AM +0530, Nikunj Dadhania wrote:
>> >> On 12 December 2016 at 06:00, David Gibson <david@gibson.dropbear.id.au> wrote:
>> >> > On Fri, Dec 09, 2016 at 05:47:24PM +0530, Nikunj A Dadhania wrote:
>> >> >> xxextractuw: VSX Vector Extract Unsigned Word
>> >> >>
>> >> >> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
>> >> >> +        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
>> >> >> +        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
>> >> >> +        return;                                                 \
>> >> >
>> >> > So, I know the architecture says it is undefined.  But since you're
>> >> > testing for the bogus case anyway, why not turn this into an
>> >> > exception. That seems like it would be more helpful for debugging the
>> >> > guest than just setting the result to zero.  Or is this done to match
>> >> > actual hardware behaviour?
>> >> 
>> >> I havent had a change to run on the real hardware, but on the system
>> >> simulator, it happily
>> >> returns extracted content even if UIMM > 12.
>> >
>> > Hm.  Returns what exactly?
>> 
>> So for LE case extracting from 15 returns following, basically its
>> rounding up to 0.
>
>> xxextractuw: 15 - ooTSET a si sihT
>>                   ________ihTo____
>
> Ok.  IIRC your implementation did not do this - it set the
> "overflowed" bytes to 0 instead of wrapping round and taking them from
> the other end of the input.  If think we should either match hardware
> behaviour or simply trap here, rather than do something else.

Sure, will update and send the patch matching hardware behaviour.

Regards
Nikunj
diff mbox

Patch

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 4707db4..8b30420 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -540,6 +540,7 @@  DEF_HELPER_2(xvrspip, void, env, i32)
 DEF_HELPER_2(xvrspiz, void, env, i32)
 DEF_HELPER_2(xxperm, void, env, i32)
 DEF_HELPER_2(xxpermr, void, env, i32)
+DEF_HELPER_4(xxextractuw, void, env, tl, tl, i32)
 
 DEF_HELPER_2(efscfsi, i32, env, i32)
 DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 7989b1f..e3f66ac 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2033,6 +2033,27 @@  VEXTRACT(uw, u32)
 VEXTRACT(d, u64)
 #undef VEXTRACT
 
+void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
+                        target_ulong xbn, uint32_t index)
+{
+    ppc_vsr_t xt, xb;
+    size_t es = sizeof(uint32_t);
+    uint32_t ext_index;
+
+    getVSR(xbn, &xb, env);
+    memset(&xt, 0, sizeof(xt));
+
+#if defined(HOST_WORDS_BIGENDIAN)
+    ext_index = index;
+    memcpy(&xt.u8[8 - es], &xb.u8[ext_index], es);
+#else
+    ext_index = (16 - index) - es;
+    memcpy(&xt.u8[8], &xb.u8[ext_index], es);
+#endif
+
+    putVSR(xtn, &xt, env);
+}
+
 #define VEXT_SIGNED(name, element, mask, cast, recast)              \
 void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
 {                                                                   \
diff --git a/target-ppc/translate/vsx-impl.inc.c b/target-ppc/translate/vsx-impl.inc.c
index 2a17c35..1c40a35 100644
--- a/target-ppc/translate/vsx-impl.inc.c
+++ b/target-ppc/translate/vsx-impl.inc.c
@@ -1180,6 +1180,33 @@  static void gen_xxsldwi(DisasContext *ctx)
     tcg_temp_free_i64(xtl);
 }
 
+#define VSX_EXTRACT(name)                                       \
+static void gen_##name(DisasContext *ctx)                       \
+{                                                               \
+    TCGv xt, xb;                                                \
+    TCGv_i32 t0 = tcg_temp_new_i32();                           \
+    uint8_t uimm = UIMM4(ctx->opcode);                          \
+                                                                \
+    if (unlikely(!ctx->vsx_enabled)) {                          \
+        gen_exception(ctx, POWERPC_EXCP_VSXU);                  \
+        return;                                                 \
+    }                                                           \
+    if (uimm > 12) {                                            \
+        tcg_gen_movi_i64(cpu_vsrh(xT(ctx->opcode)), 0);         \
+        tcg_gen_movi_i64(cpu_vsrl(xT(ctx->opcode)), 0);         \
+        return;                                                 \
+    }                                                           \
+    xt = tcg_const_tl(xT(ctx->opcode));                         \
+    xb = tcg_const_tl(xB(ctx->opcode));                         \
+    tcg_gen_movi_i32(t0, uimm);                                 \
+    gen_helper_##name(cpu_env, xt, xb, t0);                     \
+    tcg_temp_free(xb);                                          \
+    tcg_temp_free(xt);                                          \
+    tcg_temp_free_i32(t0);                                      \
+}
+
+VSX_EXTRACT(xxextractuw)
+
 #undef GEN_XX2FORM
 #undef GEN_XX3FORM
 #undef GEN_XX2IFORM
diff --git a/target-ppc/translate/vsx-ops.inc.c b/target-ppc/translate/vsx-ops.inc.c
index 46b95e3..473d925 100644
--- a/target-ppc/translate/vsx-ops.inc.c
+++ b/target-ppc/translate/vsx-ops.inc.c
@@ -49,6 +49,10 @@  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0, PPC_NONE, fl2)
 
+#define GEN_XX2FORM_EXT(name, opc2, opc3, fl2)                          \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 0x00100000, PPC_NONE, fl2), \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 0x00100000, PPC_NONE, fl2)
+
 #define GEN_XX2FORM_EO(name, opc2, opc3, opc4, fl2)                          \
 GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 0, opc3, opc4, 0, PPC_NONE, fl2), \
 GEN_HANDLER2_E_2(name, #name, 0x3C, opc2 | 1, opc3, opc4, 0, PPC_NONE, fl2)
@@ -280,6 +284,7 @@  GEN_XX3FORM(xxpermr, 0x08, 0x07, PPC2_ISA300),
 GEN_XX2FORM(xxspltw, 0x08, 0x0A, PPC2_VSX),
 GEN_XX1FORM(xxspltib, 0x08, 0x0B, PPC2_ISA300),
 GEN_XX3FORM_DM(xxsldwi, 0x08, 0x00),
+GEN_XX2FORM_EXT(xxextractuw, 0x0A, 0x0A, PPC2_ISA300),
 
 #define GEN_XXSEL_ROW(opc3) \
 GEN_HANDLER2_E(xxsel, "xxsel", 0x3C, 0x18, opc3, 0, PPC_NONE, PPC2_VSX), \