diff mbox

[v2,3/6] target-ppc: add vrldmi and vrlwmi instructions

Message ID 1477463189-26971-4-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania Oct. 26, 2016, 6:26 a.m. UTC
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>

vrldmi: Vector Rotate Left Dword then Mask Insert
vrlwmi: Vector Rotate Left Word then Mask Insert

Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
( use extract[32,64] and rol[32,64] )
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---
 disas/ppc.c                         |  2 ++
 target-ppc/helper.h                 |  2 ++
 target-ppc/int_helper.c             | 46 +++++++++++++++++++++++++++++++++++++
 target-ppc/translate/vmx-impl.inc.c |  6 +++++
 target-ppc/translate/vmx-ops.inc.c  |  4 ++--
 5 files changed, 58 insertions(+), 2 deletions(-)

Comments

David Gibson Oct. 27, 2016, 3:38 a.m. UTC | #1
On Wed, Oct 26, 2016 at 11:56:26AM +0530, Nikunj A Dadhania wrote:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
> 
> vrldmi: Vector Rotate Left Dword then Mask Insert
> vrlwmi: Vector Rotate Left Word then Mask Insert
> 
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
> ( use extract[32,64] and rol[32,64] )
> Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
> ---
>  disas/ppc.c                         |  2 ++
>  target-ppc/helper.h                 |  2 ++
>  target-ppc/int_helper.c             | 46 +++++++++++++++++++++++++++++++++++++
>  target-ppc/translate/vmx-impl.inc.c |  6 +++++
>  target-ppc/translate/vmx-ops.inc.c  |  4 ++--
>  5 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/disas/ppc.c b/disas/ppc.c
> index 052cebe..32f0d8d 100644
> --- a/disas/ppc.c
> +++ b/disas/ppc.c
> @@ -2286,6 +2286,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
>  { "vrlh",      VX(4,   68), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
>  { "vrlw",      VX(4,  132), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
>  { "vrsqrtefp", VX(4,  330), VX_MASK,	PPCVEC,		{ VD, VB } },
> +{ "vrldmi",    VX(4,  197), VX_MASK,    PPCVEC,         { VD, VA, VB } },
> +{ "vrlwmi",    VX(4,  133), VX_MASK,    PPCVEC,         { VD, VA, VB} },
>  { "vsel",      VXA(4,  42), VXA_MASK,	PPCVEC,		{ VD, VA, VB, VC } },
>  { "vsl",       VX(4,  452), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
>  { "vslb",      VX(4,  260), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 0337292..9fb8f0d 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -325,6 +325,8 @@ DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr)
>  DEF_HELPER_4(vminfp, void, env, avr, avr, avr)
>  DEF_HELPER_3(vrefp, void, env, avr, avr)
>  DEF_HELPER_3(vrsqrtefp, void, env, avr, avr)
> +DEF_HELPER_3(vrlwmi, void, avr, avr, avr)
> +DEF_HELPER_3(vrldmi, void, avr, avr, avr)
>  DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr)
>  DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr)
>  DEF_HELPER_3(vexptefp, void, env, avr, avr)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index dca4798..b54cd7c 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
>      }
>  }
>  
> +#define MASK(size, max_val)                                     \
> +static inline uint##size##_t mask_u##size(uint##size##_t start, \
> +                                uint##size##_t end)             \
> +{                                                               \
> +    uint##size##_t ret, max_bit = size - 1;                     \
> +                                                                \
> +    if (likely(start == 0)) {                                   \
> +        ret = max_val << (max_bit - end);                       \
> +    } else if (likely(end == max_bit)) {                        \
> +        ret = max_val >> start;                                 \
> +    } else {                                                    \
> +        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^          \
> +            (((uint##size##_t)(-1ULL) >> (end)) >> 1);          \
> +        if (unlikely(start > end)) {                            \
> +            return ~ret;                                        \
> +        }                                                       \
> +    }                                                           \
> +                                                                \
> +    return ret;                                                 \
> +}
> +
> +MASK(32, UINT32_MAX);
> +MASK(64, UINT64_MAX);

It would be nicer to merge this mask generation with the
implementation in target-ppc/translate.c (called MASK()).

> +
> +#define VRLMI(name, size, element)                                    \
> +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
> +{                                                                     \
> +    int i;                                                            \
> +    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
> +        uint##size##_t src1 = a->element[i];                          \
> +        uint##size##_t src2 = b->element[i];                          \
> +        uint##size##_t src3 = r->element[i];                          \
> +        uint##size##_t begin, end, shift, mask, rot_val;              \
> +                                                                      \
> +        shift = extract##size(src2, 0, 6);                            \
> +        end   = extract##size(src2, 8, 6);                            \
> +        begin = extract##size(src2, 16, 6);                           \
> +        rot_val = rol##size(src1, shift);                             \
> +        mask = mask_u##size(begin, end);                              \
> +        r->element[i] = (rot_val & mask) | (src3 & ~mask);            \
> +    }                                                                 \
> +}
> +
> +VRLMI(vrldmi, 64, u64);
> +VRLMI(vrlwmi, 32, u32);
> +
>  void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
>                   ppc_avr_t *c)
>  {
> diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
> index fc612d9..fdfbd6a 100644
> --- a/target-ppc/translate/vmx-impl.inc.c
> +++ b/target-ppc/translate/vmx-impl.inc.c
> @@ -488,7 +488,13 @@ GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
>  GEN_VXFORM(vrlb, 2, 0);
>  GEN_VXFORM(vrlh, 2, 1);
>  GEN_VXFORM(vrlw, 2, 2);
> +GEN_VXFORM(vrlwmi, 2, 2);
> +GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
> +                vrlwmi, PPC_NONE, PPC2_ISA300)
>  GEN_VXFORM(vrld, 2, 3);
> +GEN_VXFORM(vrldmi, 2, 3);
> +GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
> +                vrldmi, PPC_NONE, PPC2_ISA300)
>  GEN_VXFORM(vsl, 2, 7);
>  GEN_VXFORM(vsr, 2, 11);
>  GEN_VXFORM_ENV(vpkuhum, 7, 0);
> diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
> index cc7ed7e..76b3593 100644
> --- a/target-ppc/translate/vmx-ops.inc.c
> +++ b/target-ppc/translate/vmx-ops.inc.c
> @@ -143,8 +143,8 @@ GEN_VXFORM_207(vsubcuq, 0, 21),
>  GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
>  GEN_VXFORM(vrlb, 2, 0),
>  GEN_VXFORM(vrlh, 2, 1),
> -GEN_VXFORM(vrlw, 2, 2),
> -GEN_VXFORM_207(vrld, 2, 3),
> +GEN_VXFORM_DUAL(vrlw, vrlwmi, 2, 2, PPC_ALTIVEC, PPC_NONE),
> +GEN_VXFORM_DUAL(vrld, vrldmi, 2, 3, PPC_NONE, PPC2_ALTIVEC_207),
>  GEN_VXFORM(vsl, 2, 7),
>  GEN_VXFORM(vsr, 2, 11),
>  GEN_VXFORM(vpkuhum, 7, 0),
Nikunj A. Dadhania Oct. 27, 2016, 8:33 a.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:
>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index dca4798..b54cd7c 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
>>      }
>>  }
>>  
>> +#define MASK(size, max_val)                                     \
>> +static inline uint##size##_t mask_u##size(uint##size##_t start, \
>> +                                uint##size##_t end)             \
>> +{                                                               \
>> +    uint##size##_t ret, max_bit = size - 1;                     \
>> +                                                                \
>> +    if (likely(start == 0)) {                                   \
>> +        ret = max_val << (max_bit - end);                       \
>> +    } else if (likely(end == max_bit)) {                        \
>> +        ret = max_val >> start;                                 \
>> +    } else {                                                    \
>> +        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^          \
>> +            (((uint##size##_t)(-1ULL) >> (end)) >> 1);          \
>> +        if (unlikely(start > end)) {                            \
>> +            return ~ret;                                        \
>> +        }                                                       \
>> +    }                                                           \
>> +                                                                \
>> +    return ret;                                                 \
>> +}
>> +
>> +MASK(32, UINT32_MAX);
>> +MASK(64, UINT64_MAX);
>
> It would be nicer to merge this mask generation with the
> implementation in target-ppc/translate.c (called MASK()).

How about something like this in target-ppc/cpu.h

#define FUNC_MASK(name, ret_type, size, max_val)                  \
static inline ret_type name (uint##size##_t start,                \
                             uint##size##_t end)                  \
{                                                                 \
    ret_type ret, max_bit = size - 1;                             \
                                                                  \
    if (likely(start == 0)) {                                     \
        ret = max_val << (max_bit - end);                         \
    } else if (likely(end == max_bit)) {                          \
        ret = max_val >> start;                                   \
    } else {                                                      \
        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^            \
            (((uint##size##_t)(-1ULL) >> (end)) >> 1);            \
        if (unlikely(start > end)) {                              \
            return ~ret;                                          \
        }                                                         \
    }                                                             \
                                                                  \
    return ret;                                                   \
}

#if defined(TARGET_PPC64)
FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX);
#else
FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX);
#endif
FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX);
FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX);

Regards
Nikunj
David Gibson Oct. 28, 2016, 1:30 a.m. UTC | #3
On Thu, Oct 27, 2016 at 02:03:01PM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> >> index dca4798..b54cd7c 100644
> >> --- a/target-ppc/int_helper.c
> >> +++ b/target-ppc/int_helper.c
> >> @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
> >>      }
> >>  }
> >>  
> >> +#define MASK(size, max_val)                                     \
> >> +static inline uint##size##_t mask_u##size(uint##size##_t start, \
> >> +                                uint##size##_t end)             \
> >> +{                                                               \
> >> +    uint##size##_t ret, max_bit = size - 1;                     \
> >> +                                                                \
> >> +    if (likely(start == 0)) {                                   \
> >> +        ret = max_val << (max_bit - end);                       \
> >> +    } else if (likely(end == max_bit)) {                        \
> >> +        ret = max_val >> start;                                 \
> >> +    } else {                                                    \
> >> +        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^          \
> >> +            (((uint##size##_t)(-1ULL) >> (end)) >> 1);          \
> >> +        if (unlikely(start > end)) {                            \
> >> +            return ~ret;                                        \
> >> +        }                                                       \
> >> +    }                                                           \
> >> +                                                                \
> >> +    return ret;                                                 \
> >> +}
> >> +
> >> +MASK(32, UINT32_MAX);
> >> +MASK(64, UINT64_MAX);
> >
> > It would be nicer to merge this mask generation with the
> > implementation in target-ppc/translate.c (called MASK()).
> 
> How about something like this in target-ppc/cpu.h
> 
> #define FUNC_MASK(name, ret_type, size, max_val)                  \
> static inline ret_type name (uint##size##_t start,                \
>                              uint##size##_t end)                  \
> {                                                                 \
>     ret_type ret, max_bit = size - 1;                             \
>                                                                   \
>     if (likely(start == 0)) {                                     \
>         ret = max_val << (max_bit - end);                         \
>     } else if (likely(end == max_bit)) {                          \
>         ret = max_val >> start;                                   \
>     } else {                                                      \
>         ret = (((uint##size##_t)(-1ULL)) >> (start)) ^            \
>             (((uint##size##_t)(-1ULL) >> (end)) >> 1);            \
>         if (unlikely(start > end)) {                              \
>             return ~ret;                                          \
>         }                                                         \
>     }                                                             \
>                                                                   \
>     return ret;                                                   \
> }
> 
> #if defined(TARGET_PPC64)
> FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX);
> #else
> FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX);
> #endif
> FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX);
> FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX);

That seems reasonable.
Richard Henderson Oct. 28, 2016, 4:28 p.m. UTC | #4
On 10/27/2016 06:30 PM, David Gibson wrote:
>> How about something like this in target-ppc/cpu.h
>>
>> #define FUNC_MASK(name, ret_type, size, max_val)                  \
>> static inline ret_type name (uint##size##_t start,                \
>>                              uint##size##_t end)                  \

Consider introducing an internals.h, for stuff that needs to be shared within 
target-ppc/, but is not required by any other user of cpu.h.


r~
diff mbox

Patch

diff --git a/disas/ppc.c b/disas/ppc.c
index 052cebe..32f0d8d 100644
--- a/disas/ppc.c
+++ b/disas/ppc.c
@@ -2286,6 +2286,8 @@  const struct powerpc_opcode powerpc_opcodes[] = {
 { "vrlh",      VX(4,   68), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
 { "vrlw",      VX(4,  132), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
 { "vrsqrtefp", VX(4,  330), VX_MASK,	PPCVEC,		{ VD, VB } },
+{ "vrldmi",    VX(4,  197), VX_MASK,    PPCVEC,         { VD, VA, VB } },
+{ "vrlwmi",    VX(4,  133), VX_MASK,    PPCVEC,         { VD, VA, VB} },
 { "vsel",      VXA(4,  42), VXA_MASK,	PPCVEC,		{ VD, VA, VB, VC } },
 { "vsl",       VX(4,  452), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
 { "vslb",      VX(4,  260), VX_MASK,	PPCVEC,		{ VD, VA, VB } },
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 0337292..9fb8f0d 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -325,6 +325,8 @@  DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr)
 DEF_HELPER_4(vminfp, void, env, avr, avr, avr)
 DEF_HELPER_3(vrefp, void, env, avr, avr)
 DEF_HELPER_3(vrsqrtefp, void, env, avr, avr)
+DEF_HELPER_3(vrlwmi, void, avr, avr, avr)
+DEF_HELPER_3(vrldmi, void, avr, avr, avr)
 DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr)
 DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr)
 DEF_HELPER_3(vexptefp, void, env, avr, avr)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index dca4798..b54cd7c 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1717,6 +1717,52 @@  void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
     }
 }
 
+#define MASK(size, max_val)                                     \
+static inline uint##size##_t mask_u##size(uint##size##_t start, \
+                                uint##size##_t end)             \
+{                                                               \
+    uint##size##_t ret, max_bit = size - 1;                     \
+                                                                \
+    if (likely(start == 0)) {                                   \
+        ret = max_val << (max_bit - end);                       \
+    } else if (likely(end == max_bit)) {                        \
+        ret = max_val >> start;                                 \
+    } else {                                                    \
+        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^          \
+            (((uint##size##_t)(-1ULL) >> (end)) >> 1);          \
+        if (unlikely(start > end)) {                            \
+            return ~ret;                                        \
+        }                                                       \
+    }                                                           \
+                                                                \
+    return ret;                                                 \
+}
+
+MASK(32, UINT32_MAX);
+MASK(64, UINT64_MAX);
+
+#define VRLMI(name, size, element)                                    \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
+{                                                                     \
+    int i;                                                            \
+    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
+        uint##size##_t src1 = a->element[i];                          \
+        uint##size##_t src2 = b->element[i];                          \
+        uint##size##_t src3 = r->element[i];                          \
+        uint##size##_t begin, end, shift, mask, rot_val;              \
+                                                                      \
+        shift = extract##size(src2, 0, 6);                            \
+        end   = extract##size(src2, 8, 6);                            \
+        begin = extract##size(src2, 16, 6);                           \
+        rot_val = rol##size(src1, shift);                             \
+        mask = mask_u##size(begin, end);                              \
+        r->element[i] = (rot_val & mask) | (src3 & ~mask);            \
+    }                                                                 \
+}
+
+VRLMI(vrldmi, 64, u64);
+VRLMI(vrlwmi, 32, u32);
+
 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
 {
diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c
index fc612d9..fdfbd6a 100644
--- a/target-ppc/translate/vmx-impl.inc.c
+++ b/target-ppc/translate/vmx-impl.inc.c
@@ -488,7 +488,13 @@  GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
 GEN_VXFORM(vrlb, 2, 0);
 GEN_VXFORM(vrlh, 2, 1);
 GEN_VXFORM(vrlw, 2, 2);
+GEN_VXFORM(vrlwmi, 2, 2);
+GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \
+                vrlwmi, PPC_NONE, PPC2_ISA300)
 GEN_VXFORM(vrld, 2, 3);
+GEN_VXFORM(vrldmi, 2, 3);
+GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \
+                vrldmi, PPC_NONE, PPC2_ISA300)
 GEN_VXFORM(vsl, 2, 7);
 GEN_VXFORM(vsr, 2, 11);
 GEN_VXFORM_ENV(vpkuhum, 7, 0);
diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c
index cc7ed7e..76b3593 100644
--- a/target-ppc/translate/vmx-ops.inc.c
+++ b/target-ppc/translate/vmx-ops.inc.c
@@ -143,8 +143,8 @@  GEN_VXFORM_207(vsubcuq, 0, 21),
 GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM(vrlb, 2, 0),
 GEN_VXFORM(vrlh, 2, 1),
-GEN_VXFORM(vrlw, 2, 2),
-GEN_VXFORM_207(vrld, 2, 3),
+GEN_VXFORM_DUAL(vrlw, vrlwmi, 2, 2, PPC_ALTIVEC, PPC_NONE),
+GEN_VXFORM_DUAL(vrld, vrldmi, 2, 3, PPC_NONE, PPC2_ALTIVEC_207),
 GEN_VXFORM(vsl, 2, 7),
 GEN_VXFORM(vsr, 2, 11),
 GEN_VXFORM(vpkuhum, 7, 0),