Message ID | 1477463189-26971-4-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Oct 26, 2016 at 11:56:26AM +0530, Nikunj A Dadhania wrote: > From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com> > > vrldmi: Vector Rotate Left Dword then Mask Insert > vrlwmi: Vector Rotate Left Word then Mask Insert > > Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> > Signed-off-by: Bharata B Rao <bharata@linux.vnet.ibm.com> > ( use extract[32,64] and rol[32,64] ) > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > --- > disas/ppc.c | 2 ++ > target-ppc/helper.h | 2 ++ > target-ppc/int_helper.c | 46 +++++++++++++++++++++++++++++++++++++ > target-ppc/translate/vmx-impl.inc.c | 6 +++++ > target-ppc/translate/vmx-ops.inc.c | 4 ++-- > 5 files changed, 58 insertions(+), 2 deletions(-) > > diff --git a/disas/ppc.c b/disas/ppc.c > index 052cebe..32f0d8d 100644 > --- a/disas/ppc.c > +++ b/disas/ppc.c > @@ -2286,6 +2286,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { > { "vrlh", VX(4, 68), VX_MASK, PPCVEC, { VD, VA, VB } }, > { "vrlw", VX(4, 132), VX_MASK, PPCVEC, { VD, VA, VB } }, > { "vrsqrtefp", VX(4, 330), VX_MASK, PPCVEC, { VD, VB } }, > +{ "vrldmi", VX(4, 197), VX_MASK, PPCVEC, { VD, VA, VB } }, > +{ "vrlwmi", VX(4, 133), VX_MASK, PPCVEC, { VD, VA, VB} }, > { "vsel", VXA(4, 42), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, > { "vsl", VX(4, 452), VX_MASK, PPCVEC, { VD, VA, VB } }, > { "vslb", VX(4, 260), VX_MASK, PPCVEC, { VD, VA, VB } }, > diff --git a/target-ppc/helper.h b/target-ppc/helper.h > index 0337292..9fb8f0d 100644 > --- a/target-ppc/helper.h > +++ b/target-ppc/helper.h > @@ -325,6 +325,8 @@ DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr) > DEF_HELPER_4(vminfp, void, env, avr, avr, avr) > DEF_HELPER_3(vrefp, void, env, avr, avr) > DEF_HELPER_3(vrsqrtefp, void, env, avr, avr) > +DEF_HELPER_3(vrlwmi, void, avr, avr, avr) > +DEF_HELPER_3(vrldmi, void, avr, avr, avr) > DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr) > DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr) > DEF_HELPER_3(vexptefp, void, env, avr, avr) > diff 
--git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > index dca4798..b54cd7c 100644 > --- a/target-ppc/int_helper.c > +++ b/target-ppc/int_helper.c > @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) > } > } > > +#define MASK(size, max_val) \ > +static inline uint##size##_t mask_u##size(uint##size##_t start, \ > + uint##size##_t end) \ > +{ \ > + uint##size##_t ret, max_bit = size - 1; \ > + \ > + if (likely(start == 0)) { \ > + ret = max_val << (max_bit - end); \ > + } else if (likely(end == max_bit)) { \ > + ret = max_val >> start; \ > + } else { \ > + ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \ > + (((uint##size##_t)(-1ULL) >> (end)) >> 1); \ > + if (unlikely(start > end)) { \ > + return ~ret; \ > + } \ > + } \ > + \ > + return ret; \ > +} > + > +MASK(32, UINT32_MAX); > +MASK(64, UINT64_MAX); It would be nicer to merge this mask generation with the implementation in target-ppc/translate.c (called MASK()). > + > +#define VRLMI(name, size, element) \ > +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > +{ \ > + int i; \ > + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ > + uint##size##_t src1 = a->element[i]; \ > + uint##size##_t src2 = b->element[i]; \ > + uint##size##_t src3 = r->element[i]; \ > + uint##size##_t begin, end, shift, mask, rot_val; \ > + \ > + shift = extract##size(src2, 0, 6); \ > + end = extract##size(src2, 8, 6); \ > + begin = extract##size(src2, 16, 6); \ > + rot_val = rol##size(src1, shift); \ > + mask = mask_u##size(begin, end); \ > + r->element[i] = (rot_val & mask) | (src3 & ~mask); \ > + } \ > +} > + > +VRLMI(vrldmi, 64, u64); > +VRLMI(vrlwmi, 32, u32); > + > void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, > ppc_avr_t *c) > { > diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c > index fc612d9..fdfbd6a 100644 > --- a/target-ppc/translate/vmx-impl.inc.c > +++ b/target-ppc/translate/vmx-impl.inc.c > @@ -488,7 
+488,13 @@ GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ > GEN_VXFORM(vrlb, 2, 0); > GEN_VXFORM(vrlh, 2, 1); > GEN_VXFORM(vrlw, 2, 2); > +GEN_VXFORM(vrlwmi, 2, 2); > +GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ > + vrlwmi, PPC_NONE, PPC2_ISA300) > GEN_VXFORM(vrld, 2, 3); > +GEN_VXFORM(vrldmi, 2, 3); > +GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ > + vrldmi, PPC_NONE, PPC2_ISA300) > GEN_VXFORM(vsl, 2, 7); > GEN_VXFORM(vsr, 2, 11); > GEN_VXFORM_ENV(vpkuhum, 7, 0); > diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c > index cc7ed7e..76b3593 100644 > --- a/target-ppc/translate/vmx-ops.inc.c > +++ b/target-ppc/translate/vmx-ops.inc.c > @@ -143,8 +143,8 @@ GEN_VXFORM_207(vsubcuq, 0, 21), > GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), > GEN_VXFORM(vrlb, 2, 0), > GEN_VXFORM(vrlh, 2, 1), > -GEN_VXFORM(vrlw, 2, 2), > -GEN_VXFORM_207(vrld, 2, 3), > +GEN_VXFORM_DUAL(vrlw, vrlwmi, 2, 2, PPC_ALTIVEC, PPC_NONE), > +GEN_VXFORM_DUAL(vrld, vrldmi, 2, 3, PPC_NONE, PPC2_ALTIVEC_207), > GEN_VXFORM(vsl, 2, 7), > GEN_VXFORM(vsr, 2, 11), > GEN_VXFORM(vpkuhum, 7, 0),
David Gibson <david@gibson.dropbear.id.au> writes:

>> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
>> index dca4798..b54cd7c 100644
>> --- a/target-ppc/int_helper.c
>> +++ b/target-ppc/int_helper.c
>> @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
>>      }
>>  }
>>
>> +#define MASK(size, max_val) \
>> +static inline uint##size##_t mask_u##size(uint##size##_t start, \
>> +                                          uint##size##_t end) \
>> +{ \
>> +    uint##size##_t ret, max_bit = size - 1; \
>> + \
>> +    if (likely(start == 0)) { \
>> +        ret = max_val << (max_bit - end); \
>> +    } else if (likely(end == max_bit)) { \
>> +        ret = max_val >> start; \
>> +    } else { \
>> +        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \
>> +              (((uint##size##_t)(-1ULL) >> (end)) >> 1); \
>> +        if (unlikely(start > end)) { \
>> +            return ~ret; \
>> +        } \
>> +    } \
>> + \
>> +    return ret; \
>> +}
>> +
>> +MASK(32, UINT32_MAX);
>> +MASK(64, UINT64_MAX);
>
> It would be nicer to merge this mask generation with the
> implementation in target-ppc/translate.c (called MASK()).

How about something like this in target-ppc/cpu.h

#define FUNC_MASK(name, ret_type, size, max_val) \
static inline ret_type name (uint##size##_t start, \
                             uint##size##_t end) \
{ \
    ret_type ret, max_bit = size - 1; \
 \
    if (likely(start == 0)) { \
        ret = max_val << (max_bit - end); \
    } else if (likely(end == max_bit)) { \
        ret = max_val >> start; \
    } else { \
        ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \
              (((uint##size##_t)(-1ULL) >> (end)) >> 1); \
        if (unlikely(start > end)) { \
            return ~ret; \
        } \
    } \
 \
    return ret; \
}

#if defined(TARGET_PPC64)
FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX);
#else
FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX);
#endif
FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX);
FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX);

Regards
Nikunj
On Thu, Oct 27, 2016 at 02:03:01PM +0530, Nikunj A Dadhania wrote: > David Gibson <david@gibson.dropbear.id.au> writes: > >> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c > >> index dca4798..b54cd7c 100644 > >> --- a/target-ppc/int_helper.c > >> +++ b/target-ppc/int_helper.c > >> @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) > >> } > >> } > >> > >> +#define MASK(size, max_val) \ > >> +static inline uint##size##_t mask_u##size(uint##size##_t start, \ > >> + uint##size##_t end) \ > >> +{ \ > >> + uint##size##_t ret, max_bit = size - 1; \ > >> + \ > >> + if (likely(start == 0)) { \ > >> + ret = max_val << (max_bit - end); \ > >> + } else if (likely(end == max_bit)) { \ > >> + ret = max_val >> start; \ > >> + } else { \ > >> + ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \ > >> + (((uint##size##_t)(-1ULL) >> (end)) >> 1); \ > >> + if (unlikely(start > end)) { \ > >> + return ~ret; \ > >> + } \ > >> + } \ > >> + \ > >> + return ret; \ > >> +} > >> + > >> +MASK(32, UINT32_MAX); > >> +MASK(64, UINT64_MAX); > > > > It would be nicer to merge this mask generation with the > > implementation in target-ppc/translate.c (called MASK()). 
> > How about something like this in target-ppc/cpu.h > > #define FUNC_MASK(name, ret_type, size, max_val) \ > static inline ret_type name (uint##size##_t start, \ > uint##size##_t end) \ > { \ > ret_type ret, max_bit = size - 1; \ > \ > if (likely(start == 0)) { \ > ret = max_val << (max_bit - end); \ > } else if (likely(end == max_bit)) { \ > ret = max_val >> start; \ > } else { \ > ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \ > (((uint##size##_t)(-1ULL) >> (end)) >> 1); \ > if (unlikely(start > end)) { \ > return ~ret; \ > } \ > } \ > \ > return ret; \ > } > > #if defined(TARGET_PPC64) > FUNC_MASK(MASK, target_ulong, 64, UINT64_MAX); > #else > FUNC_MASK(MASK, target_ulong, 32, UINT32_MAX); > #endif > FUNC_MASK(mask_u32, uint32_t, 32, UINT32_MAX); > FUNC_MASK(mask_u64, uint64_t, 64, UINT64_MAX); That seems reasonable.
On 10/27/2016 06:30 PM, David Gibson wrote:
>> How about something like this in target-ppc/cpu.h
>>
>> #define FUNC_MASK(name, ret_type, size, max_val) \
>> static inline ret_type name (uint##size##_t start, \
>>                              uint##size##_t end) \

Consider introducing an internals.h, for stuff that needs to be shared within target-ppc/, but is not required by any other user of cpu.h.

r~
diff --git a/disas/ppc.c b/disas/ppc.c index 052cebe..32f0d8d 100644 --- a/disas/ppc.c +++ b/disas/ppc.c @@ -2286,6 +2286,8 @@ const struct powerpc_opcode powerpc_opcodes[] = { { "vrlh", VX(4, 68), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vrlw", VX(4, 132), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vrsqrtefp", VX(4, 330), VX_MASK, PPCVEC, { VD, VB } }, +{ "vrldmi", VX(4, 197), VX_MASK, PPCVEC, { VD, VA, VB } }, +{ "vrlwmi", VX(4, 133), VX_MASK, PPCVEC, { VD, VA, VB} }, { "vsel", VXA(4, 42), VXA_MASK, PPCVEC, { VD, VA, VB, VC } }, { "vsl", VX(4, 452), VX_MASK, PPCVEC, { VD, VA, VB } }, { "vslb", VX(4, 260), VX_MASK, PPCVEC, { VD, VA, VB } }, diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 0337292..9fb8f0d 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -325,6 +325,8 @@ DEF_HELPER_4(vmaxfp, void, env, avr, avr, avr) DEF_HELPER_4(vminfp, void, env, avr, avr, avr) DEF_HELPER_3(vrefp, void, env, avr, avr) DEF_HELPER_3(vrsqrtefp, void, env, avr, avr) +DEF_HELPER_3(vrlwmi, void, avr, avr, avr) +DEF_HELPER_3(vrldmi, void, avr, avr, avr) DEF_HELPER_5(vmaddfp, void, env, avr, avr, avr, avr) DEF_HELPER_5(vnmsubfp, void, env, avr, avr, avr, avr) DEF_HELPER_3(vexptefp, void, env, avr, avr) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index dca4798..b54cd7c 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1717,6 +1717,52 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) } } +#define MASK(size, max_val) \ +static inline uint##size##_t mask_u##size(uint##size##_t start, \ + uint##size##_t end) \ +{ \ + uint##size##_t ret, max_bit = size - 1; \ + \ + if (likely(start == 0)) { \ + ret = max_val << (max_bit - end); \ + } else if (likely(end == max_bit)) { \ + ret = max_val >> start; \ + } else { \ + ret = (((uint##size##_t)(-1ULL)) >> (start)) ^ \ + (((uint##size##_t)(-1ULL) >> (end)) >> 1); \ + if (unlikely(start > end)) { \ + return ~ret; \ + } \ + } \ + \ + return ret; \ +} + +MASK(32, UINT32_MAX); 
+MASK(64, UINT64_MAX); + +#define VRLMI(name, size, element) \ +void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + uint##size##_t src1 = a->element[i]; \ + uint##size##_t src2 = b->element[i]; \ + uint##size##_t src3 = r->element[i]; \ + uint##size##_t begin, end, shift, mask, rot_val; \ + \ + shift = extract##size(src2, 0, 6); \ + end = extract##size(src2, 8, 6); \ + begin = extract##size(src2, 16, 6); \ + rot_val = rol##size(src1, shift); \ + mask = mask_u##size(begin, end); \ + r->element[i] = (rot_val & mask) | (src3 & ~mask); \ + } \ +} + +VRLMI(vrldmi, 64, u64); +VRLMI(vrlwmi, 32, u32); + void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) { diff --git a/target-ppc/translate/vmx-impl.inc.c b/target-ppc/translate/vmx-impl.inc.c index fc612d9..fdfbd6a 100644 --- a/target-ppc/translate/vmx-impl.inc.c +++ b/target-ppc/translate/vmx-impl.inc.c @@ -488,7 +488,13 @@ GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ GEN_VXFORM(vrlb, 2, 0); GEN_VXFORM(vrlh, 2, 1); GEN_VXFORM(vrlw, 2, 2); +GEN_VXFORM(vrlwmi, 2, 2); +GEN_VXFORM_DUAL(vrlw, PPC_ALTIVEC, PPC_NONE, \ + vrlwmi, PPC_NONE, PPC2_ISA300) GEN_VXFORM(vrld, 2, 3); +GEN_VXFORM(vrldmi, 2, 3); +GEN_VXFORM_DUAL(vrld, PPC_NONE, PPC2_ALTIVEC_207, \ + vrldmi, PPC_NONE, PPC2_ISA300) GEN_VXFORM(vsl, 2, 7); GEN_VXFORM(vsr, 2, 11); GEN_VXFORM_ENV(vpkuhum, 7, 0); diff --git a/target-ppc/translate/vmx-ops.inc.c b/target-ppc/translate/vmx-ops.inc.c index cc7ed7e..76b3593 100644 --- a/target-ppc/translate/vmx-ops.inc.c +++ b/target-ppc/translate/vmx-ops.inc.c @@ -143,8 +143,8 @@ GEN_VXFORM_207(vsubcuq, 0, 21), GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), GEN_VXFORM(vrlb, 2, 0), GEN_VXFORM(vrlh, 2, 1), -GEN_VXFORM(vrlw, 2, 2), -GEN_VXFORM_207(vrld, 2, 3), +GEN_VXFORM_DUAL(vrlw, vrlwmi, 2, 2, PPC_ALTIVEC, PPC_NONE), +GEN_VXFORM_DUAL(vrld, vrldmi, 2, 3, PPC_NONE, 
PPC2_ALTIVEC_207), GEN_VXFORM(vsl, 2, 7), GEN_VXFORM(vsr, 2, 11), GEN_VXFORM(vpkuhum, 7, 0),