Message ID | 1488381854-7275-1-git-send-email-nikunj@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote: > Use the softfloat api for fused multiply-add. Also, generate VXISI using > a helper function by computing intermediate result. Um.. I really need some information on why this is a good thing to do. Is it a bugfix? Enhancement? Simplification? > > Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> > > --- > > v0: > * Use MADD/MSUB_FLAGS as used by VSX instructions > * Introduce helper float64_madd_set_vxisi() > --- > target/ppc/fpu_helper.c | 218 +++++++++++------------------------------------- > 1 file changed, 49 insertions(+), 169 deletions(-) > > diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c > index 58aee64..ed7e84a 100644 > --- a/target/ppc/fpu_helper.c > +++ b/target/ppc/fpu_helper.c > @@ -743,178 +743,63 @@ uint64_t helper_frim(CPUPPCState *env, uint64_t arg) > return do_fri(env, arg, float_round_down); > } > > -/* fmadd - fmadd. */ > -uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, > - uint64_t arg3) > -{ > - CPU_DoubleU farg1, farg2, farg3; > - > - farg1.ll = arg1; > - farg2.ll = arg2; > - farg3.ll = arg3; > - > - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || > - (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { > - /* Multiplication of zero by infinity */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); > - } else { > - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || > - float64_is_signaling_nan(farg2.d, &env->fp_status) || > - float64_is_signaling_nan(farg3.d, &env->fp_status))) { > - /* sNaN operation */ > - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); > - } > - /* This is the way the PowerPC specification defines it */ > - float128 ft0_128, ft1_128; > - > - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); > - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); > - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); > - if 
(unlikely(float128_is_infinity(ft0_128) && > - float64_is_infinity(farg3.d) && > - float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { > - /* Magnitude subtraction of infinities */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > - } else { > - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); > - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); > - farg1.d = float128_to_float64(ft0_128, &env->fp_status); > - } > - } > - > - return farg1.ll; > -} > - > -/* fmsub - fmsub. */ > -uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, > - uint64_t arg3) > -{ > - CPU_DoubleU farg1, farg2, farg3; > - > - farg1.ll = arg1; > - farg2.ll = arg2; > - farg3.ll = arg3; > - > - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || > - (float64_is_zero(farg1.d) && > - float64_is_infinity(farg2.d)))) { > - /* Multiplication of zero by infinity */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); > - } else { > - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || > - float64_is_signaling_nan(farg2.d, &env->fp_status) || > - float64_is_signaling_nan(farg3.d, &env->fp_status))) { > - /* sNaN operation */ > - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); > - } > - /* This is the way the PowerPC specification defines it */ > - float128 ft0_128, ft1_128; > - > - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); > - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); > - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); > - if (unlikely(float128_is_infinity(ft0_128) && > - float64_is_infinity(farg3.d) && > - float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { > - /* Magnitude subtraction of infinities */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > - } else { > - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); > - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); > - farg1.d = 
float128_to_float64(ft0_128, &env->fp_status); > - } > - } > - return farg1.ll; > -} > +#define MADD_FLGS 0 > +#define MSUB_FLGS float_muladd_negate_c > +#define NMADD_FLGS float_muladd_negate_result > +#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result) > > -/* fnmadd - fnmadd. */ > -uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, > - uint64_t arg3) > +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b, > + float64 c, unsigned int flags) > { > - CPU_DoubleU farg1, farg2, farg3; > - > - farg1.ll = arg1; > - farg2.ll = arg2; > - farg3.ll = arg3; > + float64 f = float64_mul(a, b, &env->fp_status); > > - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || > - (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { > - /* Multiplication of zero by infinity */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); > - } else { > - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || > - float64_is_signaling_nan(farg2.d, &env->fp_status) || > - float64_is_signaling_nan(farg3.d, &env->fp_status))) { > - /* sNaN operation */ > - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); > - } > - /* This is the way the PowerPC specification defines it */ > - float128 ft0_128, ft1_128; > - > - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); > - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); > - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); > - if (unlikely(float128_is_infinity(ft0_128) && > - float64_is_infinity(farg3.d) && > - float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { > - /* Magnitude subtraction of infinities */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > - } else { > - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); > - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); > - farg1.d = float128_to_float64(ft0_128, &env->fp_status); > - } > - if 
(likely(!float64_is_any_nan(farg1.d))) { > - farg1.d = float64_chs(farg1.d); > + /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */ > + if (float64_is_infinity(f) && float64_is_infinity(c)) { > + if ((f ^ c) == 0) { > + /* Both negative/positive inifinity and substraction*/ > + if (flags & MSUB_FLGS) { > + /* 1. ∞ - ∞ > + * 2. (-∞) - (-∞) > + */ > + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > + } > + } else if (!(flags & MSUB_FLGS)) { > + /* Opposite sign and addition > + * 1) ∞ + (-∞) > + * 2) (-∞) + ∞ > + */ > + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > } > } > - return farg1.ll; > } > > -/* fnmsub - fnmsub. */ > -uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, > - uint64_t arg3) > -{ > - CPU_DoubleU farg1, farg2, farg3; > - > - farg1.ll = arg1; > - farg2.ll = arg2; > - farg3.ll = arg3; > - > - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || > - (float64_is_zero(farg1.d) && > - float64_is_infinity(farg2.d)))) { > - /* Multiplication of zero by infinity */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); > - } else { > - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || > - float64_is_signaling_nan(farg2.d, &env->fp_status) || > - float64_is_signaling_nan(farg3.d, &env->fp_status))) { > - /* sNaN operation */ > - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); > - } > - /* This is the way the PowerPC specification defines it */ > - float128 ft0_128, ft1_128; > - > - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); > - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); > - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); > - if (unlikely(float128_is_infinity(ft0_128) && > - float64_is_infinity(farg3.d) && > - float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { > - /* Magnitude subtraction of infinities */ > - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); > - } else { > - ft1_128 = 
float64_to_float128(farg3.d, &env->fp_status); > - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); > - farg1.d = float128_to_float64(ft0_128, &env->fp_status); > - } > - if (likely(!float64_is_any_nan(farg1.d))) { > - farg1.d = float64_chs(farg1.d); > - } > - } > - return farg1.ll; > +#define FPU_FMADD(op, madd_flags) \ > +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ > + uint64_t arg2, uint64_t arg3) \ > +{ \ > + if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \ > + (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \ > + /* Multiplication of zero by infinity */ \ > + arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); \ > + } else { \ > + if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \ > + float64_is_signaling_nan(arg2, &env->fp_status) || \ > + float64_is_signaling_nan(arg3, &env->fp_status))) { \ > + /* sNaN operation */ \ > + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ > + } \ > + \ > + float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags); \ > + arg1 = float64_muladd(arg1, arg2, arg3, madd_flags, \ > + &env->fp_status); \ > + float_check_status(env); \ > + } \ > + return arg1; \ > } > +FPU_FMADD(fmadd, MADD_FLGS) > +FPU_FMADD(fnmadd, NMADD_FLGS) > +FPU_FMADD(fmsub, MSUB_FLGS) > +FPU_FMADD(fnmsub, NMSUB_FLGS) > > /* frsp - frsp. */ > uint64_t helper_frsp(CPUPPCState *env, uint64_t arg) > @@ -2384,11 +2269,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ > float_check_status(env); \ > } > > -#define MADD_FLGS 0 > -#define MSUB_FLGS float_muladd_negate_c > -#define NMADD_FLGS float_muladd_negate_result > -#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result) > - > VSX_MADD(xsmaddadp, 1, float64, VsrD(0), MADD_FLGS, 1, 1, 0) > VSX_MADD(xsmaddmdp, 1, float64, VsrD(0), MADD_FLGS, 0, 1, 0) > VSX_MADD(xsmsubadp, 1, float64, VsrD(0), MSUB_FLGS, 1, 1, 0)
On 03/02/2017 11:29 AM, David Gibson wrote: > On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote: >> Use the softfloat api for fused multiply-add. Also, generate VXISI using >> a helper function by computing intermediate result. > > Um.. I really need some information on why this is a good thing to > do. Is it a bugfix? Enhancement? Simplification? Looks like a bugfix to me. Previously we were attempting the operation via float128 as an intermediate type, which can result in double rounding errors. r~
On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote: > +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b, > + float64 c, unsigned int flags) > { > + float64 f = float64_mul(a, b, &env->fp_status); What is the point of this multiply? > > + /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */ > + if (float64_is_infinity(f) && float64_is_infinity(c)) { > + if ((f ^ c) == 0) { > + /* Both negative/positive inifinity and substraction*/ > + if (flags & MSUB_FLGS) { I would really prefer you use the float_muladd_* names. > +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ > + uint64_t arg2, uint64_t arg3) \ > +{ \ > + if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \ > + (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \ > + /* Multiplication of zero by infinity */ \ > + arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); \ > + } else { \ > + if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \ > + float64_is_signaling_nan(arg2, &env->fp_status) || \ > + float64_is_signaling_nan(arg3, &env->fp_status))) { \ > + /* sNaN operation */ \ > + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ > + } \ > + \ > + float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags); \ > + arg1 = float64_muladd(arg1, arg2, arg3, madd_flags, \ > + &env->fp_status); \ > + float_check_status(env); \ I know this is the layout of the bulk of the ppc target, but it's inefficient. Let's do this one correctly, akin to target/tricore: result = float64_muladd(args...); flags = get_float_exception_flags(&env->fp_status); if (flags) { if (flags & float_flag_invalid) { // examine inputs to see why we return NaN } float_check_status(env); } r~
On 02-Mar-2017 7:53 AM, "Richard Henderson" <rth@twiddle.net> wrote: On 03/02/2017 11:29 AM, David Gibson wrote: > On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote: > >> Use the softfloat api for fused multiply-add. Also, generate VXISI using >> a helper function by computing intermediate result. >> > > Um.. I really need some information on why this is a good thing to > do. Is it a bugfix? Enhancement? Simplification? > Looks like a bugfix to me. Previously we were attempting the operation via float128 as an intermediate type, which can result in double rounding errors. Was discussed here https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg02000.html Nikunj
On 02-Mar-2017 8:07 AM, "Richard Henderson" <rth@twiddle.net> wrote: On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote: > +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 > b, > + float64 c, unsigned int flags) > { > + float64 f = float64_mul(a, b, &env->fp_status); > What is the point of this multiply? Only to compute VXISI, as stated in the thread: "If the product of x and y is an Infinity and z is an Infinity of the opposite sign, vxisi_flag is set to 1." Let me know if there is an alternative way to achieve this. > + /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */ > + if (float64_is_infinity(f) && float64_is_infinity(c)) { > + if ((f ^ c) == 0) { > + /* Both negative/positive inifinity and substraction*/ > + if (flags & MSUB_FLGS) { > I would really prefer you use the float_muladd_* names. Sure. +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ > + uint64_t arg2, uint64_t arg3) \ > +{ \ > + if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || > \ > + (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { > \ > + /* Multiplication of zero by infinity */ \ > + arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); \ > + } else { \ > + if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \ > + float64_is_signaling_nan(arg2, &env->fp_status) || \ > + float64_is_signaling_nan(arg3, &env->fp_status))) { > \ > + /* sNaN operation */ \ > + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ > + } \ > + \ > + float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags); \ > + arg1 = float64_muladd(arg1, arg2, arg3, madd_flags, \ > + &env->fp_status); \ > + float_check_status(env); \ > I know this is the layout of the bulk of the ppc target, but it's inefficient. 
Let's do this one correctly, akin to target/tricore: result = float64_muladd(args...); flags = get_float_exception_flags(&env->fp_status); if (flags) { if (flags & float_flag_invalid) { // examine inputs to see why we return NaN } float_check_status(env); } Sure. Nikunj
Oh, some gmail issues, resending: > On 02-Mar-2017 7:53 AM, "Richard Henderson" <rth@twiddle.net> wrote: > > On 03/02/2017 11:29 AM, David Gibson wrote: > >> On Wed, Mar 01, 2017 at 08:54:14PM +0530, Nikunj A Dadhania wrote: >> >>> Use the softfloat api for fused multiply-add. Also, generate VXISI using >>> a helper function by computing intermediate result. >>> >> >> Um.. I really need some information on why this is a good thing to >> do. Is it a bugfix? Enhancement? Simplification? >> > > Looks like a bugfix to me. Previously we were attempting the operation via > float128 as an intermediate type, which can result in double rounding > errors. > Was discussed here https://lists.gnu.org/archive/html/qemu-devel/2016-10/msg02000.html Nikunj
On 02-Mar-2017 8:07 AM, "Richard Henderson" <rth@twiddle.net> wrote: > > On 03/02/2017 02:24 AM, Nikunj A Dadhania wrote: > >> +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 >> b, >> + float64 c, unsigned int flags) >> { >> + float64 f = float64_mul(a, b, &env->fp_status); >> > > What is the point of this multiply? > > Only to compute VXISI, as stated in the thread: "If the product of x and y is an Infinity and z is an Infinity of the opposite sign, vxisi_flag is set to 1." Let me know if there is an alternative way to achieve this. >> + /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */ >> + if (float64_is_infinity(f) && float64_is_infinity(c)) { >> + if ((f ^ c) == 0) { >> + /* Both negative/positive inifinity and substraction*/ >> + if (flags & MSUB_FLGS) { >> > > I would really prefer you use the float_muladd_* names. Sure. > +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ >> + uint64_t arg2, uint64_t arg3) \ >> +{ \ >> + if (unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || >> \ >> + (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { >> \ >> + /* Multiplication of zero by infinity */ \ >> + arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); \ >> + } else { \ >> + if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \ >> + float64_is_signaling_nan(arg2, &env->fp_status) || \ >> + float64_is_signaling_nan(arg3, &env->fp_status))) { >> \ >> + /* sNaN operation */ \ >> + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ >> + } \ >> + \ >> + float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags); \ >> + arg1 = float64_muladd(arg1, arg2, arg3, madd_flags, \ >> + &env->fp_status); \ >> + float_check_status(env); \ >> > > I know this is the layout of the bulk of the ppc target, but it's > inefficient. 
Let's do this one correctly, akin to target/tricore: > > result = float64_muladd(args...); > flags = get_float_exception_flags(&env->fp_status); > if (flags) { > if (flags & float_flag_invalid) { > // examine inputs to see why we return NaN > } > float_check_status(env); > } Sure. Nikunj
diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 58aee64..ed7e84a 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -743,178 +743,63 @@ uint64_t helper_frim(CPUPPCState *env, uint64_t arg) return do_fri(env, arg, float_round_down); } -/* fmadd - fmadd. */ -uint64_t helper_fmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, - uint64_t arg3) -{ - CPU_DoubleU farg1, farg2, farg3; - - farg1.ll = arg1; - farg2.ll = arg2; - farg3.ll = arg3; - - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || - (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { - /* Multiplication of zero by infinity */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); - } else { - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || - float64_is_signaling_nan(farg2.d, &env->fp_status) || - float64_is_signaling_nan(farg3.d, &env->fp_status))) { - /* sNaN operation */ - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); - } - /* This is the way the PowerPC specification defines it */ - float128 ft0_128, ft1_128; - - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - if (unlikely(float128_is_infinity(ft0_128) && - float64_is_infinity(farg3.d) && - float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { - /* Magnitude subtraction of infinities */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); - } else { - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); - } - } - - return farg1.ll; -} - -/* fmsub - fmsub. 
*/ -uint64_t helper_fmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, - uint64_t arg3) -{ - CPU_DoubleU farg1, farg2, farg3; - - farg1.ll = arg1; - farg2.ll = arg2; - farg3.ll = arg3; - - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || - (float64_is_zero(farg1.d) && - float64_is_infinity(farg2.d)))) { - /* Multiplication of zero by infinity */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); - } else { - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || - float64_is_signaling_nan(farg2.d, &env->fp_status) || - float64_is_signaling_nan(farg3.d, &env->fp_status))) { - /* sNaN operation */ - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); - } - /* This is the way the PowerPC specification defines it */ - float128 ft0_128, ft1_128; - - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - if (unlikely(float128_is_infinity(ft0_128) && - float64_is_infinity(farg3.d) && - float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { - /* Magnitude subtraction of infinities */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); - } else { - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); - } - } - return farg1.ll; -} +#define MADD_FLGS 0 +#define MSUB_FLGS float_muladd_negate_c +#define NMADD_FLGS float_muladd_negate_result +#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result) -/* fnmadd - fnmadd. 
*/ -uint64_t helper_fnmadd(CPUPPCState *env, uint64_t arg1, uint64_t arg2, - uint64_t arg3) +static void float64_madd_set_vxisi(CPUPPCState *env, float64 a, float64 b, + float64 c, unsigned int flags) { - CPU_DoubleU farg1, farg2, farg3; - - farg1.ll = arg1; - farg2.ll = arg2; - farg3.ll = arg3; + float64 f = float64_mul(a, b, &env->fp_status); - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || - (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { - /* Multiplication of zero by infinity */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); - } else { - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || - float64_is_signaling_nan(farg2.d, &env->fp_status) || - float64_is_signaling_nan(farg3.d, &env->fp_status))) { - /* sNaN operation */ - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); - } - /* This is the way the PowerPC specification defines it */ - float128 ft0_128, ft1_128; - - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - if (unlikely(float128_is_infinity(ft0_128) && - float64_is_infinity(farg3.d) && - float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { - /* Magnitude subtraction of infinities */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); - } else { - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); - } - if (likely(!float64_is_any_nan(farg1.d))) { - farg1.d = float64_chs(farg1.d); + /* a*b = ∞ and c = ∞, find ∞ - ∞ case and set VXISI */ + if (float64_is_infinity(f) && float64_is_infinity(c)) { + if ((f ^ c) == 0) { + /* Both negative/positive inifinity and substraction*/ + if (flags & MSUB_FLGS) { + /* 1. ∞ - ∞ + * 2. 
(-∞) - (-∞) + */ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); + } + } else if (!(flags & MSUB_FLGS)) { + /* Opposite sign and addition + * 1) ∞ + (-∞) + * 2) (-∞) + ∞ + */ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); } } - return farg1.ll; } -/* fnmsub - fnmsub. */ -uint64_t helper_fnmsub(CPUPPCState *env, uint64_t arg1, uint64_t arg2, - uint64_t arg3) -{ - CPU_DoubleU farg1, farg2, farg3; - - farg1.ll = arg1; - farg2.ll = arg2; - farg3.ll = arg3; - - if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || - (float64_is_zero(farg1.d) && - float64_is_infinity(farg2.d)))) { - /* Multiplication of zero by infinity */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); - } else { - if (unlikely(float64_is_signaling_nan(farg1.d, &env->fp_status) || - float64_is_signaling_nan(farg2.d, &env->fp_status) || - float64_is_signaling_nan(farg3.d, &env->fp_status))) { - /* sNaN operation */ - float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); - } - /* This is the way the PowerPC specification defines it */ - float128 ft0_128, ft1_128; - - ft0_128 = float64_to_float128(farg1.d, &env->fp_status); - ft1_128 = float64_to_float128(farg2.d, &env->fp_status); - ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - if (unlikely(float128_is_infinity(ft0_128) && - float64_is_infinity(farg3.d) && - float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { - /* Magnitude subtraction of infinities */ - farg1.ll = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXISI, 1); - } else { - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); - } - if (likely(!float64_is_any_nan(farg1.d))) { - farg1.d = float64_chs(farg1.d); - } - } - return farg1.ll; +#define FPU_FMADD(op, madd_flags) \ +uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ + uint64_t arg2, uint64_t arg3) \ +{ \ + if 
(unlikely((float64_is_infinity(arg1) && float64_is_zero(arg2)) || \ + (float64_is_zero(arg1) && float64_is_infinity(arg2)))) { \ + /* Multiplication of zero by infinity */ \ + arg1 = float_invalid_op_excp(env, POWERPC_EXCP_FP_VXIMZ, 1); \ + } else { \ + if (unlikely(float64_is_signaling_nan(arg1, &env->fp_status) || \ + float64_is_signaling_nan(arg2, &env->fp_status) || \ + float64_is_signaling_nan(arg3, &env->fp_status))) { \ + /* sNaN operation */ \ + float_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1); \ + } \ + \ + float64_madd_set_vxisi(env, arg1, arg2, arg3, madd_flags); \ + arg1 = float64_muladd(arg1, arg2, arg3, madd_flags, \ + &env->fp_status); \ + float_check_status(env); \ + } \ + return arg1; \ } +FPU_FMADD(fmadd, MADD_FLGS) +FPU_FMADD(fnmadd, NMADD_FLGS) +FPU_FMADD(fmsub, MSUB_FLGS) +FPU_FMADD(fnmsub, NMSUB_FLGS) /* frsp - frsp. */ uint64_t helper_frsp(CPUPPCState *env, uint64_t arg) @@ -2384,11 +2269,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ float_check_status(env); \ } -#define MADD_FLGS 0 -#define MSUB_FLGS float_muladd_negate_c -#define NMADD_FLGS float_muladd_negate_result -#define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result) - VSX_MADD(xsmaddadp, 1, float64, VsrD(0), MADD_FLGS, 1, 1, 0) VSX_MADD(xsmaddmdp, 1, float64, VsrD(0), MADD_FLGS, 0, 1, 0) VSX_MADD(xsmsubadp, 1, float64, VsrD(0), MSUB_FLGS, 1, 1, 0)
Use the softfloat API (float64_muladd) for fused multiply-add. The previous implementation performed the operation via float128 as an intermediate type, which can result in double rounding errors. Also, generate VXISI using a helper function that computes the intermediate product. Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com> --- v0: * Use MADD_FLGS/MSUB_FLGS as used by VSX instructions * Introduce helper float64_madd_set_vxisi() --- target/ppc/fpu_helper.c | 218 +++++++++++------------------------------------- 1 file changed, 49 insertions(+), 169 deletions(-)