@@ -912,6 +912,13 @@ static inline vec_t movlhps(vec_t x, vec
})
# endif
# endif
+#elif VEC_SIZE == 64
+# if FLOAT_SIZE == 4
+# define dup_hi(x) B(movshdup, _mask, x, undef(), ~0)
+# define dup_lo(x) B(movsldup, _mask, x, undef(), ~0)
+# elif FLOAT_SIZE == 8
+# define dup_lo(x) B(movddup, _mask, x, undef(), ~0)
+# endif
#endif
#if VEC_SIZE == 16 && defined(__SSSE3__) && !defined(__AVX512VL__)
# if INT_SIZE == 1
@@ -63,6 +63,9 @@ int fma_test(void)
{
unsigned int i;
vec_t x, y, z, src, inv, one;
+#ifdef __AVX512F__
+ typeof(one[0]) one_ = 1;
+#endif
for ( i = 0; i < ELEM_COUNT; ++i )
{
@@ -71,6 +74,10 @@ int fma_test(void)
one[i] = 1;
}
+#ifdef __AVX512F__
+# define one one_
+#endif
+
x = (src + one) * inv;
y = (src - one) * inv;
touch(src);
@@ -93,22 +100,28 @@ int fma_test(void)
x = src + inv;
y = src - inv;
touch(inv);
+ touch(one);
z = src * one + inv;
if ( !eq(x, z) ) return __LINE__;
touch(inv);
+ touch(one);
z = -src * one - inv;
if ( !eq(-x, z) ) return __LINE__;
touch(inv);
+ touch(one);
z = src * one - inv;
if ( !eq(y, z) ) return __LINE__;
touch(inv);
+ touch(one);
z = -src * one + inv;
if ( !eq(-y, z) ) return __LINE__;
touch(inv);
+#undef one
+
#if defined(addsub) && defined(fmaddsub)
x = addsub(src * inv, one);
y = addsub(src * inv, -one);
First, it was an oversight to leave dup_{hi,lo}() undefined for the 512-bit
vector size. Second, in FMA testing we can also arrange for the compiler to
(hopefully) recognize broadcasting potential.

Signed-off-by: Jan Beulich <jbeulich@suse.com>