diff mbox series

[v4,10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

Message ID 20220322000441.26495-11-dmiller423@gmail.com (mailing list archive)
State New, archived
Headers show
Series s390x/tcg: Implement Vector-Enhancements Facility 2 | expand

Commit Message

David Miller March 22, 2022, 12:04 a.m. UTC
Signed-off-by: David Miller <dmiller423@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vxeh2_vcvt.c    |  97 +++++++++++++++++++++
 tests/tcg/s390x/vxeh2_vlstr.c   | 146 ++++++++++++++++++++++++++++++++
 tests/tcg/s390x/vxeh2_vs.c      |  91 ++++++++++++++++++++
 4 files changed, 342 insertions(+)
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

Comments

David Hildenbrand March 22, 2022, 8:53 a.m. UTC | #1
On 22.03.22 01:04, David Miller wrote:
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Checkpatch complains about three things:

ERROR: space prohibited between function name and open parenthesis '('
#262: FILE: tests/tcg/s390x/vxeh2_vlstr.c:115:
+    vler (&vd, &vs, ES16);  vtst(vd, vt_v_er16);

ERROR: space prohibited between function name and open parenthesis '('
#265: FILE: tests/tcg/s390x/vxeh2_vlstr.c:118:
+    vlbr (&vd, &vs, ES16);  vtst(vd, vt_v_br16);

ERROR: space prohibited between function name and open parenthesis '('
#383: FILE: tests/tcg/s390x/vxeh2_vs.c:84:
+    vsl (&vd, &vs, &vsi);       vtst(vd, vt_vsl);

total: 3 errors, 1 warnings, 348 lines checked


> ---
>  tests/tcg/s390x/Makefile.target |   8 ++
>  tests/tcg/s390x/vxeh2_vcvt.c    |  97 +++++++++++++++++++++
>  tests/tcg/s390x/vxeh2_vlstr.c   | 146 ++++++++++++++++++++++++++++++++
>  tests/tcg/s390x/vxeh2_vs.c      |  91 ++++++++++++++++++++
>  4 files changed, 342 insertions(+)
>  create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
>  create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
>  create mode 100644 tests/tcg/s390x/vxeh2_vs.c
> 
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index 8c9b6a13ce..921a056dd1 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -16,6 +16,14 @@ TESTS+=shift
>  TESTS+=trap
>  TESTS+=signals-s390x
>  
> +VECTOR_TESTS=vxeh2_vs
> +VECTOR_TESTS+=vxeh2_vcvt
> +VECTOR_TESTS+=vxeh2_vlstr
> +
> +TESTS+=$(VECTOR_TESTS)
> +
> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2


@Thomas, will that survive our test framework already, or do we have to
wait for the Debian 11 changes?

> +
>  ifneq ($(HAVE_GDB_BIN),)
>  GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
>  
> diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
> new file mode 100644
> index 0000000000..71ecbd77b0
> --- /dev/null
> +++ b/tests/tcg/s390x/vxeh2_vcvt.c
> @@ -0,0 +1,97 @@
> +/*
> + * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
> + */
> +#include <stdint.h>
> +
> +typedef union S390Vector {
> +    uint64_t d[2];  /* doubleword */
> +    uint32_t w[4];  /* word */
> +    uint16_t h[8];  /* halfword */
> +    uint8_t  b[16]; /* byte */
> +    float    f[4];
> +    double   fd[2];
> +    __uint128_t v;
> +} S390Vector;

Let's move that into a separate header (vx.h?) so we can reuse it.

> +
> +#define M_S 8
> +#define M4_XxC 4
> +#define M4_def M4_XxC
[...]

> diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
> new file mode 100644
> index 0000000000..bf2954e86d
> --- /dev/null
> +++ b/tests/tcg/s390x/vxeh2_vlstr.c
> @@ -0,0 +1,146 @@
> +/*
> + * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
> + */
> +#include <stdint.h>
> +
> +typedef union S390Vector {
> +    uint64_t d[2];  /* doubleword */
> +    uint32_t w[4];  /* word */
> +    uint16_t h[8];  /* halfword */
> +    uint8_t  b[16]; /* byte */
> +    __uint128_t v;
> +} S390Vector;
> +
> +#define ES8  0
> +#define ES16 1
> +#define ES32 2
> +#define ES64 3

These should probably also go to the new header.

> +
> +#define vtst(v1, v2) \
> +    if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
> +        return 1;     \
> +    }
> +
> +static inline void vler(S390Vector *v1, const void *va, uint8_t m3)
> +{
> +    asm volatile("vler %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vster(S390Vector *v1, const void *va, uint8_t m3)
> +{
> +    asm volatile("vster %[v1], 0(%[va]), %[m3]\n"
> +                : [va] "+d" (va)
> +                : [v1]  "v" (v1->v)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vlbr(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n"
> +                : [va] "+d" (va)
> +                : [v1]  "v" (v1->v)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +
> +static inline void vlebrh(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n"
> +                : [va] "+d" (va)
> +                : [v1]  "v" (v1->v)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vllebrz(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +static inline void vlbrrep(S390Vector *v1, void *va, const uint8_t m3)
> +{
> +    asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}
> +
> +

Superfluous empty line.

> +int main(int argc, char *argv[])
> +{
> +    S390Vector vd = { .d[0] = 0, .d[1] = 0 };
> +    S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull,
> +                      .d[1] = 0x7766554433221107ull };
> +
> +    const S390Vector vt_v_er16 = {
> +        .h[0] = 0x1107, .h[1] = 0x3322, .h[2] = 0x5544, .h[3] = 0x7766,
> +        .h[4] = 0x9988, .h[5] = 0xBBAA, .h[6] = 0xDDCC, .h[7] = 0x8FEE };
> +
> +    const S390Vector vt_v_br16 = {
> +        .h[0] = 0xEE8F, .h[1] = 0xCCDD, .h[2] = 0xAABB, .h[3] = 0x8899,
> +        .h[4] = 0x6677, .h[5] = 0x4455, .h[6] = 0x2233, .h[7] = 0x0711 };
> +
> +    int ix;
> +    uint64_t ss64 = 0xFEEDFACE0BADBEEFull, sd64 = 0;
> +
> +    vler (&vd, &vs, ES16);  vtst(vd, vt_v_er16);
> +    vster(&vs, &vd, ES16);  vtst(vd, vt_v_er16);
> +
> +    vlbr (&vd, &vs, ES16);  vtst(vd, vt_v_br16);
> +    vstbr(&vs, &vd, ES16);  vtst(vd, vt_v_br16);
> +

Please put each statement on a new line.

> +    vlebrh(&vd, &ss64, 5);
> +    if (0xEDFE != vd.h[5]) {
> +        return 1;
> +    }
> +
> +    vstebrh(&vs, (uint8_t *)&sd64 + 4, 7);
> +    if (0x0000000007110000ull != sd64) {
> +        return 1;
> +    }
> +
> +    vllebrz(&vd, (uint8_t *)&ss64 + 3, 2);
> +    for (ix = 0; ix < 4; ix++) {
> +        if (vd.w[ix] != (ix != 1 ? 0 : 0xBEAD0BCE)) {
> +            return 1;
> +        }
> +    }
> +
> +    vlbrrep(&vd, (uint8_t *)&ss64 + 4, 1);
> +    for (ix = 0; ix < 8; ix++) {
> +        if (0xAD0B != vd.h[ix]) {
> +            return 1;
> +        }
> +    }
> +
> +    return 0;
> +}
> diff --git a/tests/tcg/s390x/vxeh2_vs.c b/tests/tcg/s390x/vxeh2_vs.c
> new file mode 100644
> index 0000000000..04a3d4d7bb
> --- /dev/null
> +++ b/tests/tcg/s390x/vxeh2_vs.c

[...]

> +int main(int argc, char *argv[])
> +{
> +    const S390Vector vt_vsl  = { .d[0] = 0x7FEDBB32D5AA311Dull,
> +                                 .d[1] = 0xBB65AA10912220C0ull };
> +    const S390Vector vt_vsra = { .d[0] = 0xF1FE6E7399AA5466ull,
> +                                 .d[1] = 0x0E762A5188221044ull };
> +    const S390Vector vt_vsrl = { .d[0] = 0x11FE6E7399AA5466ull,
> +                                 .d[1] = 0x0E762A5188221044ull };
> +    const S390Vector vt_vsld = { .d[0] = 0x7F76EE65DD54CC43ull,
> +                                 .d[1] = 0xBB32AA2199108838ull };
> +    const S390Vector vt_vsrd = { .d[0] = 0x0E060802040E000Aull,
> +                                 .d[1] = 0x0C060802040E000Aull };
> +    S390Vector vs  = { .d[0] = 0x8FEEDDCCBBAA9988ull,
> +                       .d[1] = 0x7766554433221107ull };
> +    S390Vector  vd = { .d[0] = 0, .d[1] = 0 };
> +    S390Vector vsi = { .d[0] = 0, .d[1] = 0 };
> +
> +    for (int ix = 0; ix < 16; ix++) {
> +        vsi.b[ix] = (1 + (5 ^ ~ix)) & 7;
> +    }
> +
> +    vsl (&vd, &vs, &vsi);       vtst(vd, vt_vsl);
> +    vsra(&vd, &vs, &vsi);       vtst(vd, vt_vsra);
> +    vsrl(&vd, &vs, &vsi);       vtst(vd, vt_vsrl);
> +    vsld(&vd, &vs, &vsi, 3);  vtst(vd, vt_vsld);
> +    vsrd(&vd, &vs, &vsi, 15); vtst(vd, vt_vsrd);

Ditto. Please put each statement on a new line.

> +
> +    return 0;
> +}
Thomas Huth March 22, 2022, 10:31 a.m. UTC | #2
On 22/03/2022 09.53, David Hildenbrand wrote:
> On 22.03.22 01:04, David Miller wrote:
[...]
>> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
>> index 8c9b6a13ce..921a056dd1 100644
>> --- a/tests/tcg/s390x/Makefile.target
>> +++ b/tests/tcg/s390x/Makefile.target
>> @@ -16,6 +16,14 @@ TESTS+=shift
>>   TESTS+=trap
>>   TESTS+=signals-s390x
>>   
>> +VECTOR_TESTS=vxeh2_vs
>> +VECTOR_TESTS+=vxeh2_vcvt
>> +VECTOR_TESTS+=vxeh2_vlstr
>> +
>> +TESTS+=$(VECTOR_TESTS)
>> +
>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> 
> @Thomas, will that survive our test framework already, or do we have to
> wait for the debain11 changes?

Alex' update to the container has already been merged:

https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b

... and seems like it's working in Travis on s390x, too:

https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797

... so it seems like it should be OK now (considering that we drop support 
for the old Ubuntu version 18.04 in QEMU 7.1, too).

  Thomas
Thomas Huth March 23, 2022, 5:13 p.m. UTC | #3
On 22/03/2022 11.31, Thomas Huth wrote:
> On 22/03/2022 09.53, David Hildenbrand wrote:
>> On 22.03.22 01:04, David Miller wrote:
> [...]
>>> diff --git a/tests/tcg/s390x/Makefile.target 
>>> b/tests/tcg/s390x/Makefile.target
>>> index 8c9b6a13ce..921a056dd1 100644
>>> --- a/tests/tcg/s390x/Makefile.target
>>> +++ b/tests/tcg/s390x/Makefile.target
>>> @@ -16,6 +16,14 @@ TESTS+=shift
>>>   TESTS+=trap
>>>   TESTS+=signals-s390x
>>> +VECTOR_TESTS=vxeh2_vs
>>> +VECTOR_TESTS+=vxeh2_vcvt
>>> +VECTOR_TESTS+=vxeh2_vlstr
>>> +
>>> +TESTS+=$(VECTOR_TESTS)
>>> +
>>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
>>
>> @Thomas, will that survive our test framework already, or do we have to
>> wait for the debain11 changes?
> 
> Alex' update to the container has already been merged:
> 
> https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> 
> ... and seems like it's working in Travis on s390x, too:
> 
> https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> 
> ... so it seems like it should be OK now (considering that we drop support 
> for the old Ubuntu version 18.04 in QEMU 7.1, too).

Looks like I spoke a little bit too soon - some of the CI pipelines are 
still using Debian 10 for running the TCG tests, and they are failing with 
these patches applied:

https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499

Thus we either need to update the CI jobs to use Debian 11, or use 
handcrafted instruction opcodes here again...

  Thomas
David Miller March 31, 2022, 6:26 p.m. UTC | #4
Sorry,
   Didn't notice this, as it was on v4 patch emails.
I assume since there is no other follow up after a week,
 CI jobs are not being updated and I should change samples to use .insn.
I will try to get this out tomorrow.

Thanks,
- David Miller

On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote:
>
> On 22/03/2022 11.31, Thomas Huth wrote:
> > On 22/03/2022 09.53, David Hildenbrand wrote:
> >> On 22.03.22 01:04, David Miller wrote:
> > [...]
> >>> diff --git a/tests/tcg/s390x/Makefile.target
> >>> b/tests/tcg/s390x/Makefile.target
> >>> index 8c9b6a13ce..921a056dd1 100644
> >>> --- a/tests/tcg/s390x/Makefile.target
> >>> +++ b/tests/tcg/s390x/Makefile.target
> >>> @@ -16,6 +16,14 @@ TESTS+=shift
> >>>   TESTS+=trap
> >>>   TESTS+=signals-s390x
> >>> +VECTOR_TESTS=vxeh2_vs
> >>> +VECTOR_TESTS+=vxeh2_vcvt
> >>> +VECTOR_TESTS+=vxeh2_vlstr
> >>> +
> >>> +TESTS+=$(VECTOR_TESTS)
> >>> +
> >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> >>
> >> @Thomas, will that survive our test framework already, or do we have to
> >> wait for the debain11 changes?
> >
> > Alex' update to the container has already been merged:
> >
> > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> >
> > ... and seems like it's working in Travis on s390x, too:
> >
> > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> >
> > ... so it seems like it should be OK now (considering that we drop support
> > for the old Ubuntu version 18.04 in QEMU 7.1, too).
>
> Looks like I spoke a little bit too soon - some of the CI pipelines are
> still using Debian 10 for running the TCG tests, and they are failing with
> these patches applied:
>
> https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
>
> Thus we either need to update the CI jobs to use Debian 11, or use
> handcrafted instruction opcodes here again...
>
>   Thomas
>
David Miller April 1, 2022, 2:15 a.m. UTC | #5
Hi,

There is some issue with instruction sub/alt encodings not matching,
but I worked around it easily.

I'm dropping the updated patch for the tests in here.
I know I should resend the entire patch series as a higher version
really, and will do so.
I'm hoping someone can tell me if it's ok to use .insn vrr  in place
of vri(-d) as it doesn't match vri.
[https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]

.insn doesn't deal with sub encodings and there is no good alternative
that I know of.

example:

    /* vri-d as vrr */
    asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n"
                : [v1] "=v" (v1->v)
                : [v2]  "v" (v2->v)
                , [v3]  "v" (v3->v)
                , [I]   "i" (I & 7));

Patch is attached


Thanks
- David Miller


On Thu, Mar 31, 2022 at 2:26 PM David Miller <dmiller423@gmail.com> wrote:
>
> Sorry,
>    Didn't notice this, as it was on v4 patch emails.
> I assume since there is no other follow up after a week,
>  CI jobs are not being updated and I should change samples to use .insn.
> I will try to get this out tomorrow.
>
> Thanks,
> - David Miller
>
> On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote:
> >
> > On 22/03/2022 11.31, Thomas Huth wrote:
> > > On 22/03/2022 09.53, David Hildenbrand wrote:
> > >> On 22.03.22 01:04, David Miller wrote:
> > > [...]
> > >>> diff --git a/tests/tcg/s390x/Makefile.target
> > >>> b/tests/tcg/s390x/Makefile.target
> > >>> index 8c9b6a13ce..921a056dd1 100644
> > >>> --- a/tests/tcg/s390x/Makefile.target
> > >>> +++ b/tests/tcg/s390x/Makefile.target
> > >>> @@ -16,6 +16,14 @@ TESTS+=shift
> > >>>   TESTS+=trap
> > >>>   TESTS+=signals-s390x
> > >>> +VECTOR_TESTS=vxeh2_vs
> > >>> +VECTOR_TESTS+=vxeh2_vcvt
> > >>> +VECTOR_TESTS+=vxeh2_vlstr
> > >>> +
> > >>> +TESTS+=$(VECTOR_TESTS)
> > >>> +
> > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> > >>
> > >> @Thomas, will that survive our test framework already, or do we have to
> > >> wait for the debain11 changes?
> > >
> > > Alex' update to the container has already been merged:
> > >
> > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> > >
> > > ... and seems like it's working in Travis on s390x, too:
> > >
> > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> > >
> > > ... so it seems like it should be OK now (considering that we drop support
> > > for the old Ubuntu version 18.04 in QEMU 7.1, too).
> >
> > Looks like I spoke a little bit too soon - some of the CI pipelines are
> > still using Debian 10 for running the TCG tests, and they are failing with
> > these patches applied:
> >
> > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
> >
> > Thus we either need to update the CI jobs to use Debian 11, or use
> > handcrafted instruction opcodes here again...
> >
> >   Thomas
> >
David Miller April 1, 2022, 2:16 a.m. UTC | #6
On Thu, Mar 31, 2022 at 10:15 PM David Miller <dmiller423@gmail.com> wrote:
>
> Hi,
>
> There is some issue with instruction sub/alt encodings not matching,
> but I worked around it easily.
>
> I'm dropping the updated patch for the tests in here.
> I know I should resend the entire patch series as a higher version
> really, and will do so.
> I'm hoping someone can tell me if it's ok to use .insn vrr  in place
> of vri(-d) as it doesn't match vri.
> [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]
>
> .insn doesn't deal with sub encodings and there is no good alternative
> that I know of.
>
> example:
>
>     /* vri-d as vrr */
>     asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n"
>                 : [v1] "=v" (v1->v)
>                 : [v2]  "v" (v2->v)
>                 , [v3]  "v" (v3->v)
>                 , [I]   "i" (I & 7));
>
> Patch is attached
>
>
> Thanks
> - David Miller
>
>
> On Thu, Mar 31, 2022 at 2:26 PM David Miller <dmiller423@gmail.com> wrote:
> >
> > Sorry,
> >    Didn't notice this, as it was on v4 patch emails.
> > I assume since there is no other follow up after a week,
> >  CI jobs are not being updated and I should change samples to use .insn.
> > I will try to get this out tomorrow.
> >
> > Thanks,
> > - David Miller
> >
> > On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote:
> > >
> > > On 22/03/2022 11.31, Thomas Huth wrote:
> > > > On 22/03/2022 09.53, David Hildenbrand wrote:
> > > >> On 22.03.22 01:04, David Miller wrote:
> > > > [...]
> > > >>> diff --git a/tests/tcg/s390x/Makefile.target
> > > >>> b/tests/tcg/s390x/Makefile.target
> > > >>> index 8c9b6a13ce..921a056dd1 100644
> > > >>> --- a/tests/tcg/s390x/Makefile.target
> > > >>> +++ b/tests/tcg/s390x/Makefile.target
> > > >>> @@ -16,6 +16,14 @@ TESTS+=shift
> > > >>>   TESTS+=trap
> > > >>>   TESTS+=signals-s390x
> > > >>> +VECTOR_TESTS=vxeh2_vs
> > > >>> +VECTOR_TESTS+=vxeh2_vcvt
> > > >>> +VECTOR_TESTS+=vxeh2_vlstr
> > > >>> +
> > > >>> +TESTS+=$(VECTOR_TESTS)
> > > >>> +
> > > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> > > >>
> > > >> @Thomas, will that survive our test framework already, or do we have to
> > > >> wait for the debain11 changes?
> > > >
> > > > Alex' update to the container has already been merged:
> > > >
> > > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b
> > > >
> > > > ... and seems like it's working in Travis on s390x, too:
> > > >
> > > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797
> > > >
> > > > ... so it seems like it should be OK now (considering that we drop support
> > > > for the old Ubuntu version 18.04 in QEMU 7.1, too).
> > >
> > > Looks like I spoke a little bit too soon - some of the CI pipelines are
> > > still using Debian 10 for running the TCG tests, and they are failing with
> > > these patches applied:
> > >
> > > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499
> > >
> > > Thus we either need to update the CI jobs to use Debian 11, or use
> > > handcrafted instruction opcodes here again...
> > >
> > >   Thomas
> > >
Christian Borntraeger April 1, 2022, 6:41 a.m. UTC | #7
Am 01.04.22 um 04:15 schrieb David Miller:
> Hi,
> 
> There is some issue with instruction sub/alt encodings not matching,
> but I worked around it easily.
> 
> I'm dropping the updated patch for the tests in here.
> I know I should resend the entire patch series as a higher version
> really, and will do so.
> I'm hoping someone can tell me if it's ok to use .insn vrr  in place
> of vri(-d) as it doesn't match vri.
> [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]
> 
> .insn doesn't deal with sub encodings and there is no good alternative
> that I know of.
> 
> example:
> 
>      /* vri-d as vrr */
>      asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n"
>                  : [v1] "=v" (v1->v)
>                  : [v2]  "v" (v2->v)
>                  , [v3]  "v" (v3->v)
>                  , [I]   "i" (I & 7));
> 
> Patch is attached

Yes, vri sucks and does not work with vrsd. Maybe just use .long which is probably
better than using a "wrong" format.
Opinions?
David Miller April 1, 2022, 3:02 p.m. UTC | #8
vrr is almost a perfect match (it is an exact match for this case; an
immediate larger than 4 bits would need to be split).

.long: this would be uglier.
Should I emit enough of them and fill the remainder with nops afterwards?
Or use a 32-bit and a 16-bit value instead? If it's in .text, it should make no difference.


On Fri, Apr 1, 2022 at 2:42 AM Christian Borntraeger
<borntraeger@linux.ibm.com> wrote:
>
>
>
> Am 01.04.22 um 04:15 schrieb David Miller:
> > Hi,
> >
> > There is some issue with instruction sub/alt encodings not matching,
> > but I worked around it easily.
> >
> > I'm dropping the updated patch for the tests in here.
> > I know I should resend the entire patch series as a higher version
> > really, and will do so.
> > I'm hoping someone can tell me if it's ok to use .insn vrr  in place
> > of vri(-d) as it doesn't match vri.
> > [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html]
> >
> > .insn doesn't deal with sub encodings and there is no good alternative
> > that I know of.
> >
> > example:
> >
> >      /* vri-d as vrr */
> >      asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n"
> >                  : [v1] "=v" (v1->v)
> >                  : [v2]  "v" (v2->v)
> >                  , [v3]  "v" (v3->v)
> >                  , [I]   "i" (I & 7));
> >
> > Patch is attached
>
> Yes, vri sucks and does not work with vrsd. Maybe just use .long which is probably
> better than using a "wrong" format.
> Opinions?
Christian Borntraeger April 1, 2022, 3:25 p.m. UTC | #9
Am 01.04.22 um 17:02 schrieb David Miller:
> vrr is almost a perfect match (it is for this, larger than imm4 would
> need to be split).
> 
> .long : this would be uglier.
> use enough to be filled with nops after ?
> or use a 32b and 16b instead if it's in .text it should make no difference.

I will let Richard or David decide what they prefer.
David Hildenbrand April 5, 2022, 10:13 a.m. UTC | #10
On 01.04.22 17:25, Christian Borntraeger wrote:
> Am 01.04.22 um 17:02 schrieb David Miller:
>> vrr is almost a perfect match (it is for this, larger than imm4 would
>> need to be split).
>>
>> .long : this would be uglier.
>> use enough to be filled with nops after ?
>> or use a 32b and 16b instead if it's in .text it should make no difference.
> 
> I will let Richard or David decide what they prefer.
> 

I don't particularly care as long as there is a comment stating why we
need this hack.
David Miller April 5, 2022, 5:03 p.m. UTC | #11
Recommendation for comment?

/* vri-d encoding matches vrr for 4b imm.
  .insn does not handle this encoding variant.
*/

Christian: I will push another patch version as soon as that's decided.
(unless you prefer to choose the comment and edit during staging)

On Tue, Apr 5, 2022 at 6:13 AM David Hildenbrand <david@redhat.com> wrote:
>
> On 01.04.22 17:25, Christian Borntraeger wrote:
> > Am 01.04.22 um 17:02 schrieb David Miller:
> >> vrr is almost a perfect match (it is for this, larger than imm4 would
> >> need to be split).
> >>
> >> .long : this would be uglier.
> >> use enough to be filled with nops after ?
> >> or use a 32b and 16b instead if it's in .text it should make no difference.
> >
> > I will let Richard or David decide what they prefer.
> >
>
> I don't particularly care as long as there is a comment stating why we
> need this hack.
>
> --
> Thanks,
>
> David / dhildenb
>
David Hildenbrand April 12, 2022, 12:32 p.m. UTC | #12
On 05.04.22 19:03, David Miller wrote:
> Recommendation for comment?
> 
> /* vri-d encoding matches vrr for 4b imm.
>   .insn does not handle this encoding variant.
> */
> 

Sorry for the late reply.

".insn doesn't handle vri-d properly. So instead, we use vrr, which
matches vri-d with a 4b imm -- good enough for our purpose."
diff mbox series

Patch

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 8c9b6a13ce..921a056dd1 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -16,6 +16,14 @@  TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+
+TESTS+=$(VECTOR_TESTS)
+
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 0000000000..71ecbd77b0
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,97 @@ 
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert *
+ */
+#include <stdint.h>
+
+typedef union S390Vector { /* one 16-byte value seen as lanes of each width */
+    uint64_t d[2];  /* doubleword */
+    uint32_t w[4];  /* word */
+    uint16_t h[8];  /* halfword */
+    uint8_t  b[16]; /* byte */
+    float    f[4];  /* float lanes */
+    double   fd[2]; /* double lanes */
+    __uint128_t v;  /* whole value; the member handed to the asm operands */
+} S390Vector;
+
+#define M_S 8 /* NOTE(review): not used below; looks like a mask bit - confirm */
+#define M4_XxC 4 /* NOTE(review): looks like an M4 flag bit; confirm meaning */
+#define M4_def M4_XxC /* the m4 value main() passes to every conversion */
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+    /* "i" operands: m3-m5 must be constants by the time this is inlined. */
+    asm volatile("vcfps %[dst], %[src], %[m3], %[m4], %[m5]\n"
+                 : [dst] "=v" (v1->v)  /* written: the converted lanes */
+                 : [src] "v" (v2->v),  /* read: the lanes to convert */
+                   [m3] "i" (m3),
+                   [m4] "i" (m4), [m5] "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+    /* m3-m5 are immediates ("i"), so callers must pass literal constants. */
+    asm volatile("vcfpl %[dst], %[src], %[m3], %[m4], %[m5]\n"
+                 : [dst] "=v" (v1->v)  /* written: the converted lanes */
+                 : [src] "v" (v2->v),  /* read: the lanes to convert */
+                   [m3] "i" (m3),
+                   [m4] "i" (m4), [m5] "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+    /* The "i" constraints need m3-m5 to be known constants after inlining. */
+    asm volatile("vcsfp %[dst], %[src], %[m3], %[m4], %[m5]\n"
+                 : [dst] "=v" (v1->v)  /* written: the converted lanes */
+                 : [src] "v" (v2->v),  /* read: the lanes to convert */
+                   [m3] "i" (m3),
+                   [m4] "i" (m4), [m5] "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{
+    /* m3-m5 are encoded into the instruction ("i"): constants only. */
+    asm volatile("vclfp %[dst], %[src], %[m3], %[m4], %[m5]\n"
+                 : [dst] "=v" (v1->v)  /* written: the converted lanes */
+                 : [src] "v" (v2->v),  /* read: the lanes to convert */
+                   [m3] "i" (m3),
+                   [m4] "i" (m4), [m5] "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{
+    S390Vector vd;
+    S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+    S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 };
+    S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+                          .f[2] = 4.499, .f[3] = 0.512 };
+
+    vd.v = 0;
+    vcfps(&vd, &vs_i32, 2, M4_def, 0); /* integer lanes in, floats out */
+    if (vd.f[0] != 1 || vd.f[1] != 64 || vd.f[2] != 1024 || vd.f[3] != -10) {
+        return 1;
+    }
+
+    vd.v = 0;
+    vcfpl(&vd, &vs_u32, 2, M4_def, 0); /* same direction, vs_u32 inputs */
+    if (vd.f[0] != 2 || vd.f[1] != 32 || vd.f[2] != 4096 || vd.f[3] != 8888) {
+        return 1;
+    }
+
+    vd.v = 0;
+    vcsfp(&vd, &vs_f32, 2, M4_def, 0); /* floats in, rounded integers out */
+    if (vd.w[0] != 4 || vd.w[1] != 5 || vd.w[2] != 4 || vd.w[3] != 1) {
+        return 1;
+    }
+
+    vd.v = 0;
+    vclfp(&vd, &vs_f32, 2, M4_def, 0); /* floats in, rounded integers out */
+    if (vd.w[0] != 4 || vd.w[1] != 5 || vd.w[2] != 4 || vd.w[3] != 1) {
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 0000000000..bf2954e86d
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,146 @@ 
+/*
+ * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
+ */
+#include <stdint.h>
+
+typedef union S390Vector { /* one 16-byte value seen as lanes of each width */
+    uint64_t d[2];  /* doubleword */
+    uint32_t w[4];  /* word */
+    uint16_t h[8];  /* halfword */
+    uint8_t  b[16]; /* byte */
+    __uint128_t v;  /* whole value; the member handed to the asm operands */
+} S390Vector;
+
+#define ES8  0 /* element-size codes for the m3 argument: 8-bit elements */
+#define ES16 1 /* 16-bit elements (the only code main() exercises) */
+#define ES32 2 /* 32-bit elements */
+#define ES64 3 /* 64-bit elements */
+
+#define vtst(v1, v2) \
+    if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
+        return 1;     \
+    }
+
+static inline void vler(S390Vector *v1, const void *va, uint8_t m3)
+{
+    /* *v1 = vector at va with its m3-sized elements in reverse order.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vler %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vster(S390Vector *v1, const void *va, uint8_t m3)
+{
+    /* Memory at va = *v1 with its m3-sized elements in reverse order.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vster %[v1], 0(%[va]), %[m3]\n"
+                : /* nothing; "memory" tells the compiler about the store */
+                : [v1] "v" (v1->v), [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vlbr(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* *v1 = vector at va with the bytes of each m3-sized element swapped.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* Memory at va = *v1 with the bytes of each m3-sized element swapped.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n"
+                : /* nothing; "memory" tells the compiler about the store */
+                : [v1] "v" (v1->v), [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+
+static inline void vlebrh(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* Halfword element m3 of *v1 = the halfword at va, byte-swapped.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v) /* "+": the other elements are kept */
+                : [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* Halfword at va = halfword element m3 of *v1, byte-swapped.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n"
+                : /* nothing; "memory" tells the compiler about the store */
+                : [v1] "v" (v1->v), [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vllebrz(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* main(), m3 = 2: byte-swapped word from va lands in w[1], rest is 0.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+static inline void vlbrrep(S390Vector *v1, void *va, const uint8_t m3)
+{
+    /* main(), m3 = 1: byte-swapped halfword from va fills every h[] lane.
+     * va uses "a", not "d": as a base register, r0 would read as zero. */
+    asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va] "a" (va), [m3] "i" (m3) : "memory");
+}
+
+
+int main(int argc, char *argv[])
+{
+    S390Vector vd = { .d[0] = 0, .d[1] = 0 };
+    S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull,
+                      .d[1] = 0x7766554433221107ull };
+    const S390Vector vt_v_er16 = { /* vs, halfword order reversed */
+        .h[0] = 0x1107, .h[1] = 0x3322, .h[2] = 0x5544, .h[3] = 0x7766,
+        .h[4] = 0x9988, .h[5] = 0xBBAA, .h[6] = 0xDDCC, .h[7] = 0x8FEE };
+    const S390Vector vt_v_br16 = { /* vs, each halfword byte-swapped */
+        .h[0] = 0xEE8F, .h[1] = 0xCCDD, .h[2] = 0xAABB, .h[3] = 0x8899,
+        .h[4] = 0x6677, .h[5] = 0x4455, .h[6] = 0x2233, .h[7] = 0x0711 };
+    uint64_t ss64 = 0xFEEDFACE0BADBEEFull, sd64 = 0;
+
+    vler(&vd, &vs, ES16);
+    vtst(vd, vt_v_er16);
+    vd.d[0] = vd.d[1] = 0; /* wipe vd so vster cannot pass without storing */
+    vster(&vs, &vd, ES16);
+    vtst(vd, vt_v_er16);
+    vlbr(&vd, &vs, ES16);
+    vtst(vd, vt_v_br16);
+    vd.d[0] = vd.d[1] = 0; /* likewise before checking vstbr */
+    vstbr(&vs, &vd, ES16);
+    vtst(vd, vt_v_br16);
+
+    vlebrh(&vd, &ss64, 5); /* halfword FEED arrives byte-swapped in h[5] */
+    if (vd.h[5] != 0xEDFE) {
+        return 1;
+    }
+
+    vstebrh(&vs, (uint8_t *)&sd64 + 4, 7); /* h[7] = 1107 lands as 07 11 */
+    if (sd64 != 0x0000000007110000ull) {
+        return 1;
+    }
+
+    vllebrz(&vd, (uint8_t *)&ss64 + 3, 2); /* CE 0B AD BE -> w[1], rest 0 */
+    for (int ix = 0; ix < 4; ix++) {
+        if (vd.w[ix] != (ix != 1 ? 0 : 0xBEAD0BCE)) {
+            return 1;
+        }
+    }
+
+    vlbrrep(&vd, (uint8_t *)&ss64 + 4, 1); /* 0B AD -> AD0B in every lane */
+    for (int ix = 0; ix < 8; ix++) {
+        if (vd.h[ix] != 0xAD0B) {
+            return 1;
+        }
+    }
+    return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vs.c b/tests/tcg/s390x/vxeh2_vs.c
new file mode 100644
index 0000000000..04a3d4d7bb
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vs.c
@@ -0,0 +1,91 @@ 
+/*
+ * vxeh2_vs: vector-enhancements facility 2 vector shift
+ */
+#include <stdint.h>
+
+typedef union S390Vector {
+    uint64_t d[2];  /* doubleword */
+    uint32_t w[4];  /* word */
+    uint16_t h[8];  /* halfword */
+    uint8_t  b[16]; /* byte */
+    __uint128_t v;  /* all 128 bits; the member handed to the asm operands */
+} S390Vector;
+
+#define vtst(v1, v2) /* makes the caller return 1 unless v1 == v2 */ \
+    do { \
+        if ((v1).d[0] != (v2).d[0] || (v1).d[1] != (v2).d[1]) { return 1; } \
+    } while (0)
+
+static inline void vsl(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{
+    asm volatile("vsl %[v1], %[v2], %[v3]\n"  /* emits one VSL instruction */
+                : [v1] "=v" (v1->v)    /* output only: overwrites *v1 */
+                : [v2]  "v" (v2->v)    /* input, in a vector register */
+                , [v3]  "v" (v3->v));  /* input, in a vector register */
+}
+
+static inline void vsra(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{
+    asm volatile("vsra %[v1], %[v2], %[v3]\n"  /* emits one VSRA instruction */
+                : [v1] "=v" (v1->v)    /* output only: overwrites *v1 */
+                : [v2]  "v" (v2->v)    /* input, in a vector register */
+                , [v3]  "v" (v3->v));  /* input, in a vector register */
+}
+
+static inline void vsrl(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{
+    asm volatile("vsrl %[v1], %[v2], %[v3]\n"  /* emits one VSRL instruction */
+                : [v1] "=v" (v1->v)    /* output only: overwrites *v1 */
+                : [v2]  "v" (v2->v)    /* input, in a vector register */
+                , [v3]  "v" (v3->v));  /* input, in a vector register */
+}
+
+static inline void vsld(S390Vector *v1, S390Vector *v2,
+    S390Vector *v3, const uint8_t I)
+{
+    asm volatile("vsld %[v1], %[v2], %[v3], %[I]\n" /* one VSLD instruction */
+                : [v1] "=v" (v1->v)    /* output only: overwrites *v1 */
+                : [v2]  "v" (v2->v)    /* input, in a vector register */
+                , [v3]  "v" (v3->v)    /* input, in a vector register */
+                , [I]   "i" (I & 7));  /* immediate: I & 7, must be constant */
+}
+
+static inline void vsrd(S390Vector *v1, S390Vector *v2,
+    S390Vector *v3, const uint8_t I)
+{
+    asm volatile("vsrd %[v1], %[v2], %[v3], %[I]\n" /* one VSRD instruction */
+                : [v1] "=v" (v1->v)    /* output only: overwrites *v1 */
+                : [v2]  "v" (v2->v)    /* input, in a vector register */
+                , [v3]  "v" (v3->v)    /* input, in a vector register */
+                , [I]   "i" (I & 7));  /* immediate: I & 7, must be constant */
+}
+
+int main(int argc, char *argv[])
+{
+    const S390Vector vt_vsl  = { .d[0] = 0x7FEDBB32D5AA311Dull,
+                                 .d[1] = 0xBB65AA10912220C0ull };
+    const S390Vector vt_vsra = { .d[0] = 0xF1FE6E7399AA5466ull,
+                                 .d[1] = 0x0E762A5188221044ull };
+    const S390Vector vt_vsrl = { .d[0] = 0x11FE6E7399AA5466ull,
+                                 .d[1] = 0x0E762A5188221044ull };
+    const S390Vector vt_vsld = { .d[0] = 0x7F76EE65DD54CC43ull,
+                                 .d[1] = 0xBB32AA2199108838ull };
+    const S390Vector vt_vsrd = { .d[0] = 0x0E060802040E000Aull,
+                                 .d[1] = 0x0C060802040E000Aull };
+    S390Vector vs  = { .d[0] = 0x8FEEDDCCBBAA9988ull,
+                       .d[1] = 0x7766554433221107ull };
+    S390Vector  vd = { .d[0] = 0, .d[1] = 0 };
+    S390Vector vsi = { .d[0] = 0, .d[1] = 0 };
+    /* Fill each byte of vsi (third operand below) with a value in 0..7. */
+    for (int ix = 0; ix < 16; ix++) {
+        vsi.b[ix] = (1 + (5 ^ ~ix)) & 7;
+    }
+
+    vsl(&vd, &vs, &vsi);        vtst(vd, vt_vsl);
+    vsra(&vd, &vs, &vsi);       vtst(vd, vt_vsra);
+    vsrl(&vd, &vs, &vsi);       vtst(vd, vt_vsrl);
+    vsld(&vd, &vs, &vsi, 3);    vtst(vd, vt_vsld);
+    vsrd(&vd, &vs, &vsi, 15);   vtst(vd, vt_vsrd); /* vsrd() uses 15 & 7 */
+
+    return 0;
+}