Message ID | 20220322000441.26495-11-dmiller423@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | s390x/tcg: Implement Vector-Enhancements Facility 2 | expand |
On 22.03.22 01:04, David Miller wrote: > Signed-off-by: David Miller <dmiller423@gmail.com> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Checkpatch complains about three things: ERROR: space prohibited between function name and open parenthesis '(' #262: FILE: tests/tcg/s390x/vxeh2_vlstr.c:115: + vler (&vd, &vs, ES16); vtst(vd, vt_v_er16); ERROR: space prohibited between function name and open parenthesis '(' #265: FILE: tests/tcg/s390x/vxeh2_vlstr.c:118: + vlbr (&vd, &vs, ES16); vtst(vd, vt_v_br16); ERROR: space prohibited between function name and open parenthesis '(' #383: FILE: tests/tcg/s390x/vxeh2_vs.c:84: + vsl (&vd, &vs, &vsi); vtst(vd, vt_vsl); total: 3 errors, 1 warnings, 348 lines checked > --- > tests/tcg/s390x/Makefile.target | 8 ++ > tests/tcg/s390x/vxeh2_vcvt.c | 97 +++++++++++++++++++++ > tests/tcg/s390x/vxeh2_vlstr.c | 146 ++++++++++++++++++++++++++++++++ > tests/tcg/s390x/vxeh2_vs.c | 91 ++++++++++++++++++++ > 4 files changed, 342 insertions(+) > create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c > create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c > create mode 100644 tests/tcg/s390x/vxeh2_vs.c > > diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target > index 8c9b6a13ce..921a056dd1 100644 > --- a/tests/tcg/s390x/Makefile.target > +++ b/tests/tcg/s390x/Makefile.target > @@ -16,6 +16,14 @@ TESTS+=shift > TESTS+=trap > TESTS+=signals-s390x > > +VECTOR_TESTS=vxeh2_vs > +VECTOR_TESTS+=vxeh2_vcvt > +VECTOR_TESTS+=vxeh2_vlstr > + > +TESTS+=$(VECTOR_TESTS) > + > +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 @Thomas, will that survive our test framework already, or do we have to wait for the Debian 11 changes? 
> + > ifneq ($(HAVE_GDB_BIN),) > GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py > > diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c > new file mode 100644 > index 0000000000..71ecbd77b0 > --- /dev/null > +++ b/tests/tcg/s390x/vxeh2_vcvt.c > @@ -0,0 +1,97 @@ > +/* > + * vxeh2_vcvt: vector-enhancements facility 2 vector convert * > + */ > +#include <stdint.h> > + > +typedef union S390Vector { > + uint64_t d[2]; /* doubleword */ > + uint32_t w[4]; /* word */ > + uint16_t h[8]; /* halfword */ > + uint8_t b[16]; /* byte */ > + float f[4]; > + double fd[2]; > + __uint128_t v; > +} S390Vector; Let's move that into a separate header (vx.h?) so we can reuse it. > + > +#define M_S 8 > +#define M4_XxC 4 > +#define M4_def M4_XxC [...] > diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c > new file mode 100644 > index 0000000000..bf2954e86d > --- /dev/null > +++ b/tests/tcg/s390x/vxeh2_vlstr.c > @@ -0,0 +1,146 @@ > +/* > + * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed * > + */ > +#include <stdint.h> > + > +typedef union S390Vector { > + uint64_t d[2]; /* doubleword */ > + uint32_t w[4]; /* word */ > + uint16_t h[8]; /* halfword */ > + uint8_t b[16]; /* byte */ > + __uint128_t v; > +} S390Vector; > + > +#define ES8 0 > +#define ES16 1 > +#define ES32 2 > +#define ES64 3 These should probably also go to the new header. 
> + > +#define vtst(v1, v2) \ > + if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \ > + return 1; \ > + } > + > +static inline void vler(S390Vector *v1, const void *va, uint8_t m3) > +{ > + asm volatile("vler %[v1], 0(%[va]), %[m3]\n" > + : [v1] "+v" (v1->v) > + : [va] "d" (va) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vster(S390Vector *v1, const void *va, uint8_t m3) > +{ > + asm volatile("vster %[v1], 0(%[va]), %[m3]\n" > + : [va] "+d" (va) > + : [v1] "v" (v1->v) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vlbr(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n" > + : [v1] "+v" (v1->v) > + : [va] "d" (va) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n" > + : [va] "+d" (va) > + : [v1] "v" (v1->v) > + , [m3] "i" (m3) > + : "memory"); > +} > + > + > +static inline void vlebrh(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n" > + : [v1] "+v" (v1->v) > + : [va] "d" (va) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n" > + : [va] "+d" (va) > + : [v1] "v" (v1->v) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vllebrz(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n" > + : [v1] "+v" (v1->v) > + : [va] "d" (va) > + , [m3] "i" (m3) > + : "memory"); > +} > + > +static inline void vlbrrep(S390Vector *v1, void *va, const uint8_t m3) > +{ > + asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n" > + : [v1] "+v" (v1->v) > + : [va] "d" (va) > + , [m3] "i" (m3) > + : "memory"); > +} > + > + Superfluous empty line. 
> +int main(int argc, char *argv[]) > +{ > + S390Vector vd = { .d[0] = 0, .d[1] = 0 }; > + S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull, > + .d[1] = 0x7766554433221107ull }; > + > + const S390Vector vt_v_er16 = { > + .h[0] = 0x1107, .h[1] = 0x3322, .h[2] = 0x5544, .h[3] = 0x7766, > + .h[4] = 0x9988, .h[5] = 0xBBAA, .h[6] = 0xDDCC, .h[7] = 0x8FEE }; > + > + const S390Vector vt_v_br16 = { > + .h[0] = 0xEE8F, .h[1] = 0xCCDD, .h[2] = 0xAABB, .h[3] = 0x8899, > + .h[4] = 0x6677, .h[5] = 0x4455, .h[6] = 0x2233, .h[7] = 0x0711 }; > + > + int ix; > + uint64_t ss64 = 0xFEEDFACE0BADBEEFull, sd64 = 0; > + > + vler (&vd, &vs, ES16); vtst(vd, vt_v_er16); > + vster(&vs, &vd, ES16); vtst(vd, vt_v_er16); > + > + vlbr (&vd, &vs, ES16); vtst(vd, vt_v_br16); > + vstbr(&vs, &vd, ES16); vtst(vd, vt_v_br16); > + Please put each statement on a new line. > + vlebrh(&vd, &ss64, 5); > + if (0xEDFE != vd.h[5]) { > + return 1; > + } > + > + vstebrh(&vs, (uint8_t *)&sd64 + 4, 7); > + if (0x0000000007110000ull != sd64) { > + return 1; > + } > + > + vllebrz(&vd, (uint8_t *)&ss64 + 3, 2); > + for (ix = 0; ix < 4; ix++) { > + if (vd.w[ix] != (ix != 1 ? 0 : 0xBEAD0BCE)) { > + return 1; > + } > + } > + > + vlbrrep(&vd, (uint8_t *)&ss64 + 4, 1); > + for (ix = 0; ix < 8; ix++) { > + if (0xAD0B != vd.h[ix]) { > + return 1; > + } > + } > + > + return 0; > +} > diff --git a/tests/tcg/s390x/vxeh2_vs.c b/tests/tcg/s390x/vxeh2_vs.c > new file mode 100644 > index 0000000000..04a3d4d7bb > --- /dev/null > +++ b/tests/tcg/s390x/vxeh2_vs.c [...] 
> +int main(int argc, char *argv[]) > +{ > + const S390Vector vt_vsl = { .d[0] = 0x7FEDBB32D5AA311Dull, > + .d[1] = 0xBB65AA10912220C0ull }; > + const S390Vector vt_vsra = { .d[0] = 0xF1FE6E7399AA5466ull, > + .d[1] = 0x0E762A5188221044ull }; > + const S390Vector vt_vsrl = { .d[0] = 0x11FE6E7399AA5466ull, > + .d[1] = 0x0E762A5188221044ull }; > + const S390Vector vt_vsld = { .d[0] = 0x7F76EE65DD54CC43ull, > + .d[1] = 0xBB32AA2199108838ull }; > + const S390Vector vt_vsrd = { .d[0] = 0x0E060802040E000Aull, > + .d[1] = 0x0C060802040E000Aull }; > + S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull, > + .d[1] = 0x7766554433221107ull }; > + S390Vector vd = { .d[0] = 0, .d[1] = 0 }; > + S390Vector vsi = { .d[0] = 0, .d[1] = 0 }; > + > + for (int ix = 0; ix < 16; ix++) { > + vsi.b[ix] = (1 + (5 ^ ~ix)) & 7; > + } > + > + vsl (&vd, &vs, &vsi); vtst(vd, vt_vsl); > + vsra(&vd, &vs, &vsi); vtst(vd, vt_vsra); > + vsrl(&vd, &vs, &vsi); vtst(vd, vt_vsrl); > + vsld(&vd, &vs, &vsi, 3); vtst(vd, vt_vsld); > + vsrd(&vd, &vs, &vsi, 15); vtst(vd, vt_vsrd); Ditto. Please put each statement on a new line. > + > + return 0; > +}
On 22/03/2022 09.53, David Hildenbrand wrote: > On 22.03.22 01:04, David Miller wrote: [...] >> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target >> index 8c9b6a13ce..921a056dd1 100644 >> --- a/tests/tcg/s390x/Makefile.target >> +++ b/tests/tcg/s390x/Makefile.target >> @@ -16,6 +16,14 @@ TESTS+=shift >> TESTS+=trap >> TESTS+=signals-s390x >> >> +VECTOR_TESTS=vxeh2_vs >> +VECTOR_TESTS+=vxeh2_vcvt >> +VECTOR_TESTS+=vxeh2_vlstr >> + >> +TESTS+=$(VECTOR_TESTS) >> + >> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 > > @Thomas, will that survive our test framework already, or do we have to > wait for the debain11 changes? Alex' update to the container has already been merged: https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b ... and seems like it's working in Travis on s390x, too: https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797 ... so it seems like it should be OK now (considering that we drop support for the old Ubuntu version 18.04 in QEMU 7.1, too). Thomas
On 22/03/2022 11.31, Thomas Huth wrote: > On 22/03/2022 09.53, David Hildenbrand wrote: >> On 22.03.22 01:04, David Miller wrote: > [...] >>> diff --git a/tests/tcg/s390x/Makefile.target >>> b/tests/tcg/s390x/Makefile.target >>> index 8c9b6a13ce..921a056dd1 100644 >>> --- a/tests/tcg/s390x/Makefile.target >>> +++ b/tests/tcg/s390x/Makefile.target >>> @@ -16,6 +16,14 @@ TESTS+=shift >>> TESTS+=trap >>> TESTS+=signals-s390x >>> +VECTOR_TESTS=vxeh2_vs >>> +VECTOR_TESTS+=vxeh2_vcvt >>> +VECTOR_TESTS+=vxeh2_vlstr >>> + >>> +TESTS+=$(VECTOR_TESTS) >>> + >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 >> >> @Thomas, will that survive our test framework already, or do we have to >> wait for the debain11 changes? > > Alex' update to the container has already been merged: > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b > > ... and seems like it's working in Travis on s390x, too: > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797 > > ... so it seems like it should be OK now (considering that we drop support > for the old Ubuntu version 18.04 in QEMU 7.1, too). Looks like I spoke a little bit too soon - some of the CI pipelines are still using Debian 10 for running the TCG tests, and they are failing with these patches applied: https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499 Thus we either need to update the CI jobs to use Debian 11, or use handcrafted instruction opcodes here again... Thomas
Sorry, Didn't notice this, as it was on v4 patch emails. I assume since there is no other follow up after a week, CI jobs are not being updated and I should change samples to use .insn. I will try to get this out tomorrow. Thanks, - David Miller On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote: > > On 22/03/2022 11.31, Thomas Huth wrote: > > On 22/03/2022 09.53, David Hildenbrand wrote: > >> On 22.03.22 01:04, David Miller wrote: > > [...] > >>> diff --git a/tests/tcg/s390x/Makefile.target > >>> b/tests/tcg/s390x/Makefile.target > >>> index 8c9b6a13ce..921a056dd1 100644 > >>> --- a/tests/tcg/s390x/Makefile.target > >>> +++ b/tests/tcg/s390x/Makefile.target > >>> @@ -16,6 +16,14 @@ TESTS+=shift > >>> TESTS+=trap > >>> TESTS+=signals-s390x > >>> +VECTOR_TESTS=vxeh2_vs > >>> +VECTOR_TESTS+=vxeh2_vcvt > >>> +VECTOR_TESTS+=vxeh2_vlstr > >>> + > >>> +TESTS+=$(VECTOR_TESTS) > >>> + > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 > >> > >> @Thomas, will that survive our test framework already, or do we have to > >> wait for the debain11 changes? > > > > Alex' update to the container has already been merged: > > > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b > > > > ... and seems like it's working in Travis on s390x, too: > > > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797 > > > > ... so it seems like it should be OK now (considering that we drop support > > for the old Ubuntu version 18.04 in QEMU 7.1, too). > > Looks like I spoke a little bit too soon - some of the CI pipelines are > still using Debian 10 for running the TCG tests, and they are failing with > these patches applied: > > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499 > > Thus we either need to update the CI jobs to use Debian 11, or use > handcrafted instruction opcodes here again... > > Thomas >
Hi, There is some issue with instruction sub/alt encodings not matching, but I worked around it easily. I'm dropping the updated patch for the tests in here. I know I should resend the entire patch series as a higher version really, and will do so. I'm hoping someone can tell me if it's ok to use .insn vrr in place of vri(-d) as it doesn't match vri. [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html] .insn doesn't deal with sub encodings and there is no good alternative that I know of. example: /* vri-d as vrr */ asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n" : [v1] "=v" (v1->v) : [v2] "v" (v2->v) , [v3] "v" (v3->v) , [I] "i" (I & 7)); Patch is attached Thanks - David Miller On Thu, Mar 31, 2022 at 2:26 PM David Miller <dmiller423@gmail.com> wrote: > > Sorry, > Didn't notice this, as it was on v4 patch emails. > I assume since there is no other follow up after a week, > CI jobs are not being updated and I should change samples to use .insn. > I will try to get this out tomorrow. > > Thanks, > - David Miller > > On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote: > > > > On 22/03/2022 11.31, Thomas Huth wrote: > > > On 22/03/2022 09.53, David Hildenbrand wrote: > > >> On 22.03.22 01:04, David Miller wrote: > > > [...] > > >>> diff --git a/tests/tcg/s390x/Makefile.target > > >>> b/tests/tcg/s390x/Makefile.target > > >>> index 8c9b6a13ce..921a056dd1 100644 > > >>> --- a/tests/tcg/s390x/Makefile.target > > >>> +++ b/tests/tcg/s390x/Makefile.target > > >>> @@ -16,6 +16,14 @@ TESTS+=shift > > >>> TESTS+=trap > > >>> TESTS+=signals-s390x > > >>> +VECTOR_TESTS=vxeh2_vs > > >>> +VECTOR_TESTS+=vxeh2_vcvt > > >>> +VECTOR_TESTS+=vxeh2_vlstr > > >>> + > > >>> +TESTS+=$(VECTOR_TESTS) > > >>> + > > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 > > >> > > >> @Thomas, will that survive our test framework already, or do we have to > > >> wait for the debain11 changes? 
> > > > > > Alex' update to the container has already been merged: > > > > > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b > > > > > > ... and seems like it's working in Travis on s390x, too: > > > > > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797 > > > > > > ... so it seems like it should be OK now (considering that we drop support > > > for the old Ubuntu version 18.04 in QEMU 7.1, too). > > > > Looks like I spoke a little bit too soon - some of the CI pipelines are > > still using Debian 10 for running the TCG tests, and they are failing with > > these patches applied: > > > > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499 > > > > Thus we either need to update the CI jobs to use Debian 11, or use > > handcrafted instruction opcodes here again... > > > > Thomas > >
On Thu, Mar 31, 2022 at 10:15 PM David Miller <dmiller423@gmail.com> wrote: > > Hi, > > There is some issue with instruction sub/alt encodings not matching, > but I worked around it easily. > > I'm dropping the updated patch for the tests in here. > I know I should resend the entire patch series as a higher version > really, and will do so. > I'm hoping someone can tell me if it's ok to use .insn vrr in place > of vri(-d) as it doesn't match vri. > [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html] > > .insn doesn't deal with sub encodings and there is no good alternative > that I know of. > > example: > > /* vri-d as vrr */ > asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n" > : [v1] "=v" (v1->v) > : [v2] "v" (v2->v) > , [v3] "v" (v3->v) > , [I] "i" (I & 7)); > > Patch is attached > > > Thanks > - David Miller > > > On Thu, Mar 31, 2022 at 2:26 PM David Miller <dmiller423@gmail.com> wrote: > > > > Sorry, > > Didn't notice this, as it was on v4 patch emails. > > I assume since there is no other follow up after a week, > > CI jobs are not being updated and I should change samples to use .insn. > > I will try to get this out tomorrow. > > > > Thanks, > > - David Miller > > > > On Wed, Mar 23, 2022 at 1:13 PM Thomas Huth <thuth@redhat.com> wrote: > > > > > > On 22/03/2022 11.31, Thomas Huth wrote: > > > > On 22/03/2022 09.53, David Hildenbrand wrote: > > > >> On 22.03.22 01:04, David Miller wrote: > > > > [...] 
> > > >>> diff --git a/tests/tcg/s390x/Makefile.target > > > >>> b/tests/tcg/s390x/Makefile.target > > > >>> index 8c9b6a13ce..921a056dd1 100644 > > > >>> --- a/tests/tcg/s390x/Makefile.target > > > >>> +++ b/tests/tcg/s390x/Makefile.target > > > >>> @@ -16,6 +16,14 @@ TESTS+=shift > > > >>> TESTS+=trap > > > >>> TESTS+=signals-s390x > > > >>> +VECTOR_TESTS=vxeh2_vs > > > >>> +VECTOR_TESTS+=vxeh2_vcvt > > > >>> +VECTOR_TESTS+=vxeh2_vlstr > > > >>> + > > > >>> +TESTS+=$(VECTOR_TESTS) > > > >>> + > > > >>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 > > > >> > > > >> @Thomas, will that survive our test framework already, or do we have to > > > >> wait for the debain11 changes? > > > > > > > > Alex' update to the container has already been merged: > > > > > > > > https://gitlab.com/qemu-project/qemu/-/commit/89767579cad2e371b > > > > > > > > ... and seems like it's working in Travis on s390x, too: > > > > > > > > https://app.travis-ci.com/github/huth/qemu/jobs/564188977#L12797 > > > > > > > > ... so it seems like it should be OK now (considering that we drop support > > > > for the old Ubuntu version 18.04 in QEMU 7.1, too). > > > > > > Looks like I spoke a little bit too soon - some of the CI pipelines are > > > still using Debian 10 for running the TCG tests, and they are failing with > > > these patches applied: > > > > > > https://gitlab.com/thuth/qemu/-/jobs/2238422870#L3499 > > > > > > Thus we either need to update the CI jobs to use Debian 11, or use > > > handcrafted instruction opcodes here again... > > > > > > Thomas > > >
Am 01.04.22 um 04:15 schrieb David Miller: > Hi, > > There is some issue with instruction sub/alt encodings not matching, > but I worked around it easily. > > I'm dropping the updated patch for the tests in here. > I know I should resend the entire patch series as a higher version > really, and will do so. > I'm hoping someone can tell me if it's ok to use .insn vrr in place > of vri(-d) as it doesn't match vri. > [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html] > > .insn doesn't deal with sub encodings and there is no good alternative > that I know of. > > example: > > /* vri-d as vrr */ > asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n" > : [v1] "=v" (v1->v) > : [v2] "v" (v2->v) > , [v3] "v" (v3->v) > , [I] "i" (I & 7)); > > Patch is attached Yes, vri sucks and does not work with vrsd. Maybe just use .long which is probably better than using a "wrong" format. Opinions?
vrr is almost a perfect match (it is for this, larger than imm4 would need to be split). .long : this would be uglier. use enough to be filled with nops after ? or use a 32b and 16b instead if it's in .text it should make no difference. On Fri, Apr 1, 2022 at 2:42 AM Christian Borntraeger <borntraeger@linux.ibm.com> wrote: > > > > Am 01.04.22 um 04:15 schrieb David Miller: > > Hi, > > > > There is some issue with instruction sub/alt encodings not matching, > > but I worked around it easily. > > > > I'm dropping the updated patch for the tests in here. > > I know I should resend the entire patch series as a higher version > > really, and will do so. > > I'm hoping someone can tell me if it's ok to use .insn vrr in place > > of vri(-d) as it doesn't match vri. > > [https://sourceware.org/binutils/docs-2.37/as/s390-Formats.html] > > > > .insn doesn't deal with sub encodings and there is no good alternative > > that I know of. > > > > example: > > > > /* vri-d as vrr */ > > asm volatile(".insn vrr, 0xE70000000086, %[v1], %[v2], %[v3], 0, %[I], 0\n" > > : [v1] "=v" (v1->v) > > : [v2] "v" (v2->v) > > , [v3] "v" (v3->v) > > , [I] "i" (I & 7)); > > > > Patch is attached > > Yes, vri sucks and does not work with vrsd. Maybe just use .long which is probably > better than using a "wrong" format. > Opinions?
Am 01.04.22 um 17:02 schrieb David Miller: > vrr is almost a perfect match (it is for this, larger than imm4 would > need to be split). > > .long : this would be uglier. > use enough to be filled with nops after ? > or use a 32b and 16b instead if it's in .text it should make no difference. I will let Richard or David decide what they prefer.
On 01.04.22 17:25, Christian Borntraeger wrote: > Am 01.04.22 um 17:02 schrieb David Miller: >> vrr is almost a perfect match (it is for this, larger than imm4 would >> need to be split). >> >> .long : this would be uglier. >> use enough to be filled with nops after ? >> or use a 32b and 16b instead if it's in .text it should make no difference. > > I will let Richard or David decide what they prefer. > I don't particularly care as long as there is a comment stating why we need this hack.
Recommendation for comment? /* vri-d encoding matches vrr for 4b imm. .insn does not handle this encoding variant. */ Christian: I will push another patch version as soon as that's decided. (unless you prefer to choose the comment and edit during staging) On Tue, Apr 5, 2022 at 6:13 AM David Hildenbrand <david@redhat.com> wrote: > > On 01.04.22 17:25, Christian Borntraeger wrote: > > Am 01.04.22 um 17:02 schrieb David Miller: > >> vrr is almost a perfect match (it is for this, larger than imm4 would > >> need to be split). > >> > >> .long : this would be uglier. > >> use enough to be filled with nops after ? > >> or use a 32b and 16b instead if it's in .text it should make no difference. > > > > I will let Richard or David decide what they prefer. > > > > I don't particularly care as long as there is a comment stating why we > need this hack. > > -- > Thanks, > > David / dhildenb >
On 05.04.22 19:03, David Miller wrote: > Recommendation for comment? > > /* vri-d encoding matches vrr for 4b imm. > .insn does not handle this encoding variant. > */ > Sorry for the late reply. ".insn doesn't handle vri-d properly. So instead, we use vrr, which matches vri-d with a 4b imm -- good enough for our purpose."
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target index 8c9b6a13ce..921a056dd1 100644 --- a/tests/tcg/s390x/Makefile.target +++ b/tests/tcg/s390x/Makefile.target @@ -16,6 +16,14 @@ TESTS+=shift TESTS+=trap TESTS+=signals-s390x +VECTOR_TESTS=vxeh2_vs +VECTOR_TESTS+=vxeh2_vcvt +VECTOR_TESTS+=vxeh2_vlstr + +TESTS+=$(VECTOR_TESTS) + +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2 + ifneq ($(HAVE_GDB_BIN),) GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c new file mode 100644 index 0000000000..71ecbd77b0 --- /dev/null +++ b/tests/tcg/s390x/vxeh2_vcvt.c @@ -0,0 +1,97 @@ +/* + * vxeh2_vcvt: vector-enhancements facility 2 vector convert * + */ +#include <stdint.h> + +typedef union S390Vector { + uint64_t d[2]; /* doubleword */ + uint32_t w[4]; /* word */ + uint16_t h[8]; /* halfword */ + uint8_t b[16]; /* byte */ + float f[4]; + double fd[2]; + __uint128_t v; +} S390Vector; + +#define M_S 8 +#define M4_XxC 4 +#define M4_def M4_XxC + +static inline void vcfps(S390Vector *v1, S390Vector *v2, + const uint8_t m3, const uint8_t m4, const uint8_t m5) +{ + asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [m3] "i" (m3) + , [m4] "i" (m4) + , [m5] "i" (m5)); +} + +static inline void vcfpl(S390Vector *v1, S390Vector *v2, + const uint8_t m3, const uint8_t m4, const uint8_t m5) +{ + asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [m3] "i" (m3) + , [m4] "i" (m4) + , [m5] "i" (m5)); +} + +static inline void vcsfp(S390Vector *v1, S390Vector *v2, + const uint8_t m3, const uint8_t m4, const uint8_t m5) +{ + asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [m3] "i" (m3) + , [m4] "i" (m4) + , [m5] "i" (m5)); +} + +static inline void vclfp(S390Vector *v1, S390Vector *v2, + const uint8_t m3, const uint8_t m4, const uint8_t m5) +{ + 
asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [m3] "i" (m3) + , [m4] "i" (m4) + , [m5] "i" (m5)); +} + +int main(int argc, char *argv[]) +{ + S390Vector vd; + S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 }; + S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 }; + S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123, + .f[2] = 4.499, .f[3] = 0.512 }; + + vd.d[0] = vd.d[1] = 0; + vcfps(&vd, &vs_i32, 2, M4_def, 0); + if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) { + return 1; + } + + vd.d[0] = vd.d[1] = 0; + vcfpl(&vd, &vs_u32, 2, M4_def, 0); + if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] || 8888 != vd.f[3]) { + return 1; + } + + vd.d[0] = vd.d[1] = 0; + vcsfp(&vd, &vs_f32, 2, M4_def, 0); + if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) { + return 1; + } + + vd.d[0] = vd.d[1] = 0; + vclfp(&vd, &vs_f32, 2, M4_def, 0); + if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) { + return 1; + } + + return 0; +} diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c new file mode 100644 index 0000000000..bf2954e86d --- /dev/null +++ b/tests/tcg/s390x/vxeh2_vlstr.c @@ -0,0 +1,146 @@ +/* + * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed * + */ +#include <stdint.h> + +typedef union S390Vector { + uint64_t d[2]; /* doubleword */ + uint32_t w[4]; /* word */ + uint16_t h[8]; /* halfword */ + uint8_t b[16]; /* byte */ + __uint128_t v; +} S390Vector; + +#define ES8 0 +#define ES16 1 +#define ES32 2 +#define ES64 3 + +#define vtst(v1, v2) \ + if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \ + return 1; \ + } + +static inline void vler(S390Vector *v1, const void *va, uint8_t m3) +{ + asm volatile("vler %[v1], 0(%[va]), %[m3]\n" + : [v1] "+v" (v1->v) + : [va] "d" (va) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vster(S390Vector *v1, const void *va, uint8_t m3) 
+{ + asm volatile("vster %[v1], 0(%[va]), %[m3]\n" + : [va] "+d" (va) + : [v1] "v" (v1->v) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vlbr(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n" + : [v1] "+v" (v1->v) + : [va] "d" (va) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n" + : [va] "+d" (va) + : [v1] "v" (v1->v) + , [m3] "i" (m3) + : "memory"); +} + + +static inline void vlebrh(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n" + : [v1] "+v" (v1->v) + : [va] "d" (va) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n" + : [va] "+d" (va) + : [v1] "v" (v1->v) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vllebrz(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n" + : [v1] "+v" (v1->v) + : [va] "d" (va) + , [m3] "i" (m3) + : "memory"); +} + +static inline void vlbrrep(S390Vector *v1, void *va, const uint8_t m3) +{ + asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n" + : [v1] "+v" (v1->v) + : [va] "d" (va) + , [m3] "i" (m3) + : "memory"); +} + + +int main(int argc, char *argv[]) +{ + S390Vector vd = { .d[0] = 0, .d[1] = 0 }; + S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull, + .d[1] = 0x7766554433221107ull }; + + const S390Vector vt_v_er16 = { + .h[0] = 0x1107, .h[1] = 0x3322, .h[2] = 0x5544, .h[3] = 0x7766, + .h[4] = 0x9988, .h[5] = 0xBBAA, .h[6] = 0xDDCC, .h[7] = 0x8FEE }; + + const S390Vector vt_v_br16 = { + .h[0] = 0xEE8F, .h[1] = 0xCCDD, .h[2] = 0xAABB, .h[3] = 0x8899, + .h[4] = 0x6677, .h[5] = 0x4455, .h[6] = 0x2233, .h[7] = 0x0711 }; + + int ix; + uint64_t ss64 = 0xFEEDFACE0BADBEEFull, sd64 = 0; + + vler (&vd, &vs, ES16); vtst(vd, vt_v_er16); + vster(&vs, &vd, ES16); vtst(vd, 
vt_v_er16); + + vlbr (&vd, &vs, ES16); vtst(vd, vt_v_br16); + vstbr(&vs, &vd, ES16); vtst(vd, vt_v_br16); + + vlebrh(&vd, &ss64, 5); + if (0xEDFE != vd.h[5]) { + return 1; + } + + vstebrh(&vs, (uint8_t *)&sd64 + 4, 7); + if (0x0000000007110000ull != sd64) { + return 1; + } + + vllebrz(&vd, (uint8_t *)&ss64 + 3, 2); + for (ix = 0; ix < 4; ix++) { + if (vd.w[ix] != (ix != 1 ? 0 : 0xBEAD0BCE)) { + return 1; + } + } + + vlbrrep(&vd, (uint8_t *)&ss64 + 4, 1); + for (ix = 0; ix < 8; ix++) { + if (0xAD0B != vd.h[ix]) { + return 1; + } + } + + return 0; +} diff --git a/tests/tcg/s390x/vxeh2_vs.c b/tests/tcg/s390x/vxeh2_vs.c new file mode 100644 index 0000000000..04a3d4d7bb --- /dev/null +++ b/tests/tcg/s390x/vxeh2_vs.c @@ -0,0 +1,91 @@ +/* + * vxeh2_vs: vector-enhancements facility 2 vector shift + */ +#include <stdint.h> + +typedef union S390Vector { + uint64_t d[2]; /* doubleword */ + uint32_t w[4]; /* word */ + uint16_t h[8]; /* halfword */ + uint8_t b[16]; /* byte */ + __uint128_t v; +} S390Vector; + +#define vtst(v1, v2) \ + if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \ + return 1; \ + } + +static inline void vsl(S390Vector *v1, S390Vector *v2, S390Vector *v3) +{ + asm volatile("vsl %[v1], %[v2], %[v3]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [v3] "v" (v3->v)); +} + +static inline void vsra(S390Vector *v1, S390Vector *v2, S390Vector *v3) +{ + asm volatile("vsra %[v1], %[v2], %[v3]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [v3] "v" (v3->v)); +} + +static inline void vsrl(S390Vector *v1, S390Vector *v2, S390Vector *v3) +{ + asm volatile("vsrl %[v1], %[v2], %[v3]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [v3] "v" (v3->v)); +} + +static inline void vsld(S390Vector *v1, S390Vector *v2, + S390Vector *v3, const uint8_t I) +{ + asm volatile("vsld %[v1], %[v2], %[v3], %[I]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [v3] "v" (v3->v) + , [I] "i" (I & 7)); +} + +static inline void vsrd(S390Vector *v1, S390Vector *v2, + S390Vector *v3, const 
uint8_t I) +{ + asm volatile("vsrd %[v1], %[v2], %[v3], %[I]\n" + : [v1] "=v" (v1->v) + : [v2] "v" (v2->v) + , [v3] "v" (v3->v) + , [I] "i" (I & 7)); +} + +int main(int argc, char *argv[]) +{ + const S390Vector vt_vsl = { .d[0] = 0x7FEDBB32D5AA311Dull, + .d[1] = 0xBB65AA10912220C0ull }; + const S390Vector vt_vsra = { .d[0] = 0xF1FE6E7399AA5466ull, + .d[1] = 0x0E762A5188221044ull }; + const S390Vector vt_vsrl = { .d[0] = 0x11FE6E7399AA5466ull, + .d[1] = 0x0E762A5188221044ull }; + const S390Vector vt_vsld = { .d[0] = 0x7F76EE65DD54CC43ull, + .d[1] = 0xBB32AA2199108838ull }; + const S390Vector vt_vsrd = { .d[0] = 0x0E060802040E000Aull, + .d[1] = 0x0C060802040E000Aull }; + S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull, + .d[1] = 0x7766554433221107ull }; + S390Vector vd = { .d[0] = 0, .d[1] = 0 }; + S390Vector vsi = { .d[0] = 0, .d[1] = 0 }; + + for (int ix = 0; ix < 16; ix++) { + vsi.b[ix] = (1 + (5 ^ ~ix)) & 7; + } + + vsl (&vd, &vs, &vsi); vtst(vd, vt_vsl); + vsra(&vd, &vs, &vsi); vtst(vd, vt_vsra); + vsrl(&vd, &vs, &vsi); vtst(vd, vt_vsrl); + vsld(&vd, &vs, &vsi, 3); vtst(vd, vt_vsld); + vsrd(&vd, &vs, &vsi, 15); vtst(vd, vt_vsrd); + + return 0; +}