diff mbox series

[v6,13/13] tests/tcg/s390x: Tests for Vector Enhancements Facility 2

Message ID 20220428094708.84835-14-david@redhat.com (mailing list archive)
State New, archived
Headers show
Series s390x/tcg: Implement Vector-Enhancements Facility 2 | expand

Commit Message

David Hildenbrand April 28, 2022, 9:47 a.m. UTC
From: David Miller <dmiller423@gmail.com>

Signed-off-by: David Miller <dmiller423@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 tests/tcg/s390x/Makefile.target |   8 ++
 tests/tcg/s390x/vx.h            |  19 +++++
 tests/tcg/s390x/vxeh2_vcvt.c    |  88 ++++++++++++++++++++
 tests/tcg/s390x/vxeh2_vlstr.c   | 139 ++++++++++++++++++++++++++++++++
 tests/tcg/s390x/vxeh2_vs.c      |  93 +++++++++++++++++++++
 5 files changed, 347 insertions(+)
 create mode 100644 tests/tcg/s390x/vx.h
 create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
 create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
 create mode 100644 tests/tcg/s390x/vxeh2_vs.c

Comments

Thomas Huth May 2, 2022, 8:12 a.m. UTC | #1
On 28/04/2022 11.47, David Hildenbrand wrote:
> From: David Miller <dmiller423@gmail.com>
> 
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Tested-by: Thomas Huth <thuth@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>   tests/tcg/s390x/Makefile.target |   8 ++
>   tests/tcg/s390x/vx.h            |  19 +++++
>   tests/tcg/s390x/vxeh2_vcvt.c    |  88 ++++++++++++++++++++
>   tests/tcg/s390x/vxeh2_vlstr.c   | 139 ++++++++++++++++++++++++++++++++
>   tests/tcg/s390x/vxeh2_vs.c      |  93 +++++++++++++++++++++
>   5 files changed, 347 insertions(+)
>   create mode 100644 tests/tcg/s390x/vx.h
>   create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
>   create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
>   create mode 100644 tests/tcg/s390x/vxeh2_vs.c
> 
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index f0d474a245..e50d617f21 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -17,6 +17,14 @@ TESTS+=trap
>   TESTS+=signals-s390x
>   TESTS+=branch-relative-long
>   
> +VECTOR_TESTS=vxeh2_vs
> +VECTOR_TESTS+=vxeh2_vcvt
> +VECTOR_TESTS+=vxeh2_vlstr
> +
> +TESTS+=$(VECTOR_TESTS)
> +
> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2

I'm sorry, but this still fails in the QEMU CI:

https://gitlab.com/thuth/qemu/-/jobs/2401500348

s390x-linux-gnu-gcc: error: unrecognized argument in option '-march=z15'

I think we either have to switch to manually encoded instructions again, or 
add a check to the Makefile and only add the tests if the compiler supports 
-march=z15 ...? Opinions? Preferences?

  Thomas
Thomas Huth May 2, 2022, 9:10 a.m. UTC | #2
On 02/05/2022 10.12, Thomas Huth wrote:
> On 28/04/2022 11.47, David Hildenbrand wrote:
>> From: David Miller <dmiller423@gmail.com>
>>
>> Signed-off-by: David Miller <dmiller423@gmail.com>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> Tested-by: Thomas Huth <thuth@redhat.com>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> ---
>>   tests/tcg/s390x/Makefile.target |   8 ++
>>   tests/tcg/s390x/vx.h            |  19 +++++
>>   tests/tcg/s390x/vxeh2_vcvt.c    |  88 ++++++++++++++++++++
>>   tests/tcg/s390x/vxeh2_vlstr.c   | 139 ++++++++++++++++++++++++++++++++
>>   tests/tcg/s390x/vxeh2_vs.c      |  93 +++++++++++++++++++++
>>   5 files changed, 347 insertions(+)
>>   create mode 100644 tests/tcg/s390x/vx.h
>>   create mode 100644 tests/tcg/s390x/vxeh2_vcvt.c
>>   create mode 100644 tests/tcg/s390x/vxeh2_vlstr.c
>>   create mode 100644 tests/tcg/s390x/vxeh2_vs.c
>>
>> diff --git a/tests/tcg/s390x/Makefile.target 
>> b/tests/tcg/s390x/Makefile.target
>> index f0d474a245..e50d617f21 100644
>> --- a/tests/tcg/s390x/Makefile.target
>> +++ b/tests/tcg/s390x/Makefile.target
>> @@ -17,6 +17,14 @@ TESTS+=trap
>>   TESTS+=signals-s390x
>>   TESTS+=branch-relative-long
>> +VECTOR_TESTS=vxeh2_vs
>> +VECTOR_TESTS+=vxeh2_vcvt
>> +VECTOR_TESTS+=vxeh2_vlstr
>> +
>> +TESTS+=$(VECTOR_TESTS)
>> +
>> +$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
> 
> I'm sorry, but this still fails in the QEMU CI:
> 
> https://gitlab.com/thuth/qemu/-/jobs/2401500348
> 
> s390x-linux-gnu-gcc: error: unrecognized argument in option '-march=z15'
> 
> I think we either have to switch to manually encoded instructions again, or 
> add a check to the Makefile and only add the tests if the compiler supports 
> -march=z15 ...? Opinions? Preferences?

I just tried it, and something like this seems to do the job:

diff a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -20,11 +20,11 @@ TESTS+=branch-relative-long
  VECTOR_TESTS=vxeh2_vs
  VECTOR_TESTS+=vxeh2_vcvt
  VECTOR_TESTS+=vxeh2_vlstr
-
-TESTS+=$(VECTOR_TESTS)
-
  $(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
  
+TESTS+=$(if $(shell $(CC) -march=z15 -S -o /dev/null -xc /dev/null \
+                        >/dev/null 2>&1 && echo OK),$(VECTOR_TESTS))
+
  ifneq ($(HAVE_GDB_BIN),)
  GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
  
Does that look reasonable?

  Thomas
Thomas Huth May 2, 2022, 9:35 a.m. UTC | #3
On 28/04/2022 11.47, David Hildenbrand wrote:
> From: David Miller <dmiller423@gmail.com>
> 
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> Tested-by: Thomas Huth <thuth@redhat.com>
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
[...]
> diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
> new file mode 100644
> index 0000000000..5677bf7c29
> --- /dev/null
> +++ b/tests/tcg/s390x/vxeh2_vlstr.c
> @@ -0,0 +1,139 @@
> +/*
> + * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed *
> + */
> +#include <stdint.h>
> +#include "vx.h"
> +
> +#define vtst(v1, v2) \
> +    if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
> +        return 1;     \
> +    }
> +
> +static inline void vler(S390Vector *v1, const void *va, uint8_t m3)
> +{
> +    asm volatile("vler %[v1], 0(%[va]), %[m3]\n"
> +                : [v1] "+v" (v1->v)
> +                : [va]  "d" (va)
> +                , [m3]  "i" (m3)
> +                : "memory");
> +}

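The vxeh2_vlstr test fails when built with Clang instead of GCC: Clang 
picks register r0 for the operands that use the "d" constraint in the 
inline assembly here, and r0 used as a base register in an address means 
"no base register" rather than the contents of r0. Using the "a" (address 
register) constraint, which excludes r0, fixes it: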

diff a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
--- a/tests/tcg/s390x/vxeh2_vlstr.c
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -13,7 +13,7 @@ static inline void vler(S390Vector *v1, const void *va, 
uint8_t m3)
  {
      asm volatile("vler %[v1], 0(%[va]), %[m3]\n"
                  : [v1] "+v" (v1->v)
-                : [va]  "d" (va)
+                : [va]  "a" (va)
                  , [m3]  "i" (m3)
                  : "memory");
  }
@@ -21,7 +21,7 @@ static inline void vler(S390Vector *v1, const void *va, 
uint8_t m3)
  static inline void vster(S390Vector *v1, const void *va, uint8_t m3)
  {
      asm volatile("vster %[v1], 0(%[va]), %[m3]\n"
-                : [va] "+d" (va)
+                : [va] "+a" (va)
                  : [v1]  "v" (v1->v)
                  , [m3]  "i" (m3)
                  : "memory");
@@ -31,7 +31,7 @@ static inline void vlbr(S390Vector *v1, void *va, const 
uint8_t m3)
  {
      asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n"
                  : [v1] "+v" (v1->v)
-                : [va]  "d" (va)
+                : [va]  "a" (va)
                  , [m3]  "i" (m3)
                  : "memory");
  }
@@ -39,7 +39,7 @@ static inline void vlbr(S390Vector *v1, void *va, const 
uint8_t m3)
  static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3)
  {
      asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n"
-                : [va] "+d" (va)
+                : [va] "+a" (va)
                  : [v1]  "v" (v1->v)
                  , [m3]  "i" (m3)
                  : "memory");
@@ -50,7 +50,7 @@ static inline void vlebrh(S390Vector *v1, void *va, const 
uint8_t m3)
  {
      asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n"
                  : [v1] "+v" (v1->v)
-                : [va]  "d" (va)
+                : [va]  "a" (va)
                  , [m3]  "i" (m3)
                  : "memory");
  }
@@ -58,7 +58,7 @@ static inline void vlebrh(S390Vector *v1, void *va, const 
uint8_t m3)
  static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3)
  {
      asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n"
-                : [va] "+d" (va)
+                : [va] "+a" (va)
                  : [v1]  "v" (v1->v)
                  , [m3]  "i" (m3)
                  : "memory");
@@ -68,7 +68,7 @@ static inline void vllebrz(S390Vector *v1, void *va, const 
uint8_t m3)
  {
      asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n"
                  : [v1] "+v" (v1->v)
-                : [va]  "d" (va)
+                : [va]  "a" (va)
                  , [m3]  "i" (m3)
                  : "memory");
  }
@@ -77,7 +77,7 @@ static inline void vlbrrep(S390Vector *v1, void *va, const 
uint8_t m3)
  {
      asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n"
                  : [v1] "+v" (v1->v)
-                : [va]  "d" (va)
+                : [va]  "a" (va)
                  , [m3]  "i" (m3)
                  : "memory");
  }

I'll fix it up in my queue, so no need to resend.

  Thomas
diff mbox series

Patch

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index f0d474a245..e50d617f21 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -17,6 +17,14 @@  TESTS+=trap
 TESTS+=signals-s390x
 TESTS+=branch-relative-long
 
+VECTOR_TESTS=vxeh2_vs
+VECTOR_TESTS+=vxeh2_vcvt
+VECTOR_TESTS+=vxeh2_vlstr
+$(VECTOR_TESTS): CFLAGS+=-march=z15 -O2
+# Older compilers reject -march=z15; only add the tests if it is accepted.
+TESTS+=$(if $(shell $(CC) -march=z15 -S -o /dev/null -xc /dev/null \
+                        >/dev/null 2>&1 && echo OK),$(VECTOR_TESTS))
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/s390x/vx.h b/tests/tcg/s390x/vx.h
new file mode 100644
index 0000000000..02e7fd518a
--- /dev/null
+++ b/tests/tcg/s390x/vx.h
@@ -0,0 +1,19 @@ 
+#ifndef QEMU_TESTS_S390X_VX_H
+#define QEMU_TESTS_S390X_VX_H
+
+typedef union S390Vector {  /* one 16-byte vector, viewed several ways */
+    uint64_t d[2];  /* doubleword */
+    uint32_t w[4];  /* word */
+    uint16_t h[8];  /* halfword */
+    uint8_t  b[16]; /* byte */
+    float    f[4];  /* float32 */
+    double   fd[2]; /* float64 */
+    __uint128_t v;  /* whole vector as a single 128-bit integer */
+} S390Vector;
+
+#define ES8  0  /* presumably 8-bit elements - TODO confirm */
+#define ES16 1  /* 16-bit (halfword) elements */
+#define ES32 2  /* presumably 32-bit elements - TODO confirm */
+#define ES64 3  /* presumably 64-bit elements - TODO confirm */
+
+#endif /* QEMU_TESTS_S390X_VX_H */
diff --git a/tests/tcg/s390x/vxeh2_vcvt.c b/tests/tcg/s390x/vxeh2_vcvt.c
new file mode 100644
index 0000000000..d6e551c16e
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vcvt.c
@@ -0,0 +1,88 @@ 
+/*
+ * vxeh2_vcvt: vector-enhancements facility 2 vector convert
+ */
+#include <stdint.h>
+#include "vx.h"
+
+#define M_S 8           /* NOTE(review): not referenced in this file */
+#define M4_XxC 4        /* m4 value, used through M4_def */
+#define M4_def M4_XxC   /* m4 passed to every conversion in main() */
+
+static inline void vcfps(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{   /* Issue "vcfps" on *v2; main() uses it for signed int -> float. */
+    asm volatile("vcfps %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* input vector */
+                , [m3]  "i" (m3)        /* "i": needs a constant operand */
+                , [m4]  "i" (m4)
+                , [m5]  "i" (m5));
+}
+
+static inline void vcfpl(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{   /* Issue "vcfpl" on *v2; main() uses it for unsigned int -> float. */
+    asm volatile("vcfpl %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* input vector */
+                , [m3]  "i" (m3)        /* "i": needs a constant operand */
+                , [m4]  "i" (m4)
+                , [m5]  "i" (m5));
+}
+
+static inline void vcsfp(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{   /* Issue "vcsfp" on *v2; main() uses it for float -> integer words. */
+    asm volatile("vcsfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* input vector */
+                , [m3]  "i" (m3)        /* "i": needs a constant operand */
+                , [m4]  "i" (m4)
+                , [m5]  "i" (m5));
+}
+
+static inline void vclfp(S390Vector *v1, S390Vector *v2,
+    const uint8_t m3,  const uint8_t m4,  const uint8_t m5)
+{   /* Issue "vclfp" on *v2; main() uses it for float -> integer words. */
+    asm volatile("vclfp %[v1], %[v2], %[m3], %[m4], %[m5]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* input vector */
+                , [m3]  "i" (m3)        /* "i": needs a constant operand */
+                , [m4]  "i" (m4)
+                , [m5]  "i" (m5));
+}
+
+int main(int argc, char *argv[])
+{   /* Returns 0 if all four conversions give the expected lanes, else 1. */
+    S390Vector vd;
+    S390Vector vs_i32 = { .w[0] = 1, .w[1] = 64, .w[2] = 1024, .w[3] = -10 };
+    S390Vector vs_u32 = { .w[0] = 2, .w[1] = 32, .w[2] = 4096, .w[3] = 8888 };
+    S390Vector vs_f32 = { .f[0] = 3.987, .f[1] = 5.123,
+                          .f[2] = 4.499, .f[3] = 0.512 };
+
+    vd.d[0] = vd.d[1] = 0;  /* clear the destination before each case */
+    vcfps(&vd, &vs_i32, 2, M4_def, 0);
+    if (1 != vd.f[0] || 1024 != vd.f[2] || 64 != vd.f[1] || -10 != vd.f[3]) {
+        return 1;   /* vcfps: a lane is not the float of its int input */
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vcfpl(&vd, &vs_u32, 2, M4_def, 0);
+    if (2 != vd.f[0] || 4096 != vd.f[2] || 32 != vd.f[1] || 8888 != vd.f[3]) {
+        return 1;   /* vcfpl: a lane is not the float of its int input */
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vcsfp(&vd, &vs_f32, 2, M4_def, 0);
+    if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+        return 1;   /* vcsfp: expected each float rounded to nearest */
+    }
+
+    vd.d[0] = vd.d[1] = 0;
+    vclfp(&vd, &vs_f32, 2, M4_def, 0);
+    if (4 != vd.w[0] || 4 != vd.w[2] || 5 != vd.w[1] || 1 != vd.w[3]) {
+        return 1;   /* vclfp: expected each float rounded to nearest */
+    }
+
+    return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vlstr.c b/tests/tcg/s390x/vxeh2_vlstr.c
new file mode 100644
index 0000000000..5677bf7c29
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vlstr.c
@@ -0,0 +1,139 @@ 
+/*
+ * vxeh2_vlstr: vector-enhancements facility 2 vector load/store reversed
+ */
+#include <stdint.h>
+#include "vx.h"
+
+#define vtst(v1, v2) \
+    if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
+        return 1;     \
+    }
+
+static inline void vler(S390Vector *v1, const void *va, uint8_t m3)
+{   /* VLER: load *v1 from va with its elements in reverse order. */
+    asm volatile("vler %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va]  "a" (va)    /* "a", not "d": r0 as base = no base */
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");
+}
+
+static inline void vster(S390Vector *v1, const void *va, uint8_t m3)
+{   /* VSTER: store *v1 at va with its elements in reverse order. */
+    asm volatile("vster %[v1], 0(%[va]), %[m3]\n"
+                : [va] "+a" (va)    /* "a", not "d": r0 as base = no base */
+                : [v1]  "v" (v1->v)
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");        /* the store itself is not an output */
+}
+
+static inline void vlbr(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VLBR: load *v1 from va with the bytes of each element reversed. */
+    asm volatile("vlbr %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va]  "a" (va)    /* "a", not "d": r0 as base = no base */
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");
+}
+
+static inline void vstbr(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VSTBR: store *v1 at va with the bytes of each element reversed. */
+    asm volatile("vstbr %[v1], 0(%[va]), %[m3]\n"
+                : [va] "+a" (va)    /* "a", not "d": r0 as base = no base */
+                : [v1]  "v" (v1->v)
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");        /* the store itself is not an output */
+}
+
+
+static inline void vlebrh(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VLEBRH: load the halfword at va, byte-swapped, into element m3. */
+    asm volatile("vlebrh %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v) /* "+": the other elements are kept */
+                : [va]  "a" (va)    /* "a", not "d": r0 as base = no base */
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");
+}
+
+static inline void vstebrh(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VSTEBRH: store halfword element m3 of *v1, byte-swapped, at va. */
+    asm volatile("vstebrh %[v1], 0(%[va]), %[m3]\n"
+                : [va] "+a" (va)    /* "a", not "d": r0 as base = no base */
+                : [v1]  "v" (v1->v)
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");        /* the store itself is not an output */
+}
+
+static inline void vllebrz(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VLLEBRZ: load one byte-reversed element from va, zero the rest. */
+    asm volatile("vllebrz %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va]  "a" (va)    /* "a", not "d": r0 as base = no base */
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");
+}
+
+static inline void vlbrrep(S390Vector *v1, void *va, const uint8_t m3)
+{   /* VLBRREP: load one byte-reversed element from va into every lane. */
+    asm volatile("vlbrrep %[v1], 0(%[va]), %[m3]\n"
+                : [v1] "+v" (v1->v)
+                : [va]  "a" (va)    /* "a", not "d": r0 as base = no base */
+                , [m3]  "i" (m3)    /* immediate: must fold to a constant */
+                : "memory");
+}
+
+int main(int argc, char *argv[])
+{   /* Returns 0 when every load/store variant gives the expected data. */
+    S390Vector vd = { .d[0] = 0, .d[1] = 0 };
+    S390Vector vs = { .d[0] = 0x8FEEDDCCBBAA9988ull,
+                      .d[1] = 0x7766554433221107ull };
+
+    const S390Vector vt_v_er16 = {  /* expected result of vler/vster, ES16 */
+        .h[0] = 0x1107, .h[1] = 0x3322, .h[2] = 0x5544, .h[3] = 0x7766,
+        .h[4] = 0x9988, .h[5] = 0xBBAA, .h[6] = 0xDDCC, .h[7] = 0x8FEE };
+
+    const S390Vector vt_v_br16 = {  /* expected result of vlbr/vstbr, ES16 */
+        .h[0] = 0xEE8F, .h[1] = 0xCCDD, .h[2] = 0xAABB, .h[3] = 0x8899,
+        .h[4] = 0x6677, .h[5] = 0x4455, .h[6] = 0x2233, .h[7] = 0x0711 };
+
+    int ix;
+    uint64_t ss64 = 0xFEEDFACE0BADBEEFull, sd64 = 0;
+
+    vler(&vd, &vs, ES16);   /* load vs into vd, halfwords reversed */
+    vtst(vd, vt_v_er16);
+
+    vster(&vs, &vd, ES16);  /* store vs over vd, halfwords reversed */
+    vtst(vd, vt_v_er16);
+
+    vlbr(&vd, &vs, ES16);   /* load vs into vd, each halfword byte-swapped */
+    vtst(vd, vt_v_br16);
+
+    vstbr(&vs, &vd, ES16);  /* store vs over vd, each halfword byte-swapped */
+    vtst(vd, vt_v_br16);
+
+    vlebrh(&vd, &ss64, 5);  /* first halfword of ss64 -> element 5, swapped */
+    if (0xEDFE != vd.h[5]) {
+        return 1;
+    }
+
+    vstebrh(&vs, (uint8_t *)&sd64 + 4, 7);  /* element 7 -> sd64 + 4 */
+    if (0x0000000007110000ull != sd64) {
+        return 1;
+    }
+
+    vllebrz(&vd, (uint8_t *)&ss64 + 3, 2);  /* 4 bytes at ss64 + 3 */
+    for (ix = 0; ix < 4; ix++) {
+        if (vd.w[ix] != (ix != 1 ? 0 : 0xBEAD0BCE)) {  /* only w[1] is set */
+            return 1;
+        }
+    }
+
+    vlbrrep(&vd, (uint8_t *)&ss64 + 4, 1);  /* 2 bytes at ss64 + 4 */
+    for (ix = 0; ix < 8; ix++) {
+        if (0xAD0B != vd.h[ix]) {   /* every halfword must match */
+            return 1;
+        }
+    }
+
+    return 0;
+}
diff --git a/tests/tcg/s390x/vxeh2_vs.c b/tests/tcg/s390x/vxeh2_vs.c
new file mode 100644
index 0000000000..b7ef419d79
--- /dev/null
+++ b/tests/tcg/s390x/vxeh2_vs.c
@@ -0,0 +1,93 @@ 
+/*
+ * vxeh2_vs: vector-enhancements facility 2 vector shift
+ */
+#include <stdint.h>
+#include "vx.h"
+
+#define vtst(v1, v2) \
+    if (v1.d[0] != v2.d[0] || v1.d[1] != v2.d[1]) { \
+        return 1;     \
+    }
+
+static inline void vsl(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{   /* Issue "vsl" with inputs *v2 and *v3; the result goes to *v1. */
+    asm volatile("vsl %[v1], %[v2], %[v3]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* main() passes the data here */
+                , [v3]  "v" (v3->v));   /* main() passes per-byte counts */
+}
+
+static inline void vsra(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{   /* Issue "vsra" with inputs *v2 and *v3; the result goes to *v1. */
+    asm volatile("vsra %[v1], %[v2], %[v3]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* main() passes the data here */
+                , [v3]  "v" (v3->v));   /* main() passes per-byte counts */
+}
+
+static inline void vsrl(S390Vector *v1, S390Vector *v2, S390Vector *v3)
+{   /* Issue "vsrl" with inputs *v2 and *v3; the result goes to *v1. */
+    asm volatile("vsrl %[v1], %[v2], %[v3]\n"
+                : [v1] "=v" (v1->v)     /* output vector */
+                : [v2]  "v" (v2->v)     /* main() passes the data here */
+                , [v3]  "v" (v3->v));   /* main() passes per-byte counts */
+}
+
+static inline void vsld(S390Vector *v1, S390Vector *v2,
+    S390Vector *v3, const uint8_t I)
+{   /* Issue "vsld" with inputs *v2, *v3 and an immediate; result in *v1. */
+    asm volatile("vsld %[v1], %[v2], %[v3], %[I]\n"
+                : [v1] "=v" (v1->v)
+                : [v2]  "v" (v2->v)
+                , [v3]  "v" (v3->v)
+                , [I]   "i" (I & 7));   /* only the low 3 bits of I are used */
+}
+
+static inline void vsrd(S390Vector *v1, S390Vector *v2,
+    S390Vector *v3, const uint8_t I)
+{   /* Issue "vsrd" with inputs *v2, *v3 and an immediate; result in *v1. */
+    asm volatile("vsrd %[v1], %[v2], %[v3], %[I]\n"
+                : [v1] "=v" (v1->v)
+                : [v2]  "v" (v2->v)
+                , [v3]  "v" (v3->v)
+                , [I]   "i" (I & 7));   /* only the low 3 bits of I are used */
+}
+
+int main(int argc, char *argv[])
+{   /* Returns 0 when all five shifts match their vt_* vectors, else 1. */
+    const S390Vector vt_vsl  = { .d[0] = 0x7FEDBB32D5AA311Dull,
+                                 .d[1] = 0xBB65AA10912220C0ull };
+    const S390Vector vt_vsra = { .d[0] = 0xF1FE6E7399AA5466ull,
+                                 .d[1] = 0x0E762A5188221044ull };
+    const S390Vector vt_vsrl = { .d[0] = 0x11FE6E7399AA5466ull,
+                                 .d[1] = 0x0E762A5188221044ull };
+    const S390Vector vt_vsld = { .d[0] = 0x7F76EE65DD54CC43ull,
+                                 .d[1] = 0xBB32AA2199108838ull };
+    const S390Vector vt_vsrd = { .d[0] = 0x0E060802040E000Aull,
+                                 .d[1] = 0x0C060802040E000Aull };
+    S390Vector vs  = { .d[0] = 0x8FEEDDCCBBAA9988ull,   /* data operand */
+                       .d[1] = 0x7766554433221107ull };
+    S390Vector  vd = { .d[0] = 0, .d[1] = 0 };          /* result */
+    S390Vector vsi = { .d[0] = 0, .d[1] = 0 };          /* second operand */
+
+    for (int ix = 0; ix < 16; ix++) {
+        vsi.b[ix] = (1 + (5 ^ ~ix)) & 7;    /* per-byte value in 0..7 */
+    }
+
+    vsl(&vd, &vs, &vsi);
+    vtst(vd, vt_vsl);
+
+    vsra(&vd, &vs, &vsi);
+    vtst(vd, vt_vsra);
+
+    vsrl(&vd, &vs, &vsi);
+    vtst(vd, vt_vsrl);
+
+    vsld(&vd, &vs, &vsi, 3);
+    vtst(vd, vt_vsld);
+
+    vsrd(&vd, &vs, &vsi, 15);   /* the helper masks 15 down to 7 */
+    vtst(vd, vt_vsrd);
+
+    return 0;
+}