diff mbox series

[v2,2/7] target/s390x: vxeh2: vector string search

Message ID 20220307020327.3003-3-dmiller423@gmail.com (mailing list archive)
State New, archived
Headers show
Series s390x/tcg: Implement Vector-Enhancements Facility 2 | expand

Commit Message

David Miller March 7, 2022, 2:03 a.m. UTC
Signed-off-by: David Miller <dmiller423@gmail.com>
---
 target/s390x/helper.h                |  1 +
 target/s390x/tcg/insn-data.def       |  2 +
 target/s390x/tcg/translate.c         |  3 +-
 target/s390x/tcg/translate_vx.c.inc  | 17 ++++++++
 target/s390x/tcg/vec_string_helper.c | 65 ++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 1 deletion(-)

Comments

Richard Henderson March 7, 2022, 6:50 p.m. UTC | #1
On 3/6/22 16:03, David Miller wrote:
> +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
> +{
> +    const uint8_t es = get_field(s, m5);
> +    const uint32_t D = get_field(s, m6);
> +
> +    if (es > ES_32) {
> +        gen_program_exception(s, PGM_SPECIFICATION);
> +        return DISAS_NORETURN;
> +    }

Missed a check for bits of m6 other than zs must be zero.

> +    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
> +                   get_field(s, v3), get_field(s, v4),
> +                   cpu_env, (D << 16) | es, gen_helper_vstrs);

Why << 16?

> +void HELPER(vstrs)(void *v1, const void *v2, const void *v3, void *v4,
> +                   CPUS390XState *env, uint32_t desc) {
> +    const bool zs = (desc >> 16);

If this is meant to recover D, it won't.  The value that you passed above is had via 
simd_data(desc).  After that, you could get D via >> 16.

> +    const uint8_t es = desc & 16;

I think you clearly meant & 15 here.  Or maybe & 3, since ES <= 2?
However!

I think you shouldn't actually pass these values via simd_data().  I think you should have 
6 helper functions, which call this as an inline function passing ES and ZS as immediate 
arguments.  This will allow the compiler to fold away all of the multiple checks vs ES and ZS.


> +    uint32_t str_len = 0, eos = 0;
> +    uint32_t i = 0, j = 0, k = 0, cc = 0;
> +    uint32_t substr_len = ((uint8_t *)v4)[H1(7)] & 31;
> +
> +    for (i = 0; i < 16; i += char_size) {
> +        if (0 == es && !((uint8_t  *)v3)[H1(i >> es)]) { break; }
> +        if (1 == es && !((uint16_t *)v3)[H2(i >> es)]) { break; }
> +        if (2 == es && !((uint32_t *)v3)[H4(i >> es)]) { break; }

Instead of iterating to 16 by char_size and dividing by es, you should iterate to 
element_count by 1.  You should use s390_vec_read_element here, and elsewhere.

> +    }
> +    if (i < substr_len) {
> +        substr_len = i;
> +    }

This bounding of substr_len is only done when ZS is true.

> +    ((uint64_t *)v1)[0] = ((uint64_t *)v1)[1] = 0;
> +    ((uint8_t *)v1)[H1(7)] = k;

s390_vec_write_element64(v1, 0, k);
s390_vec_write_element64(v1, 1, 0);


r~
diff mbox series

Patch

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7cbcbd7f0b..1e38ee2e4e 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -246,6 +246,7 @@  DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_6(vstrs, void, ptr, cptr, cptr, ptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
 DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
index 6c8a8b229f..46add91a0e 100644
--- a/target/s390x/tcg/insn-data.def
+++ b/target/s390x/tcg/insn-data.def
@@ -1246,6 +1246,8 @@ 
     F(0xe75c, VISTR,   VRR_a, V,   0, 0, 0, 0, vistr, 0, IF_VEC)
 /* VECTOR STRING RANGE COMPARE */
     F(0xe78a, VSTRC,   VRR_d, V,   0, 0, 0, 0, vstrc, 0, IF_VEC)
+/* VECTOR STRING SEARCH */
+    F(0xe78b, VSTRS,   VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC)
 
 /* === Vector Floating-Point Instructions */
 
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 904b51542f..d9ac29573d 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6222,7 +6222,8 @@  enum DisasInsnEnum {
 #define FAC_PCI         S390_FEAT_ZPCI /* z/PCI facility */
 #define FAC_AIS         S390_FEAT_ADAPTER_INT_SUPPRESSION
 #define FAC_V           S390_FEAT_VECTOR /* vector facility */
-#define FAC_VE          S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */
+#define FAC_VE          S390_FEAT_VECTOR_ENH  /* vector enhancements facility 1 */
+#define FAC_VE2         S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 2 */
 #define FAC_MIE2        S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */
 #define FAC_MIE3        S390_FEAT_MISC_INSTRUCTION_EXT3 /* miscellaneous-instruction-extensions facility 3 */
 
diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc
index ea28e40d4f..db86d9b87d 100644
--- a/target/s390x/tcg/translate_vx.c.inc
+++ b/target/s390x/tcg/translate_vx.c.inc
@@ -2497,6 +2497,23 @@  static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o)
     return DISAS_NEXT;
 }
 
+static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o)
+{
+    const uint8_t es = get_field(s, m5);
+    const uint32_t D = get_field(s, m6);
+
+    if (es > ES_32) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+    gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
+                   get_field(s, v3), get_field(s, v4),
+                   cpu_env, (D << 16) | es, gen_helper_vstrs);
+
+    set_cc_static(s);
+    return DISAS_NEXT;
+}
+
 static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
     const uint8_t fpf = get_field(s, m4);
diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c
index ac315eb095..22c14c6925 100644
--- a/target/s390x/tcg/vec_string_helper.c
+++ b/target/s390x/tcg/vec_string_helper.c
@@ -471,3 +471,68 @@  void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
 DEF_VSTRC_CC_RT_HELPER(8)
 DEF_VSTRC_CC_RT_HELPER(16)
 DEF_VSTRC_CC_RT_HELPER(32)
+
+void HELPER(vstrs)(void *v1, const void *v2, const void *v3, void *v4,
+                   CPUS390XState *env, uint32_t desc) {
+    const bool zs = (desc >> 16);
+    const uint8_t es = desc & 16;
+    const uint8_t char_size = 1 << es;
+
+    uint32_t str_len = 0, eos = 0;
+    uint32_t i = 0, j = 0, k = 0, cc = 0;
+    uint32_t substr_len = ((uint8_t *)v4)[H1(7)] & 31;
+
+    for (i = 0; i < 16; i += char_size) {
+        if (0 == es && !((uint8_t  *)v3)[H1(i >> es)]) { break; }
+        if (1 == es && !((uint16_t *)v3)[H2(i >> es)]) { break; }
+        if (2 == es && !((uint32_t *)v3)[H4(i >> es)]) { break; }
+    }
+    if (i < substr_len) {
+        substr_len = i;
+    }
+    if (substr_len) {
+        if (zs) {
+            for (k = 0; k < 16; k += char_size) {
+                if (0 == es && !((uint8_t  *)v2)[H1(k >> es)]) { break; }
+                if (1 == es && !((uint16_t *)v2)[H2(k >> es)]) { break; }
+                if (2 == es && !((uint32_t *)v2)[H4(k >> es)]) { break; }
+            }
+            eos = (16 != k);
+            str_len = k;
+        } else {
+            str_len = 16;
+        }
+
+        for (k = 0; k < str_len; k += char_size) {
+            if (0 == es && ((uint8_t  *)v3)[H1(0)]
+                        == ((uint8_t  *)v2)[H1(k >> es)]) { break; }
+            if (1 == es && ((uint16_t *)v3)[H2(0)]
+                        == ((uint16_t *)v2)[H2(k >> es)]) { break; }
+            if (2 == es && ((uint32_t *)v3)[H4(0)]
+                        == ((uint32_t *)v2)[H4(k >> es)]) { break; }
+        }
+
+        if (k < 16 &&  (!eos || (k + substr_len) <= str_len)) {
+            if ((k + substr_len) <= 16) {
+                for (j = 0; j < substr_len; j += char_size) {
+                    if (0 == es && ((uint8_t  *)v3)[H1(j >> es)]
+                                != ((uint8_t  *)v2)[H1((k + j) >> es)]) { break; }
+                    if (1 == es && ((uint16_t *)v3)[H2(j >> es)]
+                                != ((uint16_t *)v2)[H2((k + j) >> es)]) { break; }
+                    if (2 == es && ((uint32_t *)v3)[H4(j >> es)]
+                                != ((uint32_t *)v2)[H4((k + j) >> es)]) { break; }
+                }
+            }
+            cc = (j == substr_len) ? 2 : 3;
+        } else {
+            cc = eos ? 1 : 0;
+            k = 16;
+        }
+    } else {
+        cc = 2;
+    }
+
+    ((uint64_t *)v1)[0] = ((uint64_t *)v1)[1] = 0;
+    ((uint8_t *)v1)[H1(7)] = k;
+    env->cc_op = cc;
+}