Message ID | 20220207152109.197566-41-broonie@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arm64/sme: Initial support for the Scalable Matrix Extension | expand |
On 2/7/22 8:21 AM, Mark Brown wrote: > For every possible combination of SVE and SME vector length verify that for > each possible value of SVCR after a syscall we leave streaming mode and ZA > is preserved. We don't need to take account of any streaming/non streaming > SVE vector length changes in the assembler code since the store instructions > will handle the vector length for us. We log if the system supports FA64 and > only try to set FFR in streaming mode if it does. > > Signed-off-by: Mark Brown <broonie@kernel.org> > --- > .../selftests/arm64/abi/syscall-abi-asm.S | 69 +++++- > .../testing/selftests/arm64/abi/syscall-abi.c | 204 ++++++++++++++++-- > .../testing/selftests/arm64/abi/syscall-abi.h | 15 ++ > 3 files changed, 265 insertions(+), 23 deletions(-) > create mode 100644 tools/testing/selftests/arm64/abi/syscall-abi.h > > diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c > index 1e13b7523918..b632bfe9e022 100644 > --- a/tools/testing/selftests/arm64/abi/syscall-abi.c > +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c > @@ -18,9 +18,13 @@ > > #include "../../kselftest.h" > > +#include "syscall-abi.h" > + > #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) > > -extern void do_syscall(int sve_vl); > +static int default_sme_vl; > + > +extern void do_syscall(int sve_vl, int sme_vl); > > static void fill_random(void *buf, size_t size) > { > @@ -48,14 +52,15 @@ static struct syscall_cfg { > uint64_t gpr_in[NUM_GPR]; > uint64_t gpr_out[NUM_GPR]; > > -static void setup_gpr(struct syscall_cfg *cfg, int sve_vl) > +static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > fill_random(gpr_in, sizeof(gpr_in)); > gpr_in[8] = cfg->syscall_nr; > memset(gpr_out, 0, sizeof(gpr_out)); > } > > -static int check_gpr(struct syscall_cfg *cfg, int sve_vl) > +static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) > { > int errors = 0; > int i; > @@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl) > uint64_t fpr_in[NUM_FPR * 2]; > uint64_t fpr_out[NUM_FPR * 2]; > > -static void setup_fpr(struct syscall_cfg *cfg, int sve_vl) > +static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > fill_random(fpr_in, sizeof(fpr_in)); > memset(fpr_out, 0, sizeof(fpr_out)); > } > > -static int check_fpr(struct syscall_cfg *cfg, int sve_vl) > +static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > int errors = 0; > int i; > @@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; > uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; > uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; > > -static void setup_z(struct syscall_cfg *cfg, int sve_vl) > +static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > fill_random(z_in, sizeof(z_in)); > fill_random(z_out, sizeof(z_out)); > } > > -static int check_z(struct syscall_cfg *cfg, int sve_vl) > +static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > size_t reg_size = sve_vl; > int errors = 0; > @@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) > > /* > * After a syscall the low 128 bits of the Z registers should > - * be preserved and the rest be zeroed or preserved. > + * be preserved and the rest be zeroed or preserved, except if > + * we were in streaming mode in which case the low 128 bits may > + * also be cleared by the transition out of streaming mode. > */ > for (i = 0; i < SVE_NUM_ZREGS; i++) { > void *in = &z_in[reg_size * i]; > void *out = &z_out[reg_size * i]; > > - if (memcmp(in, out, SVE_VQ_BYTES) != 0) { > + if ((memcmp(in, out, SVE_VQ_BYTES) != 0) && > + !((svcr & SVCR_SM_MASK) && > + memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) { > ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", > cfg->name, sve_vl, i); > errors++; > @@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) > uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; > uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; > > -static void setup_p(struct syscall_cfg *cfg, int sve_vl) > +static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > fill_random(p_in, sizeof(p_in)); > fill_random(p_out, sizeof(p_out)); > } > > -static int check_p(struct syscall_cfg *cfg, int sve_vl) > +static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ > > @@ -175,8 +190,19 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl) > uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; > uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; > > -static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) > +static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > + /* > + * If we are in streaming mode and do not have FA64 then FFR > + * is unavailable. > + */ > + if ((svcr & SVCR_SM_MASK) && > + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) { > + memset(&ffr_in, 0, sizeof(ffr_in)); > + return; > + } > + > /* > * It is only valid to set a contiguous set of bits starting > * at 0. For now since we're expecting this to be cleared by > @@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) > fill_random(ffr_out, sizeof(ffr_out)); > } > > -static int check_ffr(struct syscall_cfg *cfg, int sve_vl) > +static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ > int errors = 0; > @@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) > if (!sve_vl) > return 0; > > + if ((svcr & SVCR_SM_MASK) && > + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) > + return 0; > + > /* After a syscall the P registers should be preserved or zeroed */ > for (i = 0; i < reg_size; i++) > if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) > @@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) > return errors; > } > > -typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl); > -typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl); > +uint64_t svcr_in, svcr_out; > + > +static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > +{ > + svcr_in = svcr; > +} > + > +static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > +{ > + int errors = 0; > + > + if (svcr_out & SVCR_SM_MASK) { > + ksft_print_msg("%s Still in SM, SVCR %llx\n", > + cfg->name, svcr_out); > + errors++; > + } > + > + if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { > + ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", > + cfg->name, svcr_in, svcr_out); > + errors++; > + } > + > + return errors; > +} > + > +uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; > +uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; > + > +static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > +{ > + fill_random(za_in, sizeof(za_in)); > + memset(za_out, 0, sizeof(za_out)); > +} > + > +static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > +{ > + size_t reg_size = sme_vl * sme_vl; Is there possibilty of size_t overfolow here? > + int errors = 0; > + > + if (!(svcr & SVCR_ZA_MASK)) > + return 0; > + > + if (memcmp(za_in, za_out, reg_size) != 0) { > + ksft_print_msg("SME VL %d ZA does not match\n", sme_vl); Print the expected value in addition to the sme_val. > + errors++; > + } > + > + return errors; > +} > + > +typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr); > +typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr); > > /* > * Each set of registers has a setup function which is called before > @@ -225,20 +313,23 @@ static struct { > { setup_z, check_z }, > { setup_p, check_p }, > { setup_ffr, check_ffr }, > + { setup_svcr, check_svcr }, > + { setup_za, check_za }, > }; > > -static bool do_test(struct syscall_cfg *cfg, int sve_vl) > +static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > + uint64_t svcr) > { > int errors = 0; > int i; > > for (i = 0; i < ARRAY_SIZE(regset); i++) > - regset[i].setup(cfg, sve_vl); > + regset[i].setup(cfg, sve_vl, sme_vl, svcr); > > - do_syscall(sve_vl); > + do_syscall(sve_vl, sme_vl); > > for (i = 0; i < ARRAY_SIZE(regset); i++) > - errors += regset[i].check(cfg, sve_vl); > + errors += regset[i].check(cfg, sve_vl, sme_vl, svcr); > > return errors == 0; > } > @@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl) > static void test_one_syscall(struct syscall_cfg *cfg) > { > int sve_vq, sve_vl; > + int sme_vq, sme_vl; > > /* FPSIMD only case */ > - ksft_test_result(do_test(cfg, 0), > + ksft_test_result(do_test(cfg, 0, default_sme_vl, 0), > "%s FPSIMD\n", cfg->name); > > if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) > @@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg) > if (sve_vq != sve_vq_from_vl(sve_vl)) > sve_vq = sve_vq_from_vl(sve_vl); > > - ksft_test_result(do_test(cfg, sve_vl), > + ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), > "%s SVE VL %d\n", cfg->name, sve_vl); Print default_sme_vl as well. > + > + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) > + continue; > + > + for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) { > + sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16); > + if (sme_vl == -1) > + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", > + strerror(errno), errno); > + > + sme_vl &= PR_SME_VL_LEN_MASK; > + > + if (sme_vq != sve_vq_from_vl(sme_vl)) > + sme_vq = sve_vq_from_vl(sme_vl); > + > + ksft_test_result(do_test(cfg, sve_vl, sme_vl, > + SVCR_ZA_MASK | SVCR_SM_MASK), > + "%s SVE VL %d/SME VL %d SM+ZA\n", > + cfg->name, sve_vl, sme_vl); > + ksft_test_result(do_test(cfg, sve_vl, sme_vl, > + SVCR_SM_MASK), > + "%s SVE VL %d/SME VL %d SM\n", > + cfg->name, sve_vl, sme_vl); > + ksft_test_result(do_test(cfg, sve_vl, sme_vl, > + SVCR_ZA_MASK), > + "%s SVE VL %d/SME VL %d ZA\n", > + cfg->name, sve_vl, sme_vl); > + } > } > } > > @@ -299,14 +419,54 @@ int sve_count_vls(void) > return vl_count; > } > > +int sme_count_vls(void) > +{ > + unsigned int vq; > + int vl_count = 0; > + int vl; > + > + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) > + return 0; > + > + /* Ensure we configure a SME VL, used to flag if SVCR is set */ > + default_sme_vl = 16; > + > + /* > + * Enumerate up to SVE_VQ_MAX vector lengths > + */ > + for (vq = SVE_VQ_MAX; vq > 0; --vq) { > + vl = prctl(PR_SME_SET_VL, vq * 16); > + if (vl == -1) > + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", > + strerror(errno), errno); > + > + vl &= PR_SME_VL_LEN_MASK; > + > + if (vq != sve_vq_from_vl(vl)) > + vq = sve_vq_from_vl(vl); > + > + vl_count++; > + } > + > + return vl_count; > +} > + > int main(void) > { > int i; > + int tests = 1; /* FPSIMD */ > > srandom(getpid()); > > ksft_print_header(); > - ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1)); > + tests += sve_count_vls(); > + tests += (sve_count_vls() * sme_count_vls()) * 3; > + ksft_set_plan(ARRAY_SIZE(syscalls) * tests); > + > + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) > + ksft_print_msg("SME with FA64\n"); > + else if (getauxval(AT_HWCAP2) & HWCAP2_SME) > + ksft_print_msg("SME without FA64\n"); > > for (i = 0; i < ARRAY_SIZE(syscalls); i++) > test_one_syscall(&syscalls[i]); > diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h > new file mode 100644 > index 000000000000..bda5a87ad381 > --- /dev/null > +++ b/tools/testing/selftests/arm64/abi/syscall-abi.h > @@ -0,0 +1,15 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (C) 2021 ARM Limited. > + */ > + > +#ifndef SYSCALL_ABI_H > +#define SYSCALL_ABI_H > + > +#define SVCR_ZA_MASK 2 > +#define SVCR_SM_MASK 1 > + > +#define SVCR_ZA_SHIFT 1 > +#define SVCR_SM_SHIFT 0 > + > +#endif > With these fixed or explained Reviewed-by: Shuah Khan <skhan@linuxfoundation.org> thanks, -- Shuah
On Mon, Feb 07, 2022 at 06:52:06PM -0700, Shuah Khan wrote: > On 2/7/22 8:21 AM, Mark Brown wrote: > > +static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, > > + uint64_t svcr) > > +{ > > + size_t reg_size = sme_vl * sme_vl; > Is there possibilty of size_t overfolow here? No, if the size of a vector were anywhere near to being able to being able to do that I think we'd have serious design issues with the ABI - the size being calculated here is the size of a single register. The current architectural maximum vector length is 2048 bits, which would give a size of 64K for ZA if implemented. > > + if (memcmp(za_in, za_out, reg_size) != 0) { > > + ksft_print_msg("SME VL %d ZA does not match\n", sme_vl); > Print the expected value in addition to the sme_val. This is not comparing the vector length, this is comparing the contents of the ZA register which may be up to 64K in size. There are serious presentational issues with displaying any errors in a useful fashion for such a large register which IME needs custom display code adding by whoever is debugging the issue that takes account of what the pattern being observed is. > > @@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg) > > if (sve_vq != sve_vq_from_vl(sve_vl)) > > sve_vq = sve_vq_from_vl(sve_vl); > > - ksft_test_result(do_test(cfg, sve_vl), > > + ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), > > "%s SVE VL %d\n", cfg->name, sve_vl); > Print default_sme_vl as well. default_sme_vl is just being passed in as a dummy value here since the function takes a fixed number of arguments, this is testing the case where SME is not used or enabled and will be run on systems which do not have SME at all so there won't be any defined vector length for SME. I fear that it would cause confusion to display a SME VL here, and do_test() won't actually pay any attention to that argument in this case. We will individually step through all possible combinations of SVE and SME vector lengths in separate tests.
On 2/8/22 11:15 AM, Mark Brown wrote: > On Mon, Feb 07, 2022 at 06:52:06PM -0700, Shuah Khan wrote: >> On 2/7/22 8:21 AM, Mark Brown wrote: > >>> +static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, >>> + uint64_t svcr) >>> +{ >>> + size_t reg_size = sme_vl * sme_vl; > >> Is there possibilty of size_t overfolow here? > > No, if the size of a vector were anywhere near to being able to being > able to do that I think we'd have serious design issues with the ABI - > the size being calculated here is the size of a single register. The > current architectural maximum vector length is 2048 bits, which would > give a size of 64K for ZA if implemented. > >>> + if (memcmp(za_in, za_out, reg_size) != 0) { >>> + ksft_print_msg("SME VL %d ZA does not match\n", sme_vl); > >> Print the expected value in addition to the sme_val. > > This is not comparing the vector length, this is comparing the contents > of the ZA register which may be up to 64K in size. There are serious > presentational issues with displaying any errors in a useful fashion for > such a large register which IME needs custom display code adding by > whoever is debugging the issue that takes account of what the pattern > being observed is. > >>> @@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg) >>> if (sve_vq != sve_vq_from_vl(sve_vl)) >>> sve_vq = sve_vq_from_vl(sve_vl); >>> - ksft_test_result(do_test(cfg, sve_vl), >>> + ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), >>> "%s SVE VL %d\n", cfg->name, sve_vl); > >> Print default_sme_vl as well. > > default_sme_vl is just being passed in as a dummy value here since the > function takes a fixed number of arguments, this is testing the case > where SME is not used or enabled and will be run on systems which do not > have SME at all so there won't be any defined vector length for SME. I > fear that it would cause confusion to display a SME VL here, and > do_test() won't actually pay any attention to that argument in this > case. We will individually step through all possible combinations of > SVE and SME vector lengths in separate tests. > Sounds good. thanks, -- Shuah
On Mon, Feb 07, 2022 at 03:21:09PM +0000, Mark Brown wrote: > For every possible combination of SVE and SME vector length verify that for > each possible value of SVCR after a syscall we leave streaming mode and ZA > is preserved. We don't need to take account of any streaming/non streaming > SVE vector length changes in the assembler code since the store instructions > will handle the vector length for us. We log if the system supports FA64 and > only try to set FFR in streaming mode if it does. > > Signed-off-by: Mark Brown <broonie@kernel.org> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index 983467cfcee0..bc70e04224bf 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -9,15 +9,42 @@ // invoked is configured in x8 of the input GPR data. // // x0: SVE VL, 0 for FP only +// x1: SME VL // // GPRs: gpr_in, gpr_out // FPRs: fpr_in, fpr_out // Zn: z_in, z_out // Pn: p_in, p_out // FFR: ffr_in, ffr_out +// ZA: za_in, za_out +// SVCR: svcr_in, svcr_out + +#include "syscall-abi.h" .arch_extension sve +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + .globl do_syscall do_syscall: // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 @@ -30,6 +57,24 @@ do_syscall: stp x25, x26, [sp, #80] stp x27, x28, [sp, #96] + // Set SVCR if we're doing SME + cbz x1, 1f + adrp x2, svcr_in + ldr x2, [x2, :lo12:svcr_in] + msr S3_3_C4_C2_2, x2 +1: + + // Load ZA if it's enabled - uses x12 as scratch due to SME LDR + tbz x2, #SVCR_ZA_SHIFT, 1f + mov w12, #0 + ldr x2, =za_in +2: _ldr_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 + bne 2b +1: + // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -68,7 +113,7 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] 1: - // Load the SVE registers if we're doing SVE + // Load the SVE registers if we're doing SVE/SME cbz x0, 1f ldr x2, =z_in @@ -105,9 +150,13 @@ do_syscall: ldr z30, [x2, #30, MUL VL] ldr z31, [x2, #31, MUL VL] + // Only set a non-zero FFR, test patterns must be zero since the + // syscall should clear it - this lets us handle FA64. ldr x2, =ffr_in + cbz x2, 2f ldr p0, [x2, #0] wrffr p0.b +2: ldr x2, =p_in ldr p0, [x2, #0, MUL VL] @@ -169,6 +218,24 @@ do_syscall: stp q28, q29, [x2, #16 * 28] stp q30, q31, [x2, #16 * 30] + // Save SVCR if we're doing SME + cbz x1, 1f + mrs x2, S3_3_C4_C2_2 + adrp x3, svcr_out + str x2, [x3, :lo12:svcr_out] +1: + + // Save ZA if it's enabled - uses x12 as scratch due to SME STR + tbz x2, #SVCR_ZA_SHIFT, 1f + mov w12, #0 + ldr x2, =za_out +2: _str_za 12, 2 + add x2, x2, x1 + add x12, x12, #1 + cmp x1, x12 + bne 2b +1: + // Save the SVE state if we have some cbz x0, 1f diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index 1e13b7523918..b632bfe9e022 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -18,9 +18,13 @@ #include "../../kselftest.h" +#include "syscall-abi.h" + #define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) -extern void do_syscall(int sve_vl); +static int default_sme_vl; + +extern void do_syscall(int sve_vl, int sme_vl); static void fill_random(void *buf, size_t size) { @@ -48,14 +52,15 @@ static struct syscall_cfg { uint64_t gpr_in[NUM_GPR]; uint64_t gpr_out[NUM_GPR]; -static void setup_gpr(struct syscall_cfg *cfg, int sve_vl) +static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(gpr_in, sizeof(gpr_in)); gpr_in[8] = cfg->syscall_nr; memset(gpr_out, 0, sizeof(gpr_out)); } -static int check_gpr(struct syscall_cfg *cfg, int sve_vl) +static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) { int errors = 0; int i; @@ -79,13 +84,15 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl) uint64_t fpr_in[NUM_FPR * 2]; uint64_t fpr_out[NUM_FPR * 2]; -static void setup_fpr(struct syscall_cfg *cfg, int sve_vl) +static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(fpr_in, sizeof(fpr_in)); memset(fpr_out, 0, sizeof(fpr_out)); } -static int check_fpr(struct syscall_cfg *cfg, int sve_vl) +static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { int errors = 0; int i; @@ -109,13 +116,15 @@ static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -static void setup_z(struct syscall_cfg *cfg, int sve_vl) +static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(z_in, sizeof(z_in)); fill_random(z_out, sizeof(z_out)); } -static int check_z(struct syscall_cfg *cfg, int sve_vl) +static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vl; int errors = 0; @@ -126,13 +135,17 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) /* * After a syscall the low 128 bits of the Z registers should - * be preserved and the rest be zeroed or preserved. + * be preserved and the rest be zeroed or preserved, except if + * we were in streaming mode in which case the low 128 bits may + * also be cleared by the transition out of streaming mode. */ for (i = 0; i < SVE_NUM_ZREGS; i++) { void *in = &z_in[reg_size * i]; void *out = &z_out[reg_size * i]; - if (memcmp(in, out, SVE_VQ_BYTES) != 0) { + if ((memcmp(in, out, SVE_VQ_BYTES) != 0) && + !((svcr & SVCR_SM_MASK) && + memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) { ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", cfg->name, sve_vl, i); errors++; @@ -145,13 +158,15 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl) uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; -static void setup_p(struct syscall_cfg *cfg, int sve_vl) +static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { fill_random(p_in, sizeof(p_in)); fill_random(p_out, sizeof(p_out)); } -static int check_p(struct syscall_cfg *cfg, int sve_vl) +static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ @@ -175,8 +190,19 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl) uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; -static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) +static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { + /* + * If we are in streaming mode and do not have FA64 then FFR + * is unavailable. + */ + if ((svcr & SVCR_SM_MASK) && + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) { + memset(&ffr_in, 0, sizeof(ffr_in)); + return; + } + /* * It is only valid to set a contiguous set of bits starting * at 0. For now since we're expecting this to be cleared by @@ -186,7 +212,8 @@ static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) fill_random(ffr_out, sizeof(ffr_out)); } -static int check_ffr(struct syscall_cfg *cfg, int sve_vl) +static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ int errors = 0; @@ -195,6 +222,10 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) if (!sve_vl) return 0; + if ((svcr & SVCR_SM_MASK) && + !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) + return 0; + /* After a syscall the P registers should be preserved or zeroed */ for (i = 0; i < reg_size; i++) if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) @@ -206,8 +237,65 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl) return errors; } -typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl); -typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl); +uint64_t svcr_in, svcr_out; + +static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + svcr_in = svcr; +} + +static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + int errors = 0; + + if (svcr_out & SVCR_SM_MASK) { + ksft_print_msg("%s Still in SM, SVCR %llx\n", + cfg->name, svcr_out); + errors++; + } + + if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { + ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", + cfg->name, svcr_in, svcr_out); + errors++; + } + + return errors; +} + +uint8_t za_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t za_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; + +static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + fill_random(za_in, sizeof(za_in)); + memset(za_out, 0, sizeof(za_out)); +} + +static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) +{ + size_t reg_size = sme_vl * sme_vl; + int errors = 0; + + if (!(svcr & SVCR_ZA_MASK)) + return 0; + + if (memcmp(za_in, za_out, reg_size) != 0) { + ksft_print_msg("SME VL %d ZA does not match\n", sme_vl); + errors++; + } + + return errors; +} + +typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr); +typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr); /* * Each set of registers has a setup function which is called before @@ -225,20 +313,23 @@ static struct { { setup_z, check_z }, { setup_p, check_p }, { setup_ffr, check_ffr }, + { setup_svcr, check_svcr }, + { setup_za, check_za }, }; -static bool do_test(struct syscall_cfg *cfg, int sve_vl) +static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl, + uint64_t svcr) { int errors = 0; int i; for (i = 0; i < ARRAY_SIZE(regset); i++) - regset[i].setup(cfg, sve_vl); + regset[i].setup(cfg, sve_vl, sme_vl, svcr); - do_syscall(sve_vl); + do_syscall(sve_vl, sme_vl); for (i = 0; i < ARRAY_SIZE(regset); i++) - errors += regset[i].check(cfg, sve_vl); + errors += regset[i].check(cfg, sve_vl, sme_vl, svcr); return errors == 0; } @@ -246,9 +337,10 @@ static bool do_test(struct syscall_cfg *cfg, int sve_vl) static void test_one_syscall(struct syscall_cfg *cfg) { int sve_vq, sve_vl; + int sme_vq, sme_vl; /* FPSIMD only case */ - ksft_test_result(do_test(cfg, 0), + ksft_test_result(do_test(cfg, 0, default_sme_vl, 0), "%s FPSIMD\n", cfg->name); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) @@ -265,8 +357,36 @@ static void test_one_syscall(struct syscall_cfg *cfg) if (sve_vq != sve_vq_from_vl(sve_vl)) sve_vq = sve_vq_from_vl(sve_vl); - ksft_test_result(do_test(cfg, sve_vl), + ksft_test_result(do_test(cfg, sve_vl, default_sme_vl, 0), "%s SVE VL %d\n", cfg->name, sve_vl); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) + continue; + + for (sme_vq = SVE_VQ_MAX; sme_vq > 0; --sme_vq) { + sme_vl = prctl(PR_SME_SET_VL, sme_vq * 16); + if (sme_vl == -1) + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + sme_vl &= PR_SME_VL_LEN_MASK; + + if (sme_vq != sve_vq_from_vl(sme_vl)) + sme_vq = sve_vq_from_vl(sme_vl); + + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_ZA_MASK | SVCR_SM_MASK), + "%s SVE VL %d/SME VL %d SM+ZA\n", + cfg->name, sve_vl, sme_vl); + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_SM_MASK), + "%s SVE VL %d/SME VL %d SM\n", + cfg->name, sve_vl, sme_vl); + ksft_test_result(do_test(cfg, sve_vl, sme_vl, + SVCR_ZA_MASK), + "%s SVE VL %d/SME VL %d ZA\n", + cfg->name, sve_vl, sme_vl); + } } } @@ -299,14 +419,54 @@ int sve_count_vls(void) return vl_count; } +int sme_count_vls(void) +{ + unsigned int vq; + int vl_count = 0; + int vl; + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) + return 0; + + /* Ensure we configure a SME VL, used to flag if SVCR is set */ + default_sme_vl = 16; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SME_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SME_VL_LEN_MASK; + + if (vq != sve_vq_from_vl(vl)) + vq = sve_vq_from_vl(vl); + + vl_count++; + } + + return vl_count; +} + int main(void) { int i; + int tests = 1; /* FPSIMD */ srandom(getpid()); ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1)); + tests += sve_count_vls(); + tests += (sve_count_vls() * sme_count_vls()) * 3; + ksft_set_plan(ARRAY_SIZE(syscalls) * tests); + + if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64) + ksft_print_msg("SME with FA64\n"); + else if (getauxval(AT_HWCAP2) & HWCAP2_SME) + ksft_print_msg("SME without FA64\n"); for (i = 0; i < ARRAY_SIZE(syscalls); i++) test_one_syscall(&syscalls[i]); diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h new file mode 100644 index 000000000000..bda5a87ad381 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 ARM Limited. + */ + +#ifndef SYSCALL_ABI_H +#define SYSCALL_ABI_H + +#define SVCR_ZA_MASK 2 +#define SVCR_SM_MASK 1 + +#define SVCR_ZA_SHIFT 1 +#define SVCR_SM_SHIFT 0 + +#endif
For every possible combination of SVE and SME vector length verify that for each possible value of SVCR after a syscall we leave streaming mode and ZA is preserved. We don't need to take account of any streaming/non streaming SVE vector length changes in the assembler code since the store instructions will handle the vector length for us. We log if the system supports FA64 and only try to set FFR in streaming mode if it does. Signed-off-by: Mark Brown <broonie@kernel.org> --- .../selftests/arm64/abi/syscall-abi-asm.S | 69 +++++- .../testing/selftests/arm64/abi/syscall-abi.c | 204 ++++++++++++++++-- .../testing/selftests/arm64/abi/syscall-abi.h | 15 ++ 3 files changed, 265 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/arm64/abi/syscall-abi.h