[kvm-unit-tests,v4,1/1] s390x: add exittime tests

Message ID	20221012124453.1207013-2-nrb@linux.ibm.com (mailing list archive)
State	Superseded
Headers	show Return-Path: <kvm-owner@kernel.org> From: Nico Boehr <nrb@linux.ibm.com> To: kvm@vger.kernel.org Cc: frankja@linux.ibm.com, imbrenda@linux.ibm.com, thuth@redhat.com Subject: [kvm-unit-tests PATCH v4 1/1] s390x: add exittime tests Date: Wed, 12 Oct 2022 14:44:53 +0200 Message-Id: <20221012124453.1207013-2-nrb@linux.ibm.com> In-Reply-To: <20221012124453.1207013-1-nrb@linux.ibm.com> References: <20221012124453.1207013-1-nrb@linux.ibm.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	s390x: Add exit time test \| expand [kvm-unit-tests,v4,0/1] s390x: Add exit time test [kvm-unit-tests,v4,1/1] s390x: add exittime tests

Message ID

20221012124453.1207013-2-nrb@linux.ibm.com (mailing list archive)

State

Superseded

Headers

From: Nico Boehr <nrb@linux.ibm.com>
To: kvm@vger.kernel.org
Cc: frankja@linux.ibm.com, imbrenda@linux.ibm.com, thuth@redhat.com
Subject: [kvm-unit-tests PATCH v4 1/1] s390x: add exittime tests
Date: Wed, 12 Oct 2022 14:44:53 +0200
Message-Id: <20221012124453.1207013-2-nrb@linux.ibm.com>
In-Reply-To: <20221012124453.1207013-1-nrb@linux.ibm.com>
References: <20221012124453.1207013-1-nrb@linux.ibm.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk

Series

s390x: Add exit time test | expand

Commit Message

Nico Boehr Oct. 12, 2022, 12:44 p.m. UTC

Add a test to measure the execution time of several instructions. This
can be helpful in finding performance regressions in hypervisor code.

All tests are currently reported as PASS, since the baseline for their
execution time depends on the respective environment and since needs to
be determined on a case-by-case basis.

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
---
 s390x/Makefile      |   1 +
 s390x/exittime.c    | 253 ++++++++++++++++++++++++++++++++++++++++++++
 s390x/unittests.cfg |   4 +
 3 files changed, 258 insertions(+)
 create mode 100644 s390x/exittime.c

Comments

Claudio Imbrenda Oct. 12, 2022, 1:30 p.m. UTC | #1

On Wed, 12 Oct 2022 14:44:53 +0200
Nico Boehr <nrb@linux.ibm.com> wrote:

> Add a test to measure the execution time of several instructions. This
> can be helpful in finding performance regressions in hypervisor code.
> 
> All tests are currently reported as PASS, since the baseline for their
> execution time depends on the respective environment and since needs to
> be determined on a case-by-case basis.
> 
> Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
> ---
>  s390x/Makefile      |   1 +
>  s390x/exittime.c    | 253 ++++++++++++++++++++++++++++++++++++++++++++
>  s390x/unittests.cfg |   4 +
>  3 files changed, 258 insertions(+)
>  create mode 100644 s390x/exittime.c
> 
> diff --git a/s390x/Makefile b/s390x/Makefile
> index fba09bc2df3a..a28c6746cf55 100644
> --- a/s390x/Makefile
> +++ b/s390x/Makefile
> @@ -37,6 +37,7 @@ tests += $(TEST_DIR)/migration-skey.elf
>  tests += $(TEST_DIR)/panic-loop-extint.elf
>  tests += $(TEST_DIR)/panic-loop-pgm.elf
>  tests += $(TEST_DIR)/migration-sck.elf
> +tests += $(TEST_DIR)/exittime.elf
>  
>  pv-tests += $(TEST_DIR)/pv-diags.elf
>  
> diff --git a/s390x/exittime.c b/s390x/exittime.c
> new file mode 100644
> index 000000000000..8e00a3c3559a
> --- /dev/null
> +++ b/s390x/exittime.c
> @@ -0,0 +1,253 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Measure run time of various instructions. Can be used to find runtime
> + * regressions of instructions which cause exits.
> + *
> + * Copyright IBM Corp. 2022
> + *
> + * Authors:
> + *  Nico Boehr <nrb@linux.ibm.com>
> + */
> +#include <libcflat.h>
> +#include <smp.h>
> +#include <sclp.h>
> +#include <asm/time.h>
> +#include <asm/sigp.h>
> +#include <asm/interrupt.h>
> +#include <asm/page.h>
> +
> +char pagebuf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
> +
> +static void test_sigp_sense_running(long destcpu)
> +{
> +	smp_sigp(destcpu, SIGP_SENSE_RUNNING, 0, NULL);
> +}
> +
> +static void test_nop(long ignore)
> +{
> +	/* nops don't trap into the hypervisor, so let's test them for reference */
> +	asm volatile(
> +		"nop"
> +		:
> +		:
> +		: "memory"
> +	);
> +}
> +
> +static void test_diag9c(long destcpu)
> +{
> +	asm volatile(
> +		"diag %[destcpu],0,0x9c"
> +		:
> +		: [destcpu] "d" (destcpu)
> +	);
> +}
> +
> +static long setup_get_this_cpuaddr(long ignore)
> +{
> +	return stap();
> +}
> +
> +static void test_diag44(long ignore)
> +{
> +	asm volatile(
> +		"diag 0,0,0x44"
> +	);
> +}
> +
> +static void test_stnsm(long ignore)
> +{
> +	int out;
> +
> +	asm volatile(
> +		"stnsm %[out],0xff"
> +		: [out] "=Q" (out)
> +	);
> +}
> +
> +static void test_stosm(long ignore)
> +{
> +	int out;
> +
> +	asm volatile(
> +		"stosm %[out],0"
> +		: [out] "=Q" (out)
> +	);
> +}
> +
> +static long setup_ssm(long ignore)
> +{
> +	long system_mask = 0;
> +
> +	asm volatile(
> +		"stosm %[system_mask],0"
> +		: [system_mask] "=Q" (system_mask)
> +	);
> +
> +	return system_mask;
> +}
> +
> +static void test_ssm(long old_system_mask)
> +{
> +	asm volatile(
> +		"ssm %[old_system_mask]"
> +		:
> +		: [old_system_mask] "Q" (old_system_mask)
> +	);
> +}
> +
> +static long setup_lctl4(long ignore)
> +{
> +	long ctl4_orig = 0;
> +
> +	asm volatile(
> +		"stctg 4,4,%[ctl4_orig]"
> +		: [ctl4_orig] "=S" (ctl4_orig)
> +	);
> +
> +	return ctl4_orig;
> +}
> +
> +static void test_lctl4(long ctl4_orig)
> +{
> +	asm volatile(
> +		"lctlg 4,4,%[ctl4_orig]"
> +		:
> +		: [ctl4_orig] "S" (ctl4_orig)
> +	);
> +}
> +
> +static void test_stpx(long ignore)
> +{
> +	unsigned int prefix;
> +
> +	asm volatile(
> +		"stpx %[prefix]"
> +		: [prefix] "=Q" (prefix)
> +	);
> +}
> +
> +static void test_stfl(long ignore)
> +{
> +	asm volatile(
> +		"stfl 0"
> +		:
> +		:
> +		: "memory"
> +	);
> +}
> +
> +static void test_epsw(long ignore)
> +{
> +	long r1, r2;
> +
> +	asm volatile(
> +		"epsw %[r1], %[r2]"
> +		: [r1] "=d" (r1), [r2] "=d" (r2)
> +	);
> +}
> +
> +static void test_illegal(long ignore)
> +{
> +	expect_pgm_int();
> +	asm volatile(
> +		".word 0"
> +	);
> +	clear_pgm_int();
> +}
> +
> +static long setup_servc(long arg)
> +{
> +	memset(pagebuf, 0, PAGE_SIZE);
> +	return arg;
> +}
> +
> +static void test_servc(long ignore)
> +{
> +	SCCB *sccb = (SCCB *) pagebuf;
> +
> +	sccb->h.length = 8;
> +	servc(0, (unsigned long) sccb);
> +}
> +
> +static void test_stsi(long fc)
> +{
> +	stsi(pagebuf, fc, 2, 2);
> +}
> +
> +struct test {
> +	const char *name;
> +	/*
> +	 * When non-null, will be called once before running the test loop.
> +	 * Its return value will be given as argument to testfunc.
> +	 */
> +	long (*setupfunc)(long arg);
> +	void (*testfunc)(long arg);
> +	long arg;
> +	long iters;
> +} const exittime_tests[] = {
> +	{"nop",                   NULL,                   test_nop,                0, 200000 },
> +	{"sigp sense running(0)", NULL,                   test_sigp_sense_running, 0, 20000 },
> +	{"sigp sense running(1)", NULL,                   test_sigp_sense_running, 1, 20000 },
> +	{"diag9c(self)",          setup_get_this_cpuaddr, test_diag9c,             0, 2000 },
> +	{"diag9c(0)",             NULL,                   test_diag9c,             0, 2000 },
> +	{"diag9c(1)",             NULL,                   test_diag9c,             1, 2000 },
> +	{"diag44",                NULL,                   test_diag44,             0, 2000 },
> +	{"stnsm",                 NULL,                   test_stnsm,              0, 200000 },
> +	{"stosm",                 NULL,                   test_stosm,              0, 200000 },
> +	{"ssm",                   setup_ssm,              test_ssm,                0, 200000 },
> +	{"lctl4",                 setup_lctl4,            test_lctl4,              0, 20000 },
> +	{"stpx",                  NULL,                   test_stpx,               0, 2000 },
> +	{"stfl",                  NULL,                   test_stfl,               0, 2000 },
> +	{"epsw",                  NULL,                   test_epsw,               0, 20000 },
> +	{"illegal",               NULL,                   test_illegal,            0, 2000 },
> +	{"servc",                 setup_servc,            test_servc,              0, 2000 },
> +	{"stsi122",               NULL,                   test_stsi,               1, 200 },
> +	{"stsi222",               NULL,                   test_stsi,               2, 200 },
> +	{"stsi322",               NULL,                   test_stsi,               3, 200 },
> +};
> +
> +static uint64_t tod_to_us(uint64_t tod)
> +{
> +	return tod >> STCK_SHIFT_US;
> +}
> +
> +int main(void)
> +{
> +	int i, j, k, testfunc_arg;
> +	const int outer_iters = 100;
> +	struct test const *current_test;
> +	uint64_t start, end, elapsed, worst, best, total, average;
> +
> +	report_prefix_push("exittime");
> +	report_pass("reporting total/best/worst of %d outer iterations", outer_iters);

you can just do a report_info here

> +
> +	for (i = 0; i < ARRAY_SIZE(exittime_tests); i++) {
> +		current_test = &exittime_tests[i];
> +		total = 0;
> +		worst = 0;
> +		best = -1;
> +		report_prefix_pushf("%s", current_test->name);
> +
> +		testfunc_arg = current_test->arg;
> +		if (current_test->setupfunc)
> +			testfunc_arg = current_test->setupfunc(testfunc_arg);
> +
> +		for (j = 0; j < outer_iters; j++) {
> +			stckf(&start);
> +			for (k = 0; k < current_test->iters; k++)
> +				current_test->testfunc(testfunc_arg);
> +			stckf(&end);

so the actual number of iterations is outer_iters * current_test->iters?

> +			elapsed = end - start;
> +			best = MIN(best, elapsed);
> +			worst = MAX(worst, elapsed);
> +			total += elapsed;
> +		}
> +		average = total / outer_iters;

so the average is per block

> +		report_pass("iters/total/best/avg/worst %lu/%lu/%lu/%lu/%lu us", current_test->iters, tod_to_us(total), tod_to_us(best), tod_to_us(average), tod_to_us(worst));

but here you give the number of iterations per block, but not the
number of blocks

so average min and max are per block, this means that it's not really
possible to measure the actual time of a single operation

maybe do a report_info with: outer_iters, current_test->iters, total

and then the report_pass with best avg worst, but per individual test,
not per block

something like average = total / (outer_iters * current_test->iters),

or at least normalize them, something like:

average = total / (outer_iters * current_test->iters / 1000);

so the numbers are always about the same number of actual iterations

(and then make sure current_test->iters is always at least 1000, it
doesn't matter if the test needs a few seconds to complete, as long as
it doesn't take minutes)

> +		report_prefix_pop();
> +	}
> +
> +	report_prefix_pop();
> +	return report_summary();
> +}
> diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg
> index 2c04ae7c7c15..feb9abf03745 100644
> --- a/s390x/unittests.cfg
> +++ b/s390x/unittests.cfg
> @@ -201,3 +201,7 @@ timeout = 5
>  [migration-sck]
>  file = migration-sck.elf
>  groups = migration
> +
> +[exittime]
> +file = exittime.elf
> +smp = 2

diff --git a/s390x/Makefile b/s390x/Makefile
index fba09bc2df3a..a28c6746cf55 100644
--- a/s390x/Makefile
+++ b/s390x/Makefile
@@ -37,6 +37,7 @@  tests += $(TEST_DIR)/migration-skey.elf
 tests += $(TEST_DIR)/panic-loop-extint.elf
 tests += $(TEST_DIR)/panic-loop-pgm.elf
 tests += $(TEST_DIR)/migration-sck.elf
+tests += $(TEST_DIR)/exittime.elf
 
 pv-tests += $(TEST_DIR)/pv-diags.elf
 
diff --git a/s390x/exittime.c b/s390x/exittime.c
new file mode 100644
index 000000000000..8e00a3c3559a
--- /dev/null
+++ b/s390x/exittime.c
@@ -0,0 +1,253 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Measure run time of various instructions. Can be used to find runtime
+ * regressions of instructions which cause exits.
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * Authors:
+ *  Nico Boehr <nrb@linux.ibm.com>
+ */
+#include <libcflat.h>
+#include <smp.h>
+#include <sclp.h>
+#include <asm/time.h>
+#include <asm/sigp.h>
+#include <asm/interrupt.h>
+#include <asm/page.h>
+
+char pagebuf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+static void test_sigp_sense_running(long destcpu)
+{
+	smp_sigp(destcpu, SIGP_SENSE_RUNNING, 0, NULL);
+}
+
+static void test_nop(long ignore)
+{
+	/* nops don't trap into the hypervisor, so let's test them for reference */
+	asm volatile(
+		"nop"
+		:
+		:
+		: "memory"
+	);
+}
+
+static void test_diag9c(long destcpu)
+{
+	asm volatile(
+		"diag %[destcpu],0,0x9c"
+		:
+		: [destcpu] "d" (destcpu)
+	);
+}
+
+static long setup_get_this_cpuaddr(long ignore)
+{
+	return stap();
+}
+
+static void test_diag44(long ignore)
+{
+	asm volatile(
+		"diag 0,0,0x44"
+	);
+}
+
+static void test_stnsm(long ignore)
+{
+	int out;
+
+	asm volatile(
+		"stnsm %[out],0xff"
+		: [out] "=Q" (out)
+	);
+}
+
+static void test_stosm(long ignore)
+{
+	int out;
+
+	asm volatile(
+		"stosm %[out],0"
+		: [out] "=Q" (out)
+	);
+}
+
+static long setup_ssm(long ignore)
+{
+	long system_mask = 0;
+
+	asm volatile(
+		"stosm %[system_mask],0"
+		: [system_mask] "=Q" (system_mask)
+	);
+
+	return system_mask;
+}
+
+static void test_ssm(long old_system_mask)
+{
+	asm volatile(
+		"ssm %[old_system_mask]"
+		:
+		: [old_system_mask] "Q" (old_system_mask)
+	);
+}
+
+static long setup_lctl4(long ignore)
+{
+	long ctl4_orig = 0;
+
+	asm volatile(
+		"stctg 4,4,%[ctl4_orig]"
+		: [ctl4_orig] "=S" (ctl4_orig)
+	);
+
+	return ctl4_orig;
+}
+
+static void test_lctl4(long ctl4_orig)
+{
+	asm volatile(
+		"lctlg 4,4,%[ctl4_orig]"
+		:
+		: [ctl4_orig] "S" (ctl4_orig)
+	);
+}
+
+static void test_stpx(long ignore)
+{
+	unsigned int prefix;
+
+	asm volatile(
+		"stpx %[prefix]"
+		: [prefix] "=Q" (prefix)
+	);
+}
+
+static void test_stfl(long ignore)
+{
+	asm volatile(
+		"stfl 0"
+		:
+		:
+		: "memory"
+	);
+}
+
+static void test_epsw(long ignore)
+{
+	long r1, r2;
+
+	asm volatile(
+		"epsw %[r1], %[r2]"
+		: [r1] "=d" (r1), [r2] "=d" (r2)
+	);
+}
+
+static void test_illegal(long ignore)
+{
+	expect_pgm_int();
+	asm volatile(
+		".word 0"
+	);
+	clear_pgm_int();
+}
+
+static long setup_servc(long arg)
+{
+	memset(pagebuf, 0, PAGE_SIZE);
+	return arg;
+}
+
+static void test_servc(long ignore)
+{
+	SCCB *sccb = (SCCB *) pagebuf;
+
+	sccb->h.length = 8;
+	servc(0, (unsigned long) sccb);
+}
+
+static void test_stsi(long fc)
+{
+	stsi(pagebuf, fc, 2, 2);
+}
+
+struct test {
+	const char *name;
+	/*
+	 * When non-null, will be called once before running the test loop.
+	 * Its return value will be given as argument to testfunc.
+	 */
+	long (*setupfunc)(long arg);
+	void (*testfunc)(long arg);
+	long arg;
+	long iters;
+} const exittime_tests[] = {
+	{"nop",                   NULL,                   test_nop,                0, 200000 },
+	{"sigp sense running(0)", NULL,                   test_sigp_sense_running, 0, 20000 },
+	{"sigp sense running(1)", NULL,                   test_sigp_sense_running, 1, 20000 },
+	{"diag9c(self)",          setup_get_this_cpuaddr, test_diag9c,             0, 2000 },
+	{"diag9c(0)",             NULL,                   test_diag9c,             0, 2000 },
+	{"diag9c(1)",             NULL,                   test_diag9c,             1, 2000 },
+	{"diag44",                NULL,                   test_diag44,             0, 2000 },
+	{"stnsm",                 NULL,                   test_stnsm,              0, 200000 },
+	{"stosm",                 NULL,                   test_stosm,              0, 200000 },
+	{"ssm",                   setup_ssm,              test_ssm,                0, 200000 },
+	{"lctl4",                 setup_lctl4,            test_lctl4,              0, 20000 },
+	{"stpx",                  NULL,                   test_stpx,               0, 2000 },
+	{"stfl",                  NULL,                   test_stfl,               0, 2000 },
+	{"epsw",                  NULL,                   test_epsw,               0, 20000 },
+	{"illegal",               NULL,                   test_illegal,            0, 2000 },
+	{"servc",                 setup_servc,            test_servc,              0, 2000 },
+	{"stsi122",               NULL,                   test_stsi,               1, 200 },
+	{"stsi222",               NULL,                   test_stsi,               2, 200 },
+	{"stsi322",               NULL,                   test_stsi,               3, 200 },
+};
+
+static uint64_t tod_to_us(uint64_t tod)
+{
+	return tod >> STCK_SHIFT_US;
+}
+
+int main(void)
+{
+	int i, j, k, testfunc_arg;
+	const int outer_iters = 100;
+	struct test const *current_test;
+	uint64_t start, end, elapsed, worst, best, total, average;
+
+	report_prefix_push("exittime");
+	report_pass("reporting total/best/worst of %d outer iterations", outer_iters);
+
+	for (i = 0; i < ARRAY_SIZE(exittime_tests); i++) {
+		current_test = &exittime_tests[i];
+		total = 0;
+		worst = 0;
+		best = -1;
+		report_prefix_pushf("%s", current_test->name);
+
+		testfunc_arg = current_test->arg;
+		if (current_test->setupfunc)
+			testfunc_arg = current_test->setupfunc(testfunc_arg);
+
+		for (j = 0; j < outer_iters; j++) {
+			stckf(&start);
+			for (k = 0; k < current_test->iters; k++)
+				current_test->testfunc(testfunc_arg);
+			stckf(&end);
+			elapsed = end - start;
+			best = MIN(best, elapsed);
+			worst = MAX(worst, elapsed);
+			total += elapsed;
+		}
+		average = total / outer_iters;
+		report_pass("iters/total/best/avg/worst %lu/%lu/%lu/%lu/%lu us", current_test->iters, tod_to_us(total), tod_to_us(best), tod_to_us(average), tod_to_us(worst));
+		report_prefix_pop();
+	}
+
+	report_prefix_pop();
+	return report_summary();
+}
diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg
index 2c04ae7c7c15..feb9abf03745 100644
--- a/s390x/unittests.cfg
+++ b/s390x/unittests.cfg
@@ -201,3 +201,7 @@  timeout = 5
 [migration-sck]
 file = migration-sck.elf
 groups = migration
+
+[exittime]
+file = exittime.elf
+smp = 2

[kvm-unit-tests,v4,1/1] s390x: add exittime tests

Commit Message

Comments

Patch