diff mbox series

[PATCH/RFC] kunit/rtc: Add real support for very slow tests

Message ID 49d57ab512c47f01d6c374d533f1752871ea4246.1743091573.git.geert@linux-m68k.org (mailing list archive)
State New
Headers show
Series [PATCH/RFC] kunit/rtc: Add real support for very slow tests | expand

Commit Message

Geert Uytterhoeven March 27, 2025, 4:07 p.m. UTC
When running rtc_lib_test ("lib_test" before my "[PATCH] rtc: Rename
lib_test to rtc_lib_test") on m68k/ARAnyM:

    KTAP version 1
    1..1
	KTAP version 1
	# Subtest: rtc_lib_test_cases
	# module: rtc_lib_test
	1..2
	# rtc_time64_to_tm_test_date_range_1000: Test should be marked slow (runtime: 3.222371420s)
	ok 1 rtc_time64_to_tm_test_date_range_1000
	# rtc_time64_to_tm_test_date_range_160000: try timed out
	# rtc_time64_to_tm_test_date_range_160000: test case timed out
	# rtc_time64_to_tm_test_date_range_160000.speed: slow
	not ok 2 rtc_time64_to_tm_test_date_range_160000
    # rtc_lib_test_cases: pass:1 fail:1 skip:0 total:2
    # Totals: pass:1 fail:1 skip:0 total:2
    not ok 1 rtc_lib_test_cases

Commit 02c2d0c2a84172c3 ("kunit: Add speed attribute") added the notion
of "very slow" tests, but this is further unused and unhandled.

Hence:
  1. Introduce KUNIT_CASE_VERY_SLOW(),
  2. Increase timeout by ten; ideally this should only be done for very
     slow tests, but I couldn't find how to access kunit_case.attr.case
     from kunit_try_catch_run(),
  3. Mark rtc_time64_to_tm_test_date_range_1000 slow,
  4. Mark rtc_time64_to_tm_test_date_range_160000 very slow.

Afterwards:

    KTAP version 1
    1..1
	KTAP version 1
	# Subtest: rtc_lib_test_cases
	# module: rtc_lib_test
	1..2
	# rtc_time64_to_tm_test_date_range_1000.speed: slow
	ok 1 rtc_time64_to_tm_test_date_range_1000
	# rtc_time64_to_tm_test_date_range_160000.speed: very_slow
	ok 2 rtc_time64_to_tm_test_date_range_160000
    # rtc_lib_test_cases: pass:2 fail:0 skip:0 total:2
    # Totals: pass:2 fail:0 skip:0 total:2
    ok 1 rtc_lib_test_cases

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/rtc/rtc_lib_test.c |  4 ++--
 include/kunit/test.h       | 11 +++++++++++
 lib/kunit/try-catch.c      |  3 ++-
 3 files changed, 15 insertions(+), 3 deletions(-)

Comments

David Gow March 28, 2025, 8:06 a.m. UTC | #1
Hi Geert,

Thanks for sending this out: I think this raises some good questions
about exactly how to handle long running tests (particularly on
older/slower hardware).

I've put a few notes below, but, tl;dr: I think these are all good
changes, even if there's more we can do to better scale to slower
hardware.

On Fri, 28 Mar 2025 at 00:07, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>
> When running rtc_lib_test ("lib_test" before my "[PATCH] rtc: Rename
> lib_test to rtc_lib_test") on m68k/ARAnyM:
>
>     KTAP version 1
>     1..1
>         KTAP version 1
>         # Subtest: rtc_lib_test_cases
>         # module: rtc_lib_test
>         1..2
>         # rtc_time64_to_tm_test_date_range_1000: Test should be marked slow (runtime: 3.222371420s)
>         ok 1 rtc_time64_to_tm_test_date_range_1000
>         # rtc_time64_to_tm_test_date_range_160000: try timed out
>         # rtc_time64_to_tm_test_date_range_160000: test case timed out
>         # rtc_time64_to_tm_test_date_range_160000.speed: slow
>         not ok 2 rtc_time64_to_tm_test_date_range_160000
>     # rtc_lib_test_cases: pass:1 fail:1 skip:0 total:2
>     # Totals: pass:1 fail:1 skip:0 total:2
>     not ok 1 rtc_lib_test_cases
>
> Commit 02c2d0c2a84172c3 ("kunit: Add speed attribute") added the notion
> of "very slow" tests, but this is further unused and unhandled.
>
> Hence:
>   1. Introduce KUNIT_CASE_VERY_SLOW(),

Thanks -- I think we want this regardless.

>   2. Increase timeout by ten; ideally this should only be done for very
>      slow tests, but I couldn't find how to access kunit_case.attr.case
>      from kunit_try_catch_run(),


My feeling for tests generally is:
- Normal: effectively instant on modern hardware, O(seconds) on
ancient hardware.
- Slow: takes O(seconds) to run on modern hardware, O(minutes)..O(10s
of minutes) on ancient hardware.
- Very slow: O(minutes) or higher on modern hardware, infeasible on
ancient hardware.

Obviously the definition of "modern" and "ancient" hardware here is
pretty arbitrary: I'm using "modern, high-end x86" ~4GHz as my
"modern" example, and "66MHz 486" as my "ancient" one, but things like
emulation or embedded systems fit in-between.

Ultimately, I think the timeout probably needs to be configurable on a
per-machine basis more than a per-test one, but having a 10x
multiplier (or even a 100x multiplier) for very slow tests would also
work for me.

I quickly tried hacking together something to pass through the
attribute and implement this. Diff (probably mangled by gmail) below:
---
diff --git a/include/kunit/try-catch.h b/include/kunit/try-catch.h
index 7c966a1adbd3..24a29622068b 100644
--- a/include/kunit/try-catch.h
+++ b/include/kunit/try-catch.h
@@ -50,6 +50,13 @@ struct kunit_try_catch {
       void *context;
};

+struct kunit_try_catch_context {
+       struct kunit *test;
+       struct kunit_suite *suite;
+       struct kunit_case *test_case;
+};
+
+
void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context);

void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 146d1b48a096..79d12c0c2d25 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -420,12 +420,6 @@ static void kunit_run_case_cleanup(struct kunit *test,
       kunit_case_internal_cleanup(test);
}

-struct kunit_try_catch_context {
-       struct kunit *test;
-       struct kunit_suite *suite;
-       struct kunit_case *test_case;
-};
-
static void kunit_try_run_case(void *data)
{
       struct kunit_try_catch_context *ctx = data;
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index 92099c67bb21..5f62e393d422 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -34,30 +34,15 @@ static int kunit_generic_run_threadfn_adapter(void *data)
       return 0;
}

-static unsigned long kunit_test_timeout(void)
+static unsigned long kunit_test_timeout(struct kunit_try_catch *try_catch)
{
-       /*
-        * TODO(brendanhiggins@google.com): We should probably have some type of
-        * variable timeout here. The only question is what that timeout value
-        * should be.
-        *
-        * The intention has always been, at some point, to be able to label
-        * tests with some type of size bucket (unit/small, integration/medium,
-        * large/system/end-to-end, etc), where each size bucket would get a
-        * default timeout value kind of like what Bazel does:
-        * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size
-        * There is still some debate to be had on exactly how we do this. (For
-        * one, we probably want to have some sort of test runner level
-        * timeout.)
-        *
-        * For more background on this topic, see:
-        * https://mike-bland.com/2011/11/01/small-medium-large.html
-        *
-        * If tests timeout due to exceeding sysctl_hung_task_timeout_secs,
-        * the task will be killed and an oops generated.
-        */
-       // FIXME times ten for KUNIT_SPEED_VERY_SLOW?
-       return 10 * 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
+       struct kunit_try_catch_context *ctx = (struct
kunit_try_catch_context *)try_catch->context;
+       struct kunit_case *test_case = ctx->test_case;
+       unsigned long base_timeout = 300 *
msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
+       /* VERY_SLOW tests get 10 times the time to execute (50 minutes). */
+       unsigned long multiplier = (test_case->attr.speed ==
KUNIT_SPEED_VERY_SLOW) ? 10 : 1;
+
+       return multiplier * base_timeout;
}

void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
@@ -87,7 +72,7 @@ void kunit_try_catch_run(struct kunit_try_catch
*try_catch, void *context)
       wake_up_process(task_struct);

       time_remaining = wait_for_completion_timeout(task_done,
-                                                    kunit_test_timeout());
+
kunit_test_timeout(try_catch));
       if (time_remaining == 0) {
               try_catch->try_result = -ETIMEDOUT;
               kthread_stop(task_struct);

---

I'll get around to extending this to allow the "base timeout" to be
configurable as a command-line option, too, if this seems like a good
way to go.

>   3. Mark rtc_time64_to_tm_test_date_range_1000 slow,
>   4. Mark rtc_time64_to_tm_test_date_range_160000 very slow.

Hmm... these are definitely fast enough on my "modern" machine that
they probably only warrant "slow", not "very slow". But given they're
definitely causing problems on older machines, I'm happy to go with
marking the large ones very slow. (I've been waiting for them for
about 45 minutes so far on my 486.)

Do the time tests in kernel/time/time_test.c also need to be marked
very slow, or does that run much faster on your setup?


>
> Afterwards:
>
>     KTAP version 1
>     1..1
>         KTAP version 1
>         # Subtest: rtc_lib_test_cases
>         # module: rtc_lib_test
>         1..2
>         # rtc_time64_to_tm_test_date_range_1000.speed: slow
>         ok 1 rtc_time64_to_tm_test_date_range_1000
>         # rtc_time64_to_tm_test_date_range_160000.speed: very_slow
>         ok 2 rtc_time64_to_tm_test_date_range_160000
>     # rtc_lib_test_cases: pass:2 fail:0 skip:0 total:2
>     # Totals: pass:2 fail:0 skip:0 total:2
>     ok 1 rtc_lib_test_cases
>
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> ---

Is this causing you enough strife that you want it in as-is, straight
away, or would you be happy with it being split up and polished a bit
first -- particularly around supporting the more configurable timeout,
and shifting the test changes into separate patches? (I'm happy to do
that for you if you don't want to dig around in the somewhat messy
KUnit try-catch stuff any further.)

Does anyone else (particularly anyone involved with the rtc tests)
have thoughts?

Cheers,
-- David

>  drivers/rtc/rtc_lib_test.c |  4 ++--
>  include/kunit/test.h       | 11 +++++++++++
>  lib/kunit/try-catch.c      |  3 ++-
>  3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/rtc/rtc_lib_test.c b/drivers/rtc/rtc_lib_test.c
> index c30c759662e39b48..fd3210e39d37dbc6 100644
> --- a/drivers/rtc/rtc_lib_test.c
> +++ b/drivers/rtc/rtc_lib_test.c
> @@ -85,8 +85,8 @@ static void rtc_time64_to_tm_test_date_range_1000(struct kunit *test)
>  }
>
>  static struct kunit_case rtc_lib_test_cases[] = {
> -       KUNIT_CASE(rtc_time64_to_tm_test_date_range_1000),
> -       KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_160000),
> +       KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_1000),
> +       KUNIT_CASE_VERY_SLOW(rtc_time64_to_tm_test_date_range_160000),
>         {}
>  };
>
> diff --git a/include/kunit/test.h b/include/kunit/test.h
> index 9b773406e01f3c43..4e3c1cae5b41466e 100644
> --- a/include/kunit/test.h
> +++ b/include/kunit/test.h
> @@ -183,6 +183,17 @@ static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
>                 { .run_case = test_name, .name = #test_name,    \
>                   .attr.speed = KUNIT_SPEED_SLOW, .module_name = KBUILD_MODNAME}
>
> +/**
> + * KUNIT_CASE_VERY_SLOW - A helper for creating a &struct kunit_case
> + * with the very slow attribute
> + *
> + * @test_name: a reference to a test case function.
> + */
> +
> +#define KUNIT_CASE_VERY_SLOW(test_name)                        \
> +               { .run_case = test_name, .name = #test_name,    \
> +                 .attr.speed = KUNIT_SPEED_VERY_SLOW, .module_name = KBUILD_MODNAME}
> +
>  /**
>   * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case
>   *
> diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
> index 6bbe0025b0790bd2..92099c67bb21d0a4 100644
> --- a/lib/kunit/try-catch.c
> +++ b/lib/kunit/try-catch.c
> @@ -56,7 +56,8 @@ static unsigned long kunit_test_timeout(void)
>          * If tests timeout due to exceeding sysctl_hung_task_timeout_secs,
>          * the task will be killed and an oops generated.
>          */
> -       return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
> +       // FIXME times ten for KUNIT_SPEED_VERY_SLOW?
> +       return 10 * 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
>  }
>
>  void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
> --
> 2.43.0
>
Geert Uytterhoeven March 28, 2025, 3:37 p.m. UTC | #2
Hi David,

On Fri, 28 Mar 2025 at 09:07, David Gow <davidgow@google.com> wrote:
> Thanks for sending this out: I think this raises some good questions
> about exactly how to handle long running tests (particularly on
> older/slower hardware).
>
> I've put a few notes below, but, tl;dr: I think these are all good
> changes, even if there's more we can do to better scale to slower
> hardware.
>
> On Fri, 28 Mar 2025 at 00:07, Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> >   2. Increase timeout by ten; ideally this should only be done for very
> >      slow tests, but I couldn't find how to access kunit_case.attr.case
> >      from kunit_try_catch_run(),
>
>
> My feeling for tests generally is:
> - Normal: effectively instant on modern hardware, O(seconds) on
> ancient hardware.
> - Slow: takes O(seconds) to run on modern hardware, O(minutes)..O(10s
> of minutes) on ancient hardware.
> - Very slow: O(minutes) or higher on modern hardware, infeasible on
> ancient hardware.
>
> Obviously the definition of "modern" and "ancient" hardware here is
> pretty arbitrary: I'm using "modern, high-end x86" ~4GHz as my
> "modern" example, and "66MHz 486" as my "ancient" one, but things like
> emulation or embedded systems fit in-between.
>
> Ultimately, I think the timeout probably needs to be configurable on a
> per-machine basis more than a per-test one, but having a 10x
> multiplier (or even a 100x multiplier) for very slow tests would also
> work for me.

Yes, adapting automatically to the speed of the target maachine
would be nice, but non-trivial.

> I quickly tried hacking together something to pass through the
> attribute and implement this. Diff (probably mangled by gmail) below:

[...]

Thanks!

> I'll get around to extending this to allow the "base timeout" to be
> configurable as a command-line option, too, if this seems like a good
> way to go.
>
> >   3. Mark rtc_time64_to_tm_test_date_range_1000 slow,
> >   4. Mark rtc_time64_to_tm_test_date_range_160000 very slow.
>
> Hmm... these are definitely fast enough on my "modern" machine that
> they probably only warrant "slow", not "very slow". But given they're
> definitely causing problems on older machines, I'm happy to go with
> marking the large ones very slow. (I've been waiting for them for
> about 45 minutes so far on my 486.)
>
> Do the time tests in kernel/time/time_test.c also need to be marked
> very slow, or does that run much faster on your setup?

Hmm, I did run time_test (insmod took (+7 minutes), but I don't
seem to have pass/fail output. Will rerun...

Indeed:

    # time64_to_tm_test_date_range.speed: slow

Another test that wanted to be marked as slow was:

    # kunit_platform_device_add_twice_fails_test: Test should be
marked slow (runtime: 30.788248702s)

I will rerun all, as it seems I have lost some logs...

> Is this causing you enough strife that you want it in as-is, straight
> away, or would you be happy with it being split up and polished a bit
> first -- particularly around supporting the more configurable timeout,
> and shifting the test changes into separate patches? (I'm happy to do
> that for you if you don't want to dig around in the somewhat messy
> KUnit try-catch stuff any further.)

This is definitely not something urgent for me.

Thanks!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
diff mbox series

Patch

diff --git a/drivers/rtc/rtc_lib_test.c b/drivers/rtc/rtc_lib_test.c
index c30c759662e39b48..fd3210e39d37dbc6 100644
--- a/drivers/rtc/rtc_lib_test.c
+++ b/drivers/rtc/rtc_lib_test.c
@@ -85,8 +85,8 @@  static void rtc_time64_to_tm_test_date_range_1000(struct kunit *test)
 }
 
 static struct kunit_case rtc_lib_test_cases[] = {
-	KUNIT_CASE(rtc_time64_to_tm_test_date_range_1000),
-	KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_160000),
+	KUNIT_CASE_SLOW(rtc_time64_to_tm_test_date_range_1000),
+	KUNIT_CASE_VERY_SLOW(rtc_time64_to_tm_test_date_range_160000),
 	{}
 };
 
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 9b773406e01f3c43..4e3c1cae5b41466e 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -183,6 +183,17 @@  static inline char *kunit_status_to_ok_not_ok(enum kunit_status status)
 		{ .run_case = test_name, .name = #test_name,	\
 		  .attr.speed = KUNIT_SPEED_SLOW, .module_name = KBUILD_MODNAME}
 
+/**
+ * KUNIT_CASE_VERY_SLOW - A helper for creating a &struct kunit_case
+ * with the very slow attribute
+ *
+ * @test_name: a reference to a test case function.
+ */
+
+#define KUNIT_CASE_VERY_SLOW(test_name)			\
+		{ .run_case = test_name, .name = #test_name,	\
+		  .attr.speed = KUNIT_SPEED_VERY_SLOW, .module_name = KBUILD_MODNAME}
+
 /**
  * KUNIT_CASE_PARAM - A helper for creation a parameterized &struct kunit_case
  *
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
index 6bbe0025b0790bd2..92099c67bb21d0a4 100644
--- a/lib/kunit/try-catch.c
+++ b/lib/kunit/try-catch.c
@@ -56,7 +56,8 @@  static unsigned long kunit_test_timeout(void)
 	 * If tests timeout due to exceeding sysctl_hung_task_timeout_secs,
 	 * the task will be killed and an oops generated.
 	 */
-	return 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
+	// FIXME times ten for KUNIT_SPEED_VERY_SLOW?
+	return 10 * 300 * msecs_to_jiffies(MSEC_PER_SEC); /* 5 min */
 }
 
 void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)