
[linux-kselftest/test,v2] ext4: add kunit test for decoding extended timestamps

Message ID 20191010023931.230475-1-yzaikin@google.com (mailing list archive)
State New
Series [linux-kselftest/test,v2] ext4: add kunit test for decoding extended timestamps

Commit Message

Iurii Zaikin Oct. 10, 2019, 2:39 a.m. UTC
KUnit tests for decoding extended 64 bit timestamps.

Signed-off-by: Iurii Zaikin <yzaikin@google.com>
---
 fs/ext4/Kconfig      |  12 +++
 fs/ext4/Makefile     |   1 +
 fs/ext4/inode-test.c | 221 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 234 insertions(+)
 create mode 100644 fs/ext4/inode-test.c

--
2.23.0.700.g56cf767bdb-goog

Comments

Bird, Tim Oct. 10, 2019, 3:46 a.m. UTC | #1
> -----Original Message-----
> From Iurii Zaikin on Wednesday, October 09, 2019 4:40 PM
> 
> KUnit tests for decoding extended 64 bit timestamps.
> 
> Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> ---
>  fs/ext4/Kconfig      |  12 +++
>  fs/ext4/Makefile     |   1 +
>  fs/ext4/inode-test.c | 221 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 234 insertions(+)
>  create mode 100644 fs/ext4/inode-test.c
> 
> diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> index cbb5ca830e57..cb0b52753674 100644
> --- a/fs/ext4/Kconfig
> +++ b/fs/ext4/Kconfig
> @@ -106,3 +106,15 @@ config EXT4_DEBUG
>  	  If you select Y here, then you will be able to turn on debugging
>  	  with a command such as:
>  		echo 1 > /sys/module/ext4/parameters/mballoc_debug
> +
> +config EXT4_KUNIT_TESTS
> +	bool "KUnit test for ext4 inode"
> +	depends on EXT4_FS
> +	depends on KUNIT
> +	help
> +	  This builds the ext4 inode sysctl unit test, which runs on boot.
> +	  Tests the encoding correctness of ext4 inode.
> +	  For more information on KUnit and unit tests in general please refer
> +	  to the KUnit documentation in Documentation/dev-tools/kunit/.
> +
> +	  If unsure, say N.
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index b17ddc229ac5..a0588fd2eea6 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -13,4 +13,5 @@ ext4-y	:= balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
> 
>  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
>  ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
> +ext4-$(CONFIG_EXT4_KUNIT_TESTS)	+= inode-test.o
>  ext4-$(CONFIG_FS_VERITY)		+= verity.o
> diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> new file mode 100644
> index 000000000000..43bc6cb547cd
> --- /dev/null
> +++ b/fs/ext4/inode-test.c
> @@ -0,0 +1,221 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KUnit test of ext4 inode that verifies the seconds part of [a/c/m]
> + * timestamps in ext4 inode structs is decoded correctly.
> + * These tests are derived from the table under
> + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> + */
> +
> +#include <kunit/test.h>
> +#include <linux/kernel.h>
> +#include <linux/time64.h>
> +
> +#include "ext4.h"
> +
> +/* binary: 00000000 00000000 00000000 00000000 */
> +#define LOWER_MSB_0 0L
> +/* binary: 01111111 11111111 11111111 11111111 */
> +#define UPPER_MSB_0 0x7fffffffL
> +/* binary: 10000000 00000000 00000000 00000000 */
> +#define LOWER_MSB_1 (-0x80000000L)
> +/* binary: 11111111 11111111 11111111 11111111 */
> +#define UPPER_MSB_1 (-1L)
> +/* binary: 00111111   11111111 11111111 11111111 */
> +#define MAX_NANOSECONDS ((1L << 30) - 1)
> +
> +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
> +
> +struct timestamp_expectation {
> +	const char *test_case_name;
> +	struct timespec64 expected;
> +	u32 extra_bits;
> +	bool msb_set;
> +	bool lower_bound;
> +};
> +
> +static time64_t get_32bit_time(const struct timestamp_expectation * const test)
> +{
> +	if (test->msb_set) {
> +		if (test->lower_bound)
> +			return LOWER_MSB_1;
> +
> +		return UPPER_MSB_1;
> +	}
> +
> +	if (test->lower_bound)
> +		return LOWER_MSB_0;
> +	return UPPER_MSB_0;
> +}
> +
> +
> +static void inode_test_xtimestamp_decoding(struct kunit *test)
> +{
> +	const struct timestamp_expectation test_data[] = {
> +		{
> +			.test_case_name = "1901-12-13",
> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "1969-12-31",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "1970-01-01",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 0,
> +			.expected = {0LL, 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2038-01-19",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2038-01-19",
It's quite handy if testcase names are unique and describe what it is they are testing.

If someone unfamiliar with this test looks at the results, it's nice if they can
intuit what it was that went wrong, based on the test case name.

IMHO these names are too short and not descriptive enough.
 -- Tim


> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2106-02-07",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2106-02-07",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2174-02-25",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2174-02-25",
> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits =  2,
> +			.expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2242-03-16",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2242-03-16",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = " 2310-04-04",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = " 2310-04-04 00:00:00.1",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 6,
> +			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> +		},
> +
> +		{
> +			.test_case_name = "2378-04-22 00:00:00.MAX_NSEC",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 0xFFFFFFFF,
> +			.expected = {.tv_sec = 0x300000000LL,
> +				     .tv_nsec = MAX_NANOSECONDS},
> +		},
> +
> +		{
> +			.test_case_name = "2378-04-22",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 3,
> +			.expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2446-05-10",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 3,
> +			.expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> +		}
> +	};
> +
> +	struct timespec64 timestamp;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> +		timestamp.tv_sec = get_32bit_time(&test_data[i]);
> +		ext4_decode_extra_time(&timestamp,
> +				       cpu_to_le32(test_data[i].extra_bits));
> +
> +		KUNIT_EXPECT_EQ_MSG(test,
> +				    test_data[i].expected.tv_sec,
> +				    timestamp.tv_sec,
> +				    CASE_NAME_FORMAT,
> +				    test_data[i].test_case_name,
> +				    test_data[i].msb_set,
> +				    test_data[i].lower_bound,
> +				    test_data[i].extra_bits);
> +		KUNIT_EXPECT_EQ_MSG(test,
> +				    test_data[i].expected.tv_nsec,
> +				    timestamp.tv_nsec,
> +				    CASE_NAME_FORMAT,
> +				    test_data[i].test_case_name,
> +				    test_data[i].msb_set,
> +				    test_data[i].lower_bound,
> +				    test_data[i].extra_bits);
> +	}
> +}
> +
> +static struct kunit_case ext4_inode_test_cases[] = {
> +	KUNIT_CASE(inode_test_xtimestamp_decoding),
> +	{}
> +};
> +
> +static struct kunit_suite ext4_inode_test_suite = {
> +	.name = "ext4_inode_test",
> +	.test_cases = ext4_inode_test_cases,
> +};
> +
> +kunit_test_suite(ext4_inode_test_suite);
> --
> 2.23.0.700.g56cf767bdb-goog
Iurii Zaikin Oct. 10, 2019, 4:45 p.m. UTC | #2
On Wed, Oct 9, 2019 at 8:47 PM <Tim.Bird@sony.com> wrote:
>
>
> > -----Original Message-----
> > From Iurii Zaikin on Wednesday, October 09, 2019 4:40 PM
> >
> > KUnit tests for decoding extended 64 bit timestamps.
> >
> > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > ---
> >  fs/ext4/Kconfig      |  12 +++
> >  fs/ext4/Makefile     |   1 +
> >  fs/ext4/inode-test.c | 221
> > +++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 234 insertions(+)
> >  create mode 100644 fs/ext4/inode-test.c
> >
> > diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> > index cbb5ca830e57..cb0b52753674 100644
> > --- a/fs/ext4/Kconfig
> > +++ b/fs/ext4/Kconfig
> > @@ -106,3 +106,15 @@ config EXT4_DEBUG
> >         If you select Y here, then you will be able to turn on debugging
> >         with a command such as:
> >               echo 1 > /sys/module/ext4/parameters/mballoc_debug
> > +
> > +config EXT4_KUNIT_TESTS
> > +     bool "KUnit test for ext4 inode"
> > +     depends on EXT4_FS
> > +     depends on KUNIT
> > +     help
> > +       This builds the ext4 inode sysctl unit test, which runs on boot.
> > +       Tests the encoding correctness of ext4 inode.
> > +       For more information on KUnit and unit tests in general please refer
> > +       to the KUnit documentation in Documentation/dev-tools/kunit/.
> > +
> > +       If unsure, say N.
> > diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> > index b17ddc229ac5..a0588fd2eea6 100644
> > --- a/fs/ext4/Makefile
> > +++ b/fs/ext4/Makefile
> > @@ -13,4 +13,5 @@ ext4-y      := balloc.o bitmap.o block_validity.o dir.o
> > ext4_jbd2.o extents.o \
> >
> >  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)     += acl.o
> >  ext4-$(CONFIG_EXT4_FS_SECURITY)              += xattr_security.o
> > +ext4-$(CONFIG_EXT4_KUNIT_TESTS)      += inode-test.o
> >  ext4-$(CONFIG_FS_VERITY)             += verity.o
> > diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> > new file mode 100644
> > index 000000000000..43bc6cb547cd
> > --- /dev/null
> > +++ b/fs/ext4/inode-test.c
> > @@ -0,0 +1,221 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> > + * timestamps in ext4 inode structs are decoded correctly.
> > + * These tests are derived from the table under
> > + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> > + */
> > +
> > +#include <kunit/test.h>
> > +#include <linux/kernel.h>
> > +#include <linux/time64.h>
> > +
> > +#include "ext4.h"
> > +
> > +/* binary: 00000000 00000000 00000000 00000000 */
> > +#define LOWER_MSB_0 0L
> > +/* binary: 01111111 11111111 11111111 11111111 */
> > +#define UPPER_MSB_0 0x7fffffffL
> > +/* binary: 10000000 00000000 00000000 00000000 */
> > +#define LOWER_MSB_1 (-0x80000000L)
> > +/* binary: 11111111 11111111 11111111 11111111 */
> > +#define UPPER_MSB_1 (-1L)
> > +/* binary: 00111111   11111111 11111111 11111111 */
> > +#define MAX_NANOSECONDS ((1L << 30) - 1)
> > +
> > +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits:
> > %x"
> > +
> > +struct timestamp_expectation {
> > +     const char *test_case_name;
> > +     struct timespec64 expected;
> > +     u32 extra_bits;
> > +     bool msb_set;
> > +     bool lower_bound;
> > +};
> > +
> > +static time64_t get_32bit_time(const struct timestamp_expectation * const
> > test)
> > +{
> > +     if (test->msb_set) {
> > +             if (test->lower_bound)
> > +                     return LOWER_MSB_1;
> > +
> > +             return UPPER_MSB_1;
> > +     }
> > +
> > +     if (test->lower_bound)
> > +             return LOWER_MSB_0;
> > +     return UPPER_MSB_0;
> > +}
> > +
> > +
> > +static void inode_test_xtimestamp_decoding(struct kunit *test)
> > +{
> > +     const struct timestamp_expectation test_data[] = {
> > +             {
> > +                     .test_case_name = "1901-12-13",
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "1969-12-31",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "1970-01-01",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0,
> > +                     .expected = {0LL, 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2038-01-19",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2038-01-19",
> It's quite handy if testcase names can be unique, and describe what it is they are testing.
>
> If someone unfamiliar with this test looks at the results, it's nice if they can
> intuit what it was that went wrong, based on the test case name.
>
> IMHO these names are too short and not descriptive enough.
The test cases are pretty much 1:1 with the examples table referenced in
the top comment of the file. Would it help if I moved the reference
comment closer to the test case definitions, or would you like the test
name to have a reference to a table entry encoded into it?
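For illustration only (a sketch, not what v2 currently does), an entry that
encodes both the date and the meaning of the corresponding table row could
look roughly like this:

		{
			/*
			 * inodes.rst timestamp table: upper bound of the
			 * original 32-bit signed seconds field, no extra bits.
			 */
			.test_case_name =
				"2038-01-19 upper bound of 32-bit seconds, extra_bits 0",
			.msb_set = false,
			.lower_bound = false,
			.extra_bits = 0,
			.expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
		},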
>  -- Tim
>
>
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2106-02-07",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2106-02-07",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2174-02-25",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2174-02-25",
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits =  2,
> > +                     .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2242-03-16",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2242-03-16",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = " 2310-04-04",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = " 2310-04-04 00:00:00.1",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 6,
> > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2378-04-22
> > 00:00:00.MAX_NSEC",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0xFFFFFFFF,
> > +                     .expected = {.tv_sec = 0x300000000LL,
> > +                                  .tv_nsec = MAX_NANOSECONDS},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2378-04-22",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 3,
> > +                     .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2446-05-10",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 3,
> > +                     .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> > +             }
> > +     };
> > +
> > +     struct timespec64 timestamp;
> > +     int i;
> > +
> > +     for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> > +             timestamp.tv_sec = get_32bit_time(&test_data[i]);
> > +             ext4_decode_extra_time(&timestamp,
> > +                                    cpu_to_le32(test_data[i].extra_bits));
> > +
> > +             KUNIT_EXPECT_EQ_MSG(test,
> > +                                 test_data[i].expected.tv_sec,
> > +                                 timestamp.tv_sec,
> > +                                 CASE_NAME_FORMAT,
> > +                                 test_data[i].test_case_name,
> > +                                 test_data[i].msb_set,
> > +                                 test_data[i].lower_bound,
> > +                                 test_data[i].extra_bits);
> > +             KUNIT_EXPECT_EQ_MSG(test,
> > +                                 test_data[i].expected.tv_nsec,
> > +                                 timestamp.tv_nsec,
> > +                                 CASE_NAME_FORMAT,
> > +                                 test_data[i].test_case_name,
> > +                                 test_data[i].msb_set,
> > +                                 test_data[i].lower_bound,
> > +                                 test_data[i].extra_bits);
> > +     }
> > +}
> > +
> > +static struct kunit_case ext4_inode_test_cases[] = {
> > +     KUNIT_CASE(inode_test_xtimestamp_decoding),
> > +     {}
> > +};
> > +
> > +static struct kunit_suite ext4_inode_test_suite = {
> > +     .name = "ext4_inode_test",
> > +     .test_cases = ext4_inode_test_cases,
> > +};
> > +
> > +kunit_test_suite(ext4_inode_test_suite);
> > --
> > 2.23.0.700.g56cf767bdb-goog
Shuah Khan Oct. 10, 2019, 5:11 p.m. UTC | #3
On 10/9/19 8:39 PM, Iurii Zaikin wrote:
> KUnit tests for decoding extended 64 bit timestamps.
> 
"Added the link to the ext4 docs from which the tests were derived."

Document reference is great. I would still like to see a summary
in the commit log.

As you said below:

"This builds the ext4 inode sysctl unit test, which runs on boot."

Also include what the user should expect to see when one of these fails.


> Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> ---
>   fs/ext4/Kconfig      |  12 +++
>   fs/ext4/Makefile     |   1 +
>   fs/ext4/inode-test.c | 221 +++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 234 insertions(+)
>   create mode 100644 fs/ext4/inode-test.c
> 
> diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> index cbb5ca830e57..cb0b52753674 100644
> --- a/fs/ext4/Kconfig
> +++ b/fs/ext4/Kconfig
> @@ -106,3 +106,15 @@ config EXT4_DEBUG
>   	  If you select Y here, then you will be able to turn on debugging
>   	  with a command such as:
>   		echo 1 > /sys/module/ext4/parameters/mballoc_debug
> +
> +config EXT4_KUNIT_TESTS
> +	bool "KUnit test for ext4 inode"
> +	depends on EXT4_FS
> +	depends on KUNIT
> +	help
> +	  This builds the ext4 inode sysctl unit test, which runs on boot.
> +	  Tests the encoding correctness of ext4 inode.
> +	  For more information on KUnit and unit tests in general please refer
> +	  to the KUnit documentation in Documentation/dev-tools/kunit/.

Please add Documentation/filesystems/ext4/inodes.rst Inode Timestamps
here as well.

Yeah. Especially after looking at the document, a summary of what these
tests do is definitely helpful. You can't expect users to read the
document before enabling it. Please write a summary of the tests and what
they do and add it here and then in the commit log. Also include what the
user should expect when they pass and when one of them fails.

> +
> +	  If unsure, say N.
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index b17ddc229ac5..a0588fd2eea6 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -13,4 +13,5 @@ ext4-y	:= balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
> 
>   ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
>   ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
> +ext4-$(CONFIG_EXT4_KUNIT_TESTS)	+= inode-test.o
>   ext4-$(CONFIG_FS_VERITY)		+= verity.o
> diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> new file mode 100644
> index 000000000000..43bc6cb547cd
> --- /dev/null
> +++ b/fs/ext4/inode-test.c
> @@ -0,0 +1,221 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> + * timestamps in ext4 inode structs are decoded correctly.
> + * These tests are derived from the table under
> + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps

Yeah. Especially after looking at the document, a summary of what these
tests do is definitely helpful. You can't expect users to read the
document before enabling the tests.

> + */
> +
> +#include <kunit/test.h>
> +#include <linux/kernel.h>
> +#include <linux/time64.h>
> +
> +#include "ext4.h"
> +
> +/* binary: 00000000 00000000 00000000 00000000 */
> +#define LOWER_MSB_0 0L
> +/* binary: 01111111 11111111 11111111 11111111 */
> +#define UPPER_MSB_0 0x7fffffffL
> +/* binary: 10000000 00000000 00000000 00000000 */
> +#define LOWER_MSB_1 (-0x80000000L)
> +/* binary: 11111111 11111111 11111111 11111111 */
> +#define UPPER_MSB_1 (-1L)
> +/* binary: 00111111   11111111 11111111 11111111 */
> +#define MAX_NANOSECONDS ((1L << 30) - 1)
> +
> +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
> +
> +struct timestamp_expectation {
> +	const char *test_case_name;
> +	struct timespec64 expected;
> +	u32 extra_bits;
> +	bool msb_set;
> +	bool lower_bound;
> +};
> +
> +static time64_t get_32bit_time(const struct timestamp_expectation * const test)
> +{
> +	if (test->msb_set) {
> +		if (test->lower_bound)
> +			return LOWER_MSB_1;
> +
> +		return UPPER_MSB_1;
> +	}
> +
> +	if (test->lower_bound)
> +		return LOWER_MSB_0;
> +	return UPPER_MSB_0;
> +}
> +
> +
> +static void inode_test_xtimestamp_decoding(struct kunit *test)
> +{
> +	const struct timestamp_expectation test_data[] = {
> +		{
> +			.test_case_name = "1901-12-13",
> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "1969-12-31",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "1970-01-01",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 0,
> +			.expected = {0LL, 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2038-01-19",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 0,
> +			.expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2038-01-19",
> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2106-02-07",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2106-02-07",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2174-02-25",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 1,
> +			.expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2174-02-25",
> +			.msb_set = true,
> +			.lower_bound = true,
> +			.extra_bits =  2,
> +			.expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2242-03-16",
> +			.msb_set = true,
> +			.lower_bound = false,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2242-03-16",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = " 2310-04-04",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 2,
> +			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = " 2310-04-04 00:00:00.1",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 6,
> +			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> +		},
> +
> +		{
> +			.test_case_name = "2378-04-22 00:00:00.MAX_NSEC",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 0xFFFFFFFF,
> +			.expected = {.tv_sec = 0x300000000LL,
> +				     .tv_nsec = MAX_NANOSECONDS},
> +		},
> +
> +		{
> +			.test_case_name = "2378-04-22",
> +			.msb_set = false,
> +			.lower_bound = true,
> +			.extra_bits = 3,
> +			.expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> +		},
> +
> +		{
> +			.test_case_name = "2446-05-10",
> +			.msb_set = false,
> +			.lower_bound = false,
> +			.extra_bits = 3,
> +			.expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> +		}
> +	};
> +

Is there a way to make the test data dynamic? Can you read from a data
file? It will be easier to if the data

Maybe this is a question for Brendan?

> +	struct timespec64 timestamp;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> +		timestamp.tv_sec = get_32bit_time(&test_data[i]);
> +		ext4_decode_extra_time(&timestamp,
> +				       cpu_to_le32(test_data[i].extra_bits));
> +
> +		KUNIT_EXPECT_EQ_MSG(test,
> +				    test_data[i].expected.tv_sec,
> +				    timestamp.tv_sec,
> +				    CASE_NAME_FORMAT,
> +				    test_data[i].test_case_name,
> +				    test_data[i].msb_set,
> +				    test_data[i].lower_bound,
> +				    test_data[i].extra_bits);
> +		KUNIT_EXPECT_EQ_MSG(test,
> +				    test_data[i].expected.tv_nsec,
> +				    timestamp.tv_nsec,
> +				    CASE_NAME_FORMAT,
> +				    test_data[i].test_case_name,
> +				    test_data[i].msb_set,
> +				    test_data[i].lower_bound,
> +				    test_data[i].extra_bits);
> +	}
> +}
> +
> +static struct kunit_case ext4_inode_test_cases[] = {
> +	KUNIT_CASE(inode_test_xtimestamp_decoding),
> +	{}
> +};
> +
> +static struct kunit_suite ext4_inode_test_suite = {
> +	.name = "ext4_inode_test",
> +	.test_cases = ext4_inode_test_cases,
> +};
> +
> +kunit_test_suite(ext4_inode_test_suite);
> --
> 2.23.0.700.g56cf767bdb-goog
> 

thanks,
-- Shuah
Bird, Tim Oct. 10, 2019, 8:29 p.m. UTC | #4
> -----Original Message-----
> From: Iurii Zaikin on Thursday, October 10, 2019 6:45 AM
> 
> On Wed, Oct 9, 2019 at 8:47 PM <Tim.Bird@sony.com> wrote:
> >
> >
> > > -----Original Message-----
> > > From Iurii Zaikin on Wednesday, October 09, 2019 4:40 PM
> > >
> > > KUnit tests for decoding extended 64 bit timestamps.
> > >
> > > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > > ---
> > >  fs/ext4/Kconfig      |  12 +++
> > >  fs/ext4/Makefile     |   1 +
> > >  fs/ext4/inode-test.c | 221
> > > +++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 234 insertions(+)
> > >  create mode 100644 fs/ext4/inode-test.c
> > >
> > > diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> > > index cbb5ca830e57..cb0b52753674 100644
> > > --- a/fs/ext4/Kconfig
> > > +++ b/fs/ext4/Kconfig
> > > @@ -106,3 +106,15 @@ config EXT4_DEBUG
> > >         If you select Y here, then you will be able to turn on debugging
> > >         with a command such as:
> > >               echo 1 > /sys/module/ext4/parameters/mballoc_debug
> > > +
> > > +config EXT4_KUNIT_TESTS
> > > +     bool "KUnit test for ext4 inode"
> > > +     depends on EXT4_FS
> > > +     depends on KUNIT
> > > +     help
> > > +       This builds the ext4 inode sysctl unit test, which runs on boot.
> > > +       Tests the encoding correctness of ext4 inode.
> > > +       For more information on KUnit and unit tests in general please refer
> > > +       to the KUnit documentation in Documentation/dev-tools/kunit/.
> > > +
> > > +       If unsure, say N.
> > > diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> > > index b17ddc229ac5..a0588fd2eea6 100644
> > > --- a/fs/ext4/Makefile
> > > +++ b/fs/ext4/Makefile
> > > @@ -13,4 +13,5 @@ ext4-y      := balloc.o bitmap.o block_validity.o dir.o
> > > ext4_jbd2.o extents.o \
> > >
> > >  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)     += acl.o
> > >  ext4-$(CONFIG_EXT4_FS_SECURITY)              += xattr_security.o
> > > +ext4-$(CONFIG_EXT4_KUNIT_TESTS)      += inode-test.o
> > >  ext4-$(CONFIG_FS_VERITY)             += verity.o
> > > diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> > > new file mode 100644
> > > index 000000000000..43bc6cb547cd
> > > --- /dev/null
> > > +++ b/fs/ext4/inode-test.c
> > > @@ -0,0 +1,221 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> > > + * timestamps in ext4 inode structs are decoded correctly.
> > > + * These tests are derived from the table under
> > > + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> > > + */
> > > +
> > > +#include <kunit/test.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/time64.h>
> > > +
> > > +#include "ext4.h"
> > > +
> > > +/* binary: 00000000 00000000 00000000 00000000 */
> > > +#define LOWER_MSB_0 0L
> > > +/* binary: 01111111 11111111 11111111 11111111 */
> > > +#define UPPER_MSB_0 0x7fffffffL
> > > +/* binary: 10000000 00000000 00000000 00000000 */
> > > +#define LOWER_MSB_1 (-0x80000000L)
> > > +/* binary: 11111111 11111111 11111111 11111111 */
> > > +#define UPPER_MSB_1 (-1L)
> > > +/* binary: 00111111   11111111 11111111 11111111 */
> > > +#define MAX_NANOSECONDS ((1L << 30) - 1)
> > > +
> > > +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x
> extra_bits:
> > > %x"
> > > +
> > > +struct timestamp_expectation {
> > > +     const char *test_case_name;
> > > +     struct timespec64 expected;
> > > +     u32 extra_bits;
> > > +     bool msb_set;
> > > +     bool lower_bound;
> > > +};
> > > +
> > > +static time64_t get_32bit_time(const struct timestamp_expectation *
> const
> > > test)
> > > +{
> > > +     if (test->msb_set) {
> > > +             if (test->lower_bound)
> > > +                     return LOWER_MSB_1;
> > > +
> > > +             return UPPER_MSB_1;
> > > +     }
> > > +
> > > +     if (test->lower_bound)
> > > +             return LOWER_MSB_0;
> > > +     return UPPER_MSB_0;
> > > +}
> > > +
> > > +
> > > +static void inode_test_xtimestamp_decoding(struct kunit *test)
> > > +{
> > > +     const struct timestamp_expectation test_data[] = {
> > > +             {
> > > +                     .test_case_name = "1901-12-13",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 0,
> > > +                     .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "1969-12-31",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 0,
> > > +                     .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "1970-01-01",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 0,
> > > +                     .expected = {0LL, 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2038-01-19",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 0,
> > > +                     .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2038-01-19",
> > It's quite handy if testcase names can be unique, and describe what it is
> they are testing.
> >
> > If someone unfamiliar with this test looks at the results, it's nice if they can
> > intuit what it was that went wrong, based on the test case name.
> >
> > IMHO these names are too short and not descriptive enough.
>
> The test cases are pretty much 1:1 to the examples table referenced at
> the top comment of the file. Would it help if I move the reference
> comment closer to the test case definition or would you like the test
> name to have a reference to a table entry encoded into it?

I think moving the comment to right above the testcase definitions
would be good.  Somehow I missed that.

OK - I also missed the usage of the CASE_NAME_FORMAT string.  This obviously
handles the issue of the testcase names being unique, but doesn't help those not
familiar with the test.

What I'm suggesting is just a little bit of extra wording, describing in English
what the test is checking for.  This is for people looking
at test results who don't know the internals of the test.

I'm pretty sure these are the wrong descriptions, but something like this:
             {
                     .test_case_name = "2038-01-19 check upper edge of 31-bit boundary",
                     .msb_set = false,
                     .lower_bound = false,
                     .extra_bits = 0,
                     .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
             },
             {
                     .test_case_name = "2038-01-19 check first use of extra epoch bit",
                     .msb_set = true,
                     .lower_bound = true,
                     .extra_bits = 1,
                     .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
             },

I'm not pedantic about it.
 -- Tim

> > > +                     .msb_set = true,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 1,
> > > +                     .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2106-02-07",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 1,
> > > +                     .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2106-02-07",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 1,
> > > +                     .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2174-02-25",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 1,
> > > +                     .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2174-02-25",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits =  2,
> > > +                     .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2242-03-16",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 2,
> > > +                     .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2242-03-16",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 2,
> > > +                     .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = " 2310-04-04",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 2,
> > > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = " 2310-04-04 00:00:00.1",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 6,
> > > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2378-04-22
> > > 00:00:00.MAX_NSEC",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 0xFFFFFFFF,
> > > +                     .expected = {.tv_sec = 0x300000000LL,
> > > +                                  .tv_nsec = MAX_NANOSECONDS},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2378-04-22",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 3,
> > > +                     .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
> > > +             {
> > > +                     .test_case_name = "2446-05-10",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 3,
> > > +                     .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> > > +             }
> > > +     };
> > > +
> > > +     struct timespec64 timestamp;
> > > +     int i;
> > > +
> > > +     for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> > > +             timestamp.tv_sec = get_32bit_time(&test_data[i]);
> > > +             ext4_decode_extra_time(&timestamp,
> > > +                                    cpu_to_le32(test_data[i].extra_bits));
> > > +
> > > +             KUNIT_EXPECT_EQ_MSG(test,
> > > +                                 test_data[i].expected.tv_sec,
> > > +                                 timestamp.tv_sec,
> > > +                                 CASE_NAME_FORMAT,
> > > +                                 test_data[i].test_case_name,
> > > +                                 test_data[i].msb_set,
> > > +                                 test_data[i].lower_bound,
> > > +                                 test_data[i].extra_bits);
> > > +             KUNIT_EXPECT_EQ_MSG(test,
> > > +                                 test_data[i].expected.tv_nsec,
> > > +                                 timestamp.tv_nsec,
> > > +                                 CASE_NAME_FORMAT,
> > > +                                 test_data[i].test_case_name,
> > > +                                 test_data[i].msb_set,
> > > +                                 test_data[i].lower_bound,
> > > +                                 test_data[i].extra_bits);
> > > +     }
> > > +}
> > > +
> > > +static struct kunit_case ext4_inode_test_cases[] = {
> > > +     KUNIT_CASE(inode_test_xtimestamp_decoding),
> > > +     {}
> > > +};
> > > +
> > > +static struct kunit_suite ext4_inode_test_suite = {
> > > +     .name = "ext4_inode_test",
> > > +     .test_cases = ext4_inode_test_cases,
> > > +};
> > > +
> > > +kunit_test_suite(ext4_inode_test_suite);
> > > --
> > > 2.23.0.700.g56cf767bdb-goog
Iurii Zaikin Oct. 10, 2019, 10:13 p.m. UTC | #5
On Thu, Oct 10, 2019 at 10:11 AM Shuah Khan <skhan@linuxfoundation.org> wrote:
>
> On 10/9/19 8:39 PM, Iurii Zaikin wrote:
> > KUnit tests for decoding extended 64 bit timestamps.
> >
> "Added the link to the ext4 docs from which the tests were derived."
>
> Document reference is great. I would still like to see summary
> in the commit log.
>
> As you said below:
>
> "This builds the ext4 inode sysctl unit test, which runs on boot."
>
> Also include what should user expect to see when one of these fails.
Will do.
>
> > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > ---
> >   fs/ext4/Kconfig      |  12 +++
> >   fs/ext4/Makefile     |   1 +
> >   fs/ext4/inode-test.c | 221 +++++++++++++++++++++++++++++++++++++++++++
> >   3 files changed, 234 insertions(+)
> >   create mode 100644 fs/ext4/inode-test.c
> >
> > diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> > index cbb5ca830e57..cb0b52753674 100644
> > --- a/fs/ext4/Kconfig
> > +++ b/fs/ext4/Kconfig
> > @@ -106,3 +106,15 @@ config EXT4_DEBUG
> >         If you select Y here, then you will be able to turn on debugging
> >         with a command such as:
> >               echo 1 > /sys/module/ext4/parameters/mballoc_debug
> > +
> > +config EXT4_KUNIT_TESTS
> > +     bool "KUnit test for ext4 inode"
> > +     depends on EXT4_FS
> > +     depends on KUNIT
> > +     help
> > +       This builds the ext4 inode sysctl unit test, which runs on boot.
> > +       Tests the encoding correctness of ext4 inode.
> > +       For more information on KUnit and unit tests in general please refer
> > +       to the KUnit documentation in Documentation/dev-tools/kunit/.
>
> Please add Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> here as well.
> Yeah. Especially after looking at the document, summary of what these
> test(s) is definitely helpful. You can't expect users to read the
> document before enabling it. Please write a summary of tests and what
> they do and add it here and then in the commit log. Also include what
> user should expect when they pass and when one of them fails.
>
I'm not sure this is compatible with Theodore's preference for having a single
config option for all ext4 tests. If anything, I should be removing all
inode-specific stuff from the description.
I think it makes sense to add wording that this option is only useful for
devs running a kernel test harness and should not be enabled in production.
> > +
> > +       If unsure, say N.
> > diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> > index b17ddc229ac5..a0588fd2eea6 100644
> > --- a/fs/ext4/Makefile
> > +++ b/fs/ext4/Makefile
> > @@ -13,4 +13,5 @@ ext4-y      := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
> >
> >   ext4-$(CONFIG_EXT4_FS_POSIX_ACL)    += acl.o
> >   ext4-$(CONFIG_EXT4_FS_SECURITY)             += xattr_security.o
> > +ext4-$(CONFIG_EXT4_KUNIT_TESTS)      += inode-test.o
> >   ext4-$(CONFIG_FS_VERITY)            += verity.o
> > diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> > new file mode 100644
> > index 000000000000..43bc6cb547cd
> > --- /dev/null
> > +++ b/fs/ext4/inode-test.c
> > @@ -0,0 +1,221 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> > + * timestamps in ext4 inode structs are decoded correctly.
> > + * These tests are derived from the table under
> > + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
>
> Yeah. Especially after looking at the document, summary of what these
> test(s) is definitely helpful. You can't expect users to read the
> document before enabling the tests.
>
> > + */
> > +
> > +#include <kunit/test.h>
> > +#include <linux/kernel.h>
> > +#include <linux/time64.h>
> > +
> > +#include "ext4.h"
> > +
> > +/* binary: 00000000 00000000 00000000 00000000 */
> > +#define LOWER_MSB_0 0L
> > +/* binary: 01111111 11111111 11111111 11111111 */
> > +#define UPPER_MSB_0 0x7fffffffL
> > +/* binary: 10000000 00000000 00000000 00000000 */
> > +#define LOWER_MSB_1 (-0x80000000L)
> > +/* binary: 11111111 11111111 11111111 11111111 */
> > +#define UPPER_MSB_1 (-1L)
> > +/* binary: 00111111   11111111 11111111 11111111 */
> > +#define MAX_NANOSECONDS ((1L << 30) - 1)
> > +
> > +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
> > +
> > +struct timestamp_expectation {
> > +     const char *test_case_name;
> > +     struct timespec64 expected;
> > +     u32 extra_bits;
> > +     bool msb_set;
> > +     bool lower_bound;
> > +};
> > +
> > +static time64_t get_32bit_time(const struct timestamp_expectation * const test)
> > +{
> > +     if (test->msb_set) {
> > +             if (test->lower_bound)
> > +                     return LOWER_MSB_1;
> > +
> > +             return UPPER_MSB_1;
> > +     }
> > +
> > +     if (test->lower_bound)
> > +             return LOWER_MSB_0;
> > +     return UPPER_MSB_0;
> > +}
> > +
> > +
> > +static void inode_test_xtimestamp_decoding(struct kunit *test)
> > +{
> > +     const struct timestamp_expectation test_data[] = {
> > +             {
> > +                     .test_case_name = "1901-12-13",
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "1969-12-31",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "1970-01-01",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0,
> > +                     .expected = {0LL, 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2038-01-19",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 0,
> > +                     .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2038-01-19",
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2106-02-07",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2106-02-07",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2174-02-25",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 1,
> > +                     .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2174-02-25",
> > +                     .msb_set = true,
> > +                     .lower_bound = true,
> > +                     .extra_bits =  2,
> > +                     .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2242-03-16",
> > +                     .msb_set = true,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2242-03-16",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = " 2310-04-04",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 2,
> > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = " 2310-04-04 00:00:00.1",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 6,
> > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2378-04-22 00:00:00.MAX_NSEC",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 0xFFFFFFFF,
> > +                     .expected = {.tv_sec = 0x300000000LL,
> > +                                  .tv_nsec = MAX_NANOSECONDS},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2378-04-22",
> > +                     .msb_set = false,
> > +                     .lower_bound = true,
> > +                     .extra_bits = 3,
> > +                     .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> > +             },
> > +
> > +             {
> > +                     .test_case_name = "2446-05-10",
> > +                     .msb_set = false,
> > +                     .lower_bound = false,
> > +                     .extra_bits = 3,
> > +                     .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> > +             }
> > +     };
> > +
>
> Is there a way to make the test data dynamic. Can you read from a data
> file? It will be easier to if the data
>
> Maybe this is question to Brendan?
>
From the general unit test philosophy, unit tests must be 100% deterministic,
repeatable and simple enough to be correct by visual inspection; dynamically
generated test data runs contrary to these goals, IMHO.
As for reading from a data file, I'm not sure what exactly you mean here:
- Having the running kernel read a file from the filesystem, especially as
early in the initialization process as KUnit currently runs, is something I'm
not sure how to implement reliably. Also, doing I/O in the tests will make
them slower and require more setup from the test running environment.
- Reading a file at the build stage and linking it as a data blob into the
kernel image. This approach looks better to me since it avoids the I/O and
has no noticeable speed penalty or test harness requirements. It would be up
to Brendan whether he wants such a capability in KUnit; based on the
user-space test code I've seen so far, the number of test data points in this
test doesn't warrant reading from files even in userspace, which has far
fewer constraints.
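Purely to illustrate that second option (a sketch, not part of this patch),
the C side could look roughly like the following; the symbol names are made
up and would have to be provided by whatever build rule embeds the data file:

	/* Hypothetical: start/end symbols exported by the embedding rule. */
	extern const char __ext4_inode_test_data_start[];
	extern const char __ext4_inode_test_data_end[];

	static const void *inode_test_data(size_t *len)
	{
		*len = __ext4_inode_test_data_end - __ext4_inode_test_data_start;
		return __ext4_inode_test_data_start;
	}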
> > +     struct timespec64 timestamp;
> > +     int i;
> > +
> > +     for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> > +             timestamp.tv_sec = get_32bit_time(&test_data[i]);
> > +             ext4_decode_extra_time(&timestamp,
> > +                                    cpu_to_le32(test_data[i].extra_bits));
> > +
> > +             KUNIT_EXPECT_EQ_MSG(test,
> > +                                 test_data[i].expected.tv_sec,
> > +                                 timestamp.tv_sec,
> > +                                 CASE_NAME_FORMAT,
> > +                                 test_data[i].test_case_name,
> > +                                 test_data[i].msb_set,
> > +                                 test_data[i].lower_bound,
> > +                                 test_data[i].extra_bits);
> > +             KUNIT_EXPECT_EQ_MSG(test,
> > +                                 test_data[i].expected.tv_nsec,
> > +                                 timestamp.tv_nsec,
> > +                                 CASE_NAME_FORMAT,
> > +                                 test_data[i].test_case_name,
> > +                                 test_data[i].msb_set,
> > +                                 test_data[i].lower_bound,
> > +                                 test_data[i].extra_bits);
> > +     }
> > +}
> > +
> > +static struct kunit_case ext4_inode_test_cases[] = {
> > +     KUNIT_CASE(inode_test_xtimestamp_decoding),
> > +     {}
> > +};
> > +
> > +static struct kunit_suite ext4_inode_test_suite = {
> > +     .name = "ext4_inode_test",
> > +     .test_cases = ext4_inode_test_cases,
> > +};
> > +
> > +kunit_test_suite(ext4_inode_test_suite);
> > --
> > 2.23.0.700.g56cf767bdb-goog
> >
>
> thanks,
> -- Shuah
Iurii Zaikin Oct. 10, 2019, 11:49 p.m. UTC | #6
On Thu, Oct 10, 2019 at 1:29 PM <Tim.Bird@sony.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Iurii Zaikin on Thursday, October 10, 2019 6:45 AM
> >
> > On Wed, Oct 9, 2019 at 8:47 PM <Tim.Bird@sony.com> wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From Iurii Zaikin on Wednesday, October 09, 2019 4:40 PM
> > > >
> > > > KUnit tests for decoding extended 64 bit timestamps.
> > > >
> > > > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > > > ---
> > > >  fs/ext4/Kconfig      |  12 +++
> > > >  fs/ext4/Makefile     |   1 +
> > > >  fs/ext4/inode-test.c | 221
> > > > +++++++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 234 insertions(+)
> > > >  create mode 100644 fs/ext4/inode-test.c
> > > >
> > > > diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> > > > index cbb5ca830e57..cb0b52753674 100644
> > > > --- a/fs/ext4/Kconfig
> > > > +++ b/fs/ext4/Kconfig
> > > > @@ -106,3 +106,15 @@ config EXT4_DEBUG
> > > >         If you select Y here, then you will be able to turn on debugging
> > > >         with a command such as:
> > > >               echo 1 > /sys/module/ext4/parameters/mballoc_debug
> > > > +
> > > > +config EXT4_KUNIT_TESTS
> > > > +     bool "KUnit test for ext4 inode"
> > > > +     depends on EXT4_FS
> > > > +     depends on KUNIT
> > > > +     help
> > > > +       This builds the ext4 inode sysctl unit test, which runs on boot.
> > > > +       Tests the encoding correctness of ext4 inode.
> > > > +       For more information on KUnit and unit tests in general please refer
> > > > +       to the KUnit documentation in Documentation/dev-tools/kunit/.
> > > > +
> > > > +       If unsure, say N.
> > > > diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> > > > index b17ddc229ac5..a0588fd2eea6 100644
> > > > --- a/fs/ext4/Makefile
> > > > +++ b/fs/ext4/Makefile
> > > > @@ -13,4 +13,5 @@ ext4-y      := balloc.o bitmap.o block_validity.o dir.o
> > > > ext4_jbd2.o extents.o \
> > > >
> > > >  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)     += acl.o
> > > >  ext4-$(CONFIG_EXT4_FS_SECURITY)              += xattr_security.o
> > > > +ext4-$(CONFIG_EXT4_KUNIT_TESTS)      += inode-test.o
> > > >  ext4-$(CONFIG_FS_VERITY)             += verity.o
> > > > diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> > > > new file mode 100644
> > > > index 000000000000..43bc6cb547cd
> > > > --- /dev/null
> > > > +++ b/fs/ext4/inode-test.c
> > > > @@ -0,0 +1,221 @@
> > > > +// SPDX-License-Identifier: GPL-2.0
> > > > +/*
> > > > + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> > > > + * timestamps in ext4 inode structs are decoded correctly.
> > > > + * These tests are derived from the table under
> > > > + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> > > > + */
> > > > +
> > > > +#include <kunit/test.h>
> > > > +#include <linux/kernel.h>
> > > > +#include <linux/time64.h>
> > > > +
> > > > +#include "ext4.h"
> > > > +
> > > > +/* binary: 00000000 00000000 00000000 00000000 */
> > > > +#define LOWER_MSB_0 0L
> > > > +/* binary: 01111111 11111111 11111111 11111111 */
> > > > +#define UPPER_MSB_0 0x7fffffffL
> > > > +/* binary: 10000000 00000000 00000000 00000000 */
> > > > +#define LOWER_MSB_1 (-0x80000000L)
> > > > +/* binary: 11111111 11111111 11111111 11111111 */
> > > > +#define UPPER_MSB_1 (-1L)
> > > > +/* binary: 00111111   11111111 11111111 11111111 */
> > > > +#define MAX_NANOSECONDS ((1L << 30) - 1)
> > > > +
> > > > +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x
> > extra_bits:
> > > > %x"
> > > > +
> > > > +struct timestamp_expectation {
> > > > +     const char *test_case_name;
> > > > +     struct timespec64 expected;
> > > > +     u32 extra_bits;
> > > > +     bool msb_set;
> > > > +     bool lower_bound;
> > > > +};
> > > > +
> > > > +static time64_t get_32bit_time(const struct timestamp_expectation *
> > const
> > > > test)
> > > > +{
> > > > +     if (test->msb_set) {
> > > > +             if (test->lower_bound)
> > > > +                     return LOWER_MSB_1;
> > > > +
> > > > +             return UPPER_MSB_1;
> > > > +     }
> > > > +
> > > > +     if (test->lower_bound)
> > > > +             return LOWER_MSB_0;
> > > > +     return UPPER_MSB_0;
> > > > +}
> > > > +
> > > > +
> > > > +static void inode_test_xtimestamp_decoding(struct kunit *test)
> > > > +{
> > > > +     const struct timestamp_expectation test_data[] = {
> > > > +             {
> > > > +                     .test_case_name = "1901-12-13",
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 0,
> > > > +                     .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "1969-12-31",
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 0,
> > > > +                     .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "1970-01-01",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 0,
> > > > +                     .expected = {0LL, 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2038-01-19",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 0,
> > > > +                     .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2038-01-19",
> > > It's quite handy if testcase names can be unique, and describe what it is
> > they are testing.
> > >
> > > If someone unfamiliar with this test looks at the results, it's nice if they can
> > > intuit what it was that went wrong, based on the test case name.
> > >
> > > IMHO these names are too short and not descriptive enough.
> >
> > The test cases are pretty much 1:1 to the examples table referenced at
> > the top comment of the file. Would it help if I move the reference
> > comment closer to the test case definition or would you like the test
> > name to have a reference to a table entry encoded into it?
>
> I think moving the comment to right above the testcase definitions
> would be good.  Somehow I missed that.
>
Done
> OK - I also missed the usage of the CASE_NAME_FORMAT string.  This obviously
> handles the issue of the testcase names being unique, but doesn't help those not
> familiar with the test.
>
> What I'm suggesting is just a little bit of extra wording, describing in English
> what the test is checking for.  This is for people looking
> at test results who don't know the internals of the test.
>
> I'm pretty sure these are the wrong descriptions, but something like this:
>              {
>                      .test_case_name = "2038-01-19 check upper edge of 31-bit boundary",
>                      .msb_set = false,
>                      .lower_bound = false,
>                      .extra_bits = 0,
>                      .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
>              },
>              {
>                      .test_case_name = "2038-01-19 check first use of extra epoch bit",
>                      .msb_set = true,
>                      .lower_bound = true,
>                      .extra_bits = 1,
>                      .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
>              },
>
> I'm not pedantic about it.
Done
>  -- Tim
>
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 1,
> > > > +                     .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2106-02-07",
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 1,
> > > > +                     .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2106-02-07",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 1,
> > > > +                     .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2174-02-25",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 1,
> > > > +                     .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2174-02-25",
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits =  2,
> > > > +                     .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2242-03-16",
> > > > +                     .msb_set = true,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 2,
> > > > +                     .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2242-03-16",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 2,
> > > > +                     .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = " 2310-04-04",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 2,
> > > > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = " 2310-04-04 00:00:00.1",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 6,
> > > > +                     .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2378-04-22
> > > > 00:00:00.MAX_NSEC",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 0xFFFFFFFF,
> > > > +                     .expected = {.tv_sec = 0x300000000LL,
> > > > +                                  .tv_nsec = MAX_NANOSECONDS},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2378-04-22",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = true,
> > > > +                     .extra_bits = 3,
> > > > +                     .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
> > > > +             },
> > > > +
> > > > +             {
> > > > +                     .test_case_name = "2446-05-10",
> > > > +                     .msb_set = false,
> > > > +                     .lower_bound = false,
> > > > +                     .extra_bits = 3,
> > > > +                     .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> > > > +             }
> > > > +     };
> > > > +
> > > > +     struct timespec64 timestamp;
> > > > +     int i;
> > > > +
> > > > +     for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
> > > > +             timestamp.tv_sec = get_32bit_time(&test_data[i]);
> > > > +             ext4_decode_extra_time(&timestamp,
> > > > +                                    cpu_to_le32(test_data[i].extra_bits));
> > > > +
> > > > +             KUNIT_EXPECT_EQ_MSG(test,
> > > > +                                 test_data[i].expected.tv_sec,
> > > > +                                 timestamp.tv_sec,
> > > > +                                 CASE_NAME_FORMAT,
> > > > +                                 test_data[i].test_case_name,
> > > > +                                 test_data[i].msb_set,
> > > > +                                 test_data[i].lower_bound,
> > > > +                                 test_data[i].extra_bits);
> > > > +             KUNIT_EXPECT_EQ_MSG(test,
> > > > +                                 test_data[i].expected.tv_nsec,
> > > > +                                 timestamp.tv_nsec,
> > > > +                                 CASE_NAME_FORMAT,
> > > > +                                 test_data[i].test_case_name,
> > > > +                                 test_data[i].msb_set,
> > > > +                                 test_data[i].lower_bound,
> > > > +                                 test_data[i].extra_bits);
> > > > +     }
> > > > +}
> > > > +
> > > > +static struct kunit_case ext4_inode_test_cases[] = {
> > > > +     KUNIT_CASE(inode_test_xtimestamp_decoding),
> > > > +     {}
> > > > +};
> > > > +
> > > > +static struct kunit_suite ext4_inode_test_suite = {
> > > > +     .name = "ext4_inode_test",
> > > > +     .test_cases = ext4_inode_test_cases,
> > > > +};
> > > > +
> > > > +kunit_test_suite(ext4_inode_test_suite);
> > > > --
> > > > 2.23.0.700.g56cf767bdb-goog
Brendan Higgins Oct. 11, 2019, 10:05 a.m. UTC | #7
Sorry for the late reply. I am on vacation until Wednesday, October 16th.

On Thu, Oct 10, 2019 at 3:14 PM Iurii Zaikin <yzaikin@google.com> wrote:
>
> On Thu, Oct 10, 2019 at 10:11 AM Shuah Khan <skhan@linuxfoundation.org> wrote:
> >
> > On 10/9/19 8:39 PM, Iurii Zaikin wrote:
> > > KUnit tests for decoding extended 64 bit timestamps.
> > >
> > "Added the link to the ext4 docs from which the tests were derived."
> >
> > Document reference is great. I would still like to see a summary
> > in the commit log.
> >
> > As you said below:
> >
> > "This builds the ext4 inode sysctl unit test, which runs on boot."
> >
> > Also include what a user should expect to see when one of these fails.
> Will do.
> >
> > > Signed-off-by: Iurii Zaikin <yzaikin@google.com>
> > > ---
> > >   fs/ext4/Kconfig      |  12 +++
> > >   fs/ext4/Makefile     |   1 +
> > >   fs/ext4/inode-test.c | 221 +++++++++++++++++++++++++++++++++++++++++++
> > >   3 files changed, 234 insertions(+)
> > >   create mode 100644 fs/ext4/inode-test.c
> > >
> > > diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> > > index cbb5ca830e57..cb0b52753674 100644
> > > --- a/fs/ext4/Kconfig
> > > +++ b/fs/ext4/Kconfig
> > > @@ -106,3 +106,15 @@ config EXT4_DEBUG
> > >         If you select Y here, then you will be able to turn on debugging
> > >         with a command such as:
> > >               echo 1 > /sys/module/ext4/parameters/mballoc_debug
> > > +
> > > +config EXT4_KUNIT_TESTS
> > > +     bool "KUnit test for ext4 inode"
> > > +     depends on EXT4_FS
> > > +     depends on KUNIT
> > > +     help
> > > +       This builds the ext4 inode sysctl unit test, which runs on boot.
> > > +       Tests the encoding correctness of ext4 inode.
> > > +       For more information on KUnit and unit tests in general please refer
> > > +       to the KUnit documentation in Documentation/dev-tools/kunit/.
> >
> > Please add Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> > here as well.
> > Yeah. Especially after looking at the document, a summary of what these
> > test(s) do is definitely helpful. You can't expect users to read the
> > document before enabling it. Please write a summary of the tests and what
> > they do and add it here and then in the commit log. Also include what
> > a user should expect when they pass and when one of them fails.
> >
> I'm not sure this is compatible with Theodore's preference for having a single
> config option for all ext4 tests. If anything, I should be removing

That's an interesting point. Should we try to establish a pattern for
how tests should be configured? My *very long term* goal is to
eventually have tests able to be built and run without any kind of
kernel of any kind, but I don't think that having a single config for
all tests in a subsystem gets in the way of that, so I don't think I
have a strong preference in terms of what I want to do.

Nevertheless, I think establishing patterns is good. Do we want to try
to follow Ted's preference as a general rule from now on?

> all inode-specific
> stuff from the description.
> I think it makes sense to add wording that this option is only useful
> for devs running
> a kernel test harness and should not be enabled in production.
> > > +
> > > +       If unsure, say N.
> > > diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> > > index b17ddc229ac5..a0588fd2eea6 100644
> > > --- a/fs/ext4/Makefile
> > > +++ b/fs/ext4/Makefile
> > > @@ -13,4 +13,5 @@ ext4-y      := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
> > >
> > >   ext4-$(CONFIG_EXT4_FS_POSIX_ACL)    += acl.o
> > >   ext4-$(CONFIG_EXT4_FS_SECURITY)             += xattr_security.o
> > > +ext4-$(CONFIG_EXT4_KUNIT_TESTS)      += inode-test.o
> > >   ext4-$(CONFIG_FS_VERITY)            += verity.o
> > > diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
> > > new file mode 100644
> > > index 000000000000..43bc6cb547cd
> > > --- /dev/null
> > > +++ b/fs/ext4/inode-test.c
> > > @@ -0,0 +1,221 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
> > > + * timestamps in ext4 inode structs are decoded correctly.
> > > + * These tests are derived from the table under
> > > + * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
> >
> > Yeah. Especially after looking at the document, a summary of what these
> > test(s) do is definitely helpful. You can't expect users to read the
> > document before enabling the tests.
> >
> > > + */
> > > +
> > > +#include <kunit/test.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/time64.h>
> > > +
> > > +#include "ext4.h"
> > > +
> > > +/* binary: 00000000 00000000 00000000 00000000 */
> > > +#define LOWER_MSB_0 0L
> > > +/* binary: 01111111 11111111 11111111 11111111 */
> > > +#define UPPER_MSB_0 0x7fffffffL
> > > +/* binary: 10000000 00000000 00000000 00000000 */
> > > +#define LOWER_MSB_1 (-0x80000000L)
> > > +/* binary: 11111111 11111111 11111111 11111111 */
> > > +#define UPPER_MSB_1 (-1L)
> > > +/* binary: 00111111   11111111 11111111 11111111 */
> > > +#define MAX_NANOSECONDS ((1L << 30) - 1)
> > > +
> > > +#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
> > > +
> > > +struct timestamp_expectation {
> > > +     const char *test_case_name;
> > > +     struct timespec64 expected;
> > > +     u32 extra_bits;
> > > +     bool msb_set;
> > > +     bool lower_bound;
> > > +};
> > > +
> > > +static time64_t get_32bit_time(const struct timestamp_expectation * const test)
> > > +{
> > > +     if (test->msb_set) {
> > > +             if (test->lower_bound)
> > > +                     return LOWER_MSB_1;
> > > +
> > > +             return UPPER_MSB_1;
> > > +     }
> > > +
> > > +     if (test->lower_bound)
> > > +             return LOWER_MSB_0;
> > > +     return UPPER_MSB_0;
> > > +}
> > > +
> > > +
> > > +static void inode_test_xtimestamp_decoding(struct kunit *test)
> > > +{
> > > +     const struct timestamp_expectation test_data[] = {
> > > +             {
> > > +                     .test_case_name = "1901-12-13",
> > > +                     .msb_set = true,
> > > +                     .lower_bound = true,
> > > +                     .extra_bits = 0,
> > > +                     .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
> > > +             },
> > > +
[...]
> > > +             {
> > > +                     .test_case_name = "2446-05-10",
> > > +                     .msb_set = false,
> > > +                     .lower_bound = false,
> > > +                     .extra_bits = 3,
> > > +                     .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
> > > +             }
> > > +     };
> > > +
> >
> > Is there a way to make the test data dynamic. Can you read from a data
> > file? It will be easier to if the data
> >
> > Maybe this is question to Brendan?
> >
> From the general unit test philosophy, unit tests must be 100% deterministic,
> repeatable and simple enough to be correct by visual inspection, dynamically
> generated test data runs contrary to these goals IMHO.

I 100% agree with this position on unit tests, Iurii.

> As for reading from a data file, not sure what exactly you mean here:
>  - Having a running kernel read a file in the filesystem, especially as early in
> the initialization process as KUnit currently runs is something I'm not sure
> how to implement reliably. Also, doing I/O in the tests will make them slower
> and require more setup from test running environment.
> - Reading a file in the build stage and linking it as a data
> blob into the
> kernel image. This approach looks better to me since it avoids the I/O and has
> no noticeable speed penalty or test harness requirements. It would be up to
> Brendan whether he wants such capability in KUnit and based on the user-space
> test code I've seen so far, the number of test data points in this
> test doesn't warrant
> reading from files even in userspace which has far fewer constraints.

I agree with Iurii. I don't think that this example alone warrants
adding support for being able to read test data in from a separate
file (I would also like some clarification here on what is meant by
reading in from a separate file). I can imagine some scenarios where
that might make sense, but I think it would be better to get more
examples before trying to support that use case.

Thanks!
Theodore Ts'o Oct. 11, 2019, 1:19 p.m. UTC | #8
On Fri, Oct 11, 2019 at 03:05:43AM -0700, Brendan Higgins wrote:
> That's an interesting point. Should we try to establish a pattern for
> how tests should be configured? My *very long term* goal is to
> eventually have tests able to be built and run without any kind of
> kernel of any kind, but I don't think that having a single config for
> all tests in a subsystem gets in the way of that, so I don't think I
> have a strong preference in terms of what I want to do.
> 
> Nevertheless, I think establishing patterns is good. Do we want to try
> to follow Ted's preference as a general rule from now on?

As I suggested on another thread (started on kunit-dev, but Brendan
has cc'ed in linux-kselftest), I think it might really work well if
"make kunit" runs all of the kunit tests automatically.  As we add
more kunit tests, finding all of the CONFIG options so they can be
added to the kunitconfig file is going to be hard, so kunit.py really
needs an --allconfig which does this automatically.

Along these lines, perhaps we should state that as a general rule the
CONFIG option for Kunit tests should only depend on KUNIT, and use
select to enable other dependencies.  i.e., for the ext4 kunit tests,
it should look like this:

config EXT4_KUNIT_TESTS
	bool "KUnit test for ext4 inode"
	select EXT4_FS
	depends on KUNIT
...

In the current patch, we use "depends on EXT4_FS", which meant that
when I first added "CONFIG_EXT4_KUNIT_TESTS=y" to the kunitconfig
file, I got the following confusing error message:

% ./tools/testing/kunit/kunit.py  run
Regenerating .config ...
ERROR:root:Provided Kconfig is not contained in validated .config!

Using "select EXT4_FS" makes it much easier to enable the ext4 kunit
tests in kunitconfig.  At the moment requiring that we add two lines to
kunitconfig to enable ext4 isn't _that_ bad:

CONFIG_EXT4_FS=y
CONFIG_EXT4_KUNIT_TESTS=y

but over time, if many subsystems start adding unit tests, the
overhead of managing the kunitconfig file is going to get unwieldy.
Hence my suggestion that we just make all Kunit CONFIG options depend
only on CONFIG_KUNIT.

> I agree with Iurii. I don't think that this example alone warrants
> adding support for being able to read test data in from a separate
> file (I would also like some clarification here on what is meant by
> reading in from a separate file). I can imagine some scenarios where
> that might make sense, but I think it would be better to get more
> examples before trying to support that use case.

So what I was thinking might happen is that for some of the largest
unit tests before I would transition to deciding that xfstests was the
better way to go, I *might* have a small, 100k ext4 file system which
would checked into the kernel sources as fs/ext4/kunit_test.img, and
there would be a makefile rule that would turn that into
fs/ext4/kunit_test_img.c file that might look something like:

const ext4_kunit_test_img[] = {
      0xde, ...
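
(Sketching that out a little further --- every name here is hypothetical,
nothing like this exists today --- the generated translation unit and the
declarations a test would consume might look roughly like this:)

#include <linux/types.h>

/* hypothetical fs/ext4/kunit_test_img.c, emitted by a build rule from
 * fs/ext4/kunit_test.img */
const u8 ext4_kunit_test_img[] = {
	0xde, /* ... the rest of the ~100k image bytes ... */
};
const size_t ext4_kunit_test_img_len = sizeof(ext4_kunit_test_img);

/* ...and the matching declarations the test file would pull in from a
 * hypothetical shared header */
extern const u8 ext4_kunit_test_img[];
extern const size_t ext4_kunit_test_img_len;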

But I'm not sure I actually want to go down that path.  It would
certainly be better from a test design perspective to create test mocks
at a higher layer, such as ext4_iget() and ext4_read_block_bitmap().

The problem is that quite a bit of code in ext4 would have to be
*extensively* refactored in order to allow for easy test mocking,
since we have calls to sb_bread, ext4_bread(), submit_bh(), etc.,
sprinkled alongside the code logic that we would want to test.

So using a small test image and making the cut line be at the buffer
cache layer is going to be much, *much* simpler at least in the short
term.  So the big question is how much of an investment (or technical
debt paydown) do I want to do right away, versus taking a shortcut to
get better unit test coverage more quickly, and then do further tech
debt reduction later?

   	       	     	    	       - Ted
Iurii Zaikin Oct. 12, 2019, 2:38 a.m. UTC | #9
On Fri, Oct 11, 2019 at 6:19 AM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>
> On Fri, Oct 11, 2019 at 03:05:43AM -0700, Brendan Higgins wrote:
> > That's an interesting point. Should we try to establish a pattern for
> > how tests should be configured? My *very long term* goal is to
> > eventually have tests able to be built and run without any kind of
> > kernel of any kind, but I don't think that having a single config for
> > all tests in a subsystem gets in the way of that, so I don't think I
> > have a strong preference in terms of what I want to do.
> >
> > Nevertheless, I think establishing patterns is good. Do we want to try
> > to follow Ted's preference as a general rule from now on?
>
> As I suggested on another thread (started on kunit-dev, but Brendan
> has cc'ed in linux-kselftest), I think it might really work well if
> "make kunit" runs all of the kunit tests automatically.  As we add
> more kunit tests, finding all of the CONFIG options so they can be
> added to the kunitconfig file is going to be hard, so kunit.py really
> needs an --allconfig which does this automatically.
>
> Along these lines, perhaps we should state that as a general rule the
> CONFIG option for Kunit tests should only depend on KUNIT, and use
> select to enable other dependencies.  i.e., for the ext4 kunit tests,
> it should look like this:
>
> config EXT4_KUNIT_TESTS
>         bool "KUnit test for ext4 inode"
>         select EXT4_FS
>         depends on KUNIT
> ...
Done

> In the current patch, we use "depends on EXT4_FS", which meant that
> when I first added "CONFIG_EXT4_KUNIT_TESTS=y" to the kunitconfig
> file, I got the following confusing error message:
>
> % ./tools/testing/kunit/kunit.py  run
> Regenerating .config ...
> ERROR:root:Provided Kconfig is not contained in validated .config!
>
> Using "select EXT4_FS" makes it much easier to enable the ext4 kunit
> tests in kunitconfig.  At the moment requiring that we add two lines to
> kunitconfig to enable ext4 isn't _that_ bad:
>
> CONFIG_EXT4_FS=y
> CONFIG_EXT4_KUNIT_TESTS=y
>
> but over time, if many subsystems start adding unit tests, the
> overhead of managing the kunitconfig file is going to get unwieldy.
> Hence my suggestion that we just make all Kunit CONFIG options depend
> only on CONFIG_KUNIT.
>
> > I agree with Iurii. I don't think that this example alone warrants
> > adding support for being able to read test data in from a separate
> > file (I would also like some clarification here on what is meant by
> > reading in from a separate file). I can imagine some scenarios where
> > that might make sense, but I think it would be better to get more
> > examples before trying to support that use case.
>
> So what I was thinking might happen is that for some of the largest
> unit tests before I would transition to deciding that xfstests was the
> better way to go, I *might* have a small, 100k ext4 file system which
> would checked into the kernel sources as fs/ext4/kunit_test.img, and
> there would be a makefile rule that would turn that into
> fs/ext4/kunit_test_img.c file that might look something like:
>
> const ext4_kunit_test_img[] = {
>       0xde, ...
>
> But I'm not sure I actually want to go down that path.  It would
> certainly be better from a test design perspective to create test mocks
> at a higher layer, such as ext4_iget() and ext4_read_block_bitmap().
>
> The problem is that quite a bit of code in ext4 would have to be
> *extensively* refactored in order to allow for easy test mocking,
> since we have calls to sb_bread, ext4_bread(), submit_bh(), etc.,
> sprinkled alongside the code logic that we would want to test.
>
> So using a small test image and making the cut line be at the buffer
> cache layer is going to be much, *much* simpler at least in the short
> term.  So the big question is how much of an investment (or technical
> debt paydown) do I want to do right away, versus taking a shortcut to
> get better unit test coverage more quickly, and then do further tech
> debt reduction later?
>
>                                        - Ted
Brendan Higgins Oct. 16, 2019, 10:18 p.m. UTC | #10
On Fri, Oct 11, 2019 at 6:19 AM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>
> On Fri, Oct 11, 2019 at 03:05:43AM -0700, Brendan Higgins wrote:
> > That's an interesting point. Should we try to establish a pattern for
> > how tests should be configured? My *very long term* goal is to
> > eventually have tests able to be built and run without any kind of
> > kernel of any kind, but I don't think that having a single config for
> > all tests in a subsystem gets in the way of that, so I don't think I
> > have a strong preference in terms of what I want to do.
> >
> > Nevertheless, I think establishing patterns is good. Do we want to try
> > to follow Ted's preference as a general rule from now on?
>
> As I suggested on another thread (started on kunit-dev, but Brendan
> has cc'ed in linux-kselftest), I think it might really work well if

For reference, that thread can be found here:
https://lore.kernel.org/linux-kselftest/CAFd5g46+OMmP8mYsH8vcpMpdOeYryp=1Lsab4Hy6pAhWjX5-4Q@mail.gmail.com/

> "make kunit" runs all of the kunit tests automatically.  As we add
> more kunit tests, finding all of the CONFIG options so they can be
> added to the kunitconfig file is going to be hard, so kunit.py really
> needs an --allconfig which does this automatically.
>
> Along these lines, perhaps we should state that as a general rule the
> CONFIG option for Kunit tests should only depend on KUNIT, and use
> select to enable other dependencies.  i.e., for the ext4 kunit tests,

I support this. Although I think that we will eventually find
ourselves in a position where it is not possible to satisfy all
dependencies for all KUnit tests, this may get us far enough along
that the problem may be easier, or may work well enough for a long
time. It's hard to say. In any case, I think it makes sense for a unit
test config to select its dependencies. I also think it makes sense to
make each subsystem have a master config for all KUnit tests.

> it should look like this:
>
> config EXT4_KUNIT_TESTS
>         bool "KUnit test for ext4 inode"
>         select EXT4_FS
>         depends on KUNIT
> ...
>
> In the current patch, we use "depends on EXT4_FS", which meant that
> when I first added "CONFIG_EXT4_KUNIT_TESTS=y" to the kunitconfig
> file, I got the following confusing error message:
>
> % ./tools/testing/kunit/kunit.py  run
> Regenerating .config ...
> ERROR:root:Provided Kconfig is not contained in validated .config!
>
> Using "select EXT4_FS" makes it much easier to enable the ext4 kunit
> tests in kunitconfig.  At the moment requiring that we add two lines to
> kunitconfig to enable ext4 isn't _that_ bad:
>
> CONFIG_EXT4_FS=y
> CONFIG_EXT4_KUNIT_TESTS=y
>
> but over time, if many subsystems start adding unit tests, the
> overhead of managing the kunitconfig file is going to get unwieldy.

Agreed.

> Hence my suggestion that we just make all Kunit CONFIG options depend
> only on CONFIG_KUNIT.

That makes sense for now. I think we will eventually reach a point
where that may not be enough or that we may have KUnit configs which
are mutually exclusive; nevertheless, I imagine that this may be a
good short term solution for a decent amount of time.

Shuah suggested an alternative in the form of config fragments. I
think Ted's solution is going to be easier to maintain in the short
term. Any other thoughts?

[...]

Cheers
Shuah Khan Oct. 16, 2019, 11:26 p.m. UTC | #11
On 10/16/19 4:18 PM, Brendan Higgins wrote:
> On Fri, Oct 11, 2019 at 6:19 AM Theodore Y. Ts'o <tytso@mit.edu> wrote:
>>
>> On Fri, Oct 11, 2019 at 03:05:43AM -0700, Brendan Higgins wrote:
>>> That's an interesting point. Should we try to establish a pattern for
>>> how tests should be configured? My *very long term* goal is to
>>> eventually have tests able to be built and run without any kind of
>>> kernel of any kind, but I don't think that having a single config for
>>> all tests in a subsystem gets in the way of that, so I don't think I
>>> have a strong preference in terms of what I want to do.
>>>
>>> Nevertheless, I think establishing patterns is good. Do we want to try
>>> to follow Ted's preference as a general rule from now on?
>>
>> As I suggested on another thread (started on kunit-dev, but Brendan
>> has cc'ed in linux-kselftest), I think it might really work well if
> 
> For reference, that thread can be found here:
> https://lore.kernel.org/linux-kselftest/CAFd5g46+OMmP8mYsH8vcpMpdOeYryp=1Lsab4Hy6pAhWjX5-4Q@mail.gmail.com/
> 
>> "make kunit" runs all of the kunit tests automatically.  As we add
>> more kunit tests, finding all of the CONFIG options so they can be
>> added to the kunitconfig file is going to be hard, so kunit.py really
>> needs an --allconfig which does this automatically.
>>
>> Along these lines, perhaps we should state that as a general rule the
>> CONFIG option for Kunit tests should only depend on KUNIT, and use
>> select to enable other dependencies.  i.e., for the ext4 kunit tests,
> 
> I support this. Although I think that we will eventually find
> ourselves in a position where it is not possible to satisfy all
> dependencies for all KUnit tests, this may get us far enough along
> that the problem may be easier, or may work well enough for a long
> time. It's hard to say. In any case, I think it makes sense for a unit
> test config to select its dependencies. I also think it makes sense to
> make each subsystem have a master config for all KUnit tests.
> 
>> it should look like this:
>>
>> config EXT4_KUNIT_TESTS
>>          bool "KUnit test for ext4 inode"
>>          select EXT4_FS
>>          depends on KUNIT
>> ...
>>
>> In the current patch, we use "depends on EXT4_FS", which meant that
>> when I first added "CONFIG_EXT4_KUNIT_TESTS=y" to the kunitconfig
>> file, I got the following confusing error message:
>>
>> % ./tools/testing/kunit/kunit.py  run
>> Regenerating .config ...
>> ERROR:root:Provided Kconfig is not contained in validated .config!
>>
>> Using "select EXT4_FS" makes it much easier to enable the ext4 kunit
>> tests in kunitconfig.  At the moment requiring that we add two lines to
>> kunitconfig to enable ext4 isn't _that_ bad:
>>
>> CONFIG_EXT4_FS=y
>> CONFIG_EXT4_KUNIT_TESTS=y
>>
>> but over time, if many subsystems start adding unit tests, the
>> overhead of managing the kunitconfig file is going to get unwieldy.
> 
> Agreed.
> 
>> Hence my suggestion that we just make all Kunit CONFIG options depend
>> only on CONFIG_KUNIT.
> 

Sounds good to me. I am a bit behind in reviews. I will review v5.

> That makes sense for now. I think we will eventually reach a point
> where that may not be enough or that we may have KUnit configs which
> are mutually exclusive; nevertheless, I imagine that this may be a
> good short term solution for a decent amount of time.
> 
> Shuah suggested an alternative in the form of config fragments. I
> think Ted's solution is going to be easier to maintain in the short
> term. Any other thoughts?
> 

I don't recall commenting on config fragments per se. I think I was
asking if we can make the test data dynamic as opposed to static.

Iurii said it might be difficult to do it that way since we are doing
this at boot time + we are testing extfs. If not for this test, for
others, it would be good to explore the option of making test data dynamic,
so it would be easier to use custom test data.

I don't really buy the argument that unit tests should be deterministic
Possibly, but I would opt for having the ability to feed test data.

thanks,
-- Shuah
Iurii Zaikin Oct. 17, 2019, 12:07 a.m. UTC | #12
On Wed, Oct 16, 2019 at 4:26 PM Shuah Khan <skhan@linuxfoundation.org> wrote:
>
> On 10/16/19 4:18 PM, Brendan Higgins wrote:
> > On Fri, Oct 11, 2019 at 6:19 AM Theodore Y. Ts'o <tytso@mit.edu> wrote:
> >>
> >> On Fri, Oct 11, 2019 at 03:05:43AM -0700, Brendan Higgins wrote:
> >>> That's an interesting point. Should we try to establish a pattern for
> >>> how tests should be configured? My *very long term* goal is to
> >>> eventually have tests able to be built and run without any kind of
> >>> kernel of any kind, but I don't think that having a single config for
> >>> all tests in a subsystem gets in the way of that, so I don't think I
> >>> have a strong preference in terms of what I want to do.
> >>>
> >>> Nevertheless, I think establishing patterns is good. Do we want to try
> >>> to follow Ted's preference as a general rule from now on?
> >>
> >> As I suggested on another thread (started on kunit-dev, but Brendan
> >> has cc'ed in linux-kselftest), I think it might really work well if
> >
> > For reference, that thread can be found here:
> > https://lore.kernel.org/linux-kselftest/CAFd5g46+OMmP8mYsH8vcpMpdOeYryp=1Lsab4Hy6pAhWjX5-4Q@mail.gmail.com/
> >
> >> "make kunit" runs all of the kunit tests automatically.  As we add
> >> more kunit tests, finding all of the CONFIG options so they can be
> >> added to the kunitconfig file is going to be hard, so kunit.py really
> >> needs an --allconfig which does this automatically.
> >>
> >> Along these lines, perhaps we should state that as a general rule the
> >> CONFIG option for Kunit tests should only depend on KUNIT, and use
> >> select to enable other dependencies.  i.e., for the ext4 kunit tests,
> >
> > I support this. Although I think that we will eventually find
> > ourselves in a position where it is not possible to satisfy all
> > dependencies for all KUnit tests, this may get us far enough along
> > that the problem may be easier, or may work well enough for a long
> > time. It's hard to say. In any case, I think it makes sense for a unit
> > test config to select its dependencies. I also think it makes sense to
> > make each subsystem have a master config for all KUnit tests.
> >
> >> it should look like this:
> >>
> >> config EXT4_KUNIT_TESTS
> >>          bool "KUnit test for ext4 inode"
> >>          select EXT4_FS
> >>          depends on KUNIT
> >> ...
> >>
> >> In the current patch, we use "depends on EXT4_FS", which meant that
> >> when I first added "CONFIG_EXT4_KUNIT_TESTS=y" to the kunitconfig
> >> file, I got the following confusing error message:
> >>
> >> % ./tools/testing/kunit/kunit.py  run
> >> Regenerating .config ...
> >> ERROR:root:Provided Kconfig is not contained in validated .config!
> >>
> >> Using "select EXT4_FS" makes it much easier to enable the ext4 kunit
> >> tests in kunitconfig.  At the moment requiring that we add two lines to
> >> kunitconfig to enable ext4 isn't _that_ bad:
> >>
> >> CONFIG_EXT4_FS=y
> >> CONFIG_EXT4_KUNIT_TESTS=y
> >>
> >> but over time, if many subsystems start adding unit tests, the
> >> overhead of managing the kunitconfig file is going to get unwieldy.
> >
> > Agreed.
> >
> >> Hence my suggestion that we just make all Kunit CONFIG options depend
> >> only on CONFIG_KUNIT.
> >
>
> Sounds good to me. I am a bit behind in reviews. I will review v5.
>
> > That makes sense for now. I think we will eventually reach a point
> > where that may not be enough or that we may have KUnit configs which
> > are mutually exclusive; nevertheless, I imagine that this may be a
> > good short term solution for a decent amount of time.
> >
> > Shuah suggested an alternative in the form of config fragments. I
> > think Ted's solution is going to be easier to maintain in the short
> > term. Any other thoughts?
> >
>
> I don't recall commenting on config fragments per se. I think I was
> asking if we can make the test data dynamic as opposed to static.
>
> Iurii said it might be difficult to do it that way since we are doing
> this at boot time + we are testing extfs. If not for this test, for
> others, it would be good to explore the option of making test data dynamic,
> so it would be easier to use custom test data.
>
I can see the value of building test data blobs for unit test consumption
but the intended use case would be more like what Theodore described
than just manually feeding random data.
> I don't really buy the argument that unit tests should be deterministic
> Possibly, but I would opt for having the ability to feed test data.
>
If the test data is exercising an interesting case, you ought to add it to the
test permanently. If not, why bother testing against this data at all?
The whole purpose of the unit tests is getting a lot of coverage on the cheap
and keeping it there to catch any regressions.
> thanks,
> -- Shuah
Theodore Ts'o Oct. 17, 2019, 12:08 p.m. UTC | #13
On Wed, Oct 16, 2019 at 05:26:29PM -0600, Shuah Khan wrote:
> 
> I don't really buy the argument that unit tests should be deterministic
> Possibly, but I would opt for having the ability to feed test data.

I strongly believe that unit tests should be deterministic.
Non-deterministic tests are essentially fuzz tests.  And fuzz tests
should be different from unit tests.

We want unit tests to run quickly.  Fuzz tests need to be run for a
large number of passes (perhaps hours) in order to be sure that we've
hit any possible bad cases.  We want to be able to easily bisect fuzz
tests --- preferably, automatically.  And any kind of flakey test is
hell to bisect.

It's bad enough when a test is flakey because of the underlying code.
But when a test is flakey because the test inputs are
non-deterministic, it's even worse.

						- Ted
Bird, Tim Oct. 17, 2019, 10:25 p.m. UTC | #14
> -----Original Message-----
> From: Theodore Y. Ts'o on October 17, 2019 2:09 AM
> 
> On Wed, Oct 16, 2019 at 05:26:29PM -0600, Shuah Khan wrote:
> >
> > I don't really buy the argument that unit tests should be deterministic
> > Possibly, but I would opt for having the ability to feed test data.
> 
> I strongly believe that unit tests should be deterministic.
> Non-deterministic tests are essentially fuzz tests.  And fuzz tests
> should be different from unit tests.

I'm not sure I have the entire context here, but I think deterministic
might not be the right word, or it might not capture the exact meaning
intended.

I think there are multiple issues here:
 1. Does the test enclose all its data, including working data and expected results?
Or, does the test allow someone to provide working data?  This alternative
implies that either some of the testcases or the results might be different depending on
the data that is provided.  IMHO the test would be deterministic if it always produced
the same results based on the same data inputs.  And if the input data was deterministic.
I would call this a data-driven test.

Since the results would be dependent on the data provided, the results
from tests using different data would not be comparable.  Essentially,
changing the input data changes the test so maybe it's best to consider
this a different test.  Like 'test-with-data-A' and 'test-with-data-B'.

2. Does the test automatically detect some attribute of the system, and adjust
its operation based on that (does the test probe?)  This is actually quite common
if you include things like when a test requires root access to run.  Sometimes such tests,
when run without root privilege, run as many testcases as possible not as root, and skip
the testcases that require root.

In general, altering the test based on probed data is a form of data-driven test,
except the data is not provided by the user.  Whether
this is deterministic in the sense of (1) depends on whether the data that
is probed is deterministic.  In the case of requiring root, it should
not change from run to run (and it should probably be reflected in the characterization
of the results).
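
For what it's worth, the usual kselftest shape of that root-probing pattern
is tiny. This is purely an illustrative sketch (not code from this thread),
but it shows the probe-then-skip idea:

#include <unistd.h>
#include "../kselftest.h"

int main(void)
{
	ksft_print_header();
	ksft_set_plan(1);

	if (geteuid() != 0) {
		/* probe failed: report a skip rather than a failure */
		ksft_test_result_skip("root-only case needs root\n");
	} else {
		/* ... testcase body that actually requires root ... */
		ksft_test_result_pass("root-only case\n");
	}
	return ksft_exit_pass();
}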

Maybe neither of the above cases fall in the category of unit tests, but
they are not necessarily fuzzing tests.  IMHO a fuzzing test is one which randomizes
the data for a data-driven test (hence using non-deterministic data).  Once the fuzzer
has found a bug, and the data and code for a test is fixed into a reproducer program,
then at that point it should be deterministic (modulo what I say about race condition
tests below).

> 
> We want unit tests to run quickly.  Fuzz tests need to be run for a
> large number of passes (perhaps hours) in order to be sure that we've
> hit any possible bad cases.  We want to be able to easily bisect fuzz
> tests --- preferably, automatically.  And any kind of flakey test is
> hell to bisect.
Agreed.
 
> It's bad enough when a test is flakey because of the underlying code.
> But when a test is flakey because the test inputs are
> non-deterministic, it's even worse.
I very much agree on this as well.

I'm not sure how one classes a program that seeks to invoke a race condition.
This can take variable time, so in that sense it is not deterministic.   But it should
produce the same result if the probabilities required for the race condition
to be hit are fulfilled.  Probably (see what I did there :-), one needs to take
a probabilistic approach to reproducing and bisecting such bugs.  The duration
or iterations required to reproduce the bug (to some confidence level) may
need to be included with the reproducer program.  I'm not sure if the syzkaller
reproducers do this or not, or if they just run forever.  One I looked at ran forever.
But you would want to limit this in order to produce results with some confidence
level (and not waste testing resources).

---
The reason I want to get clarity on the issue of data-driven tests is that I think
data-driven tests and tests that probe are very much desirable.  This allows a
test to be able to be more generalized and allows for specialization of the
test for more scenarios without re-coding it.
I'm not sure if this still qualifies as unit testing, but it's very useful as a means to
extend the value of a test.  We haven't trod into the mocking parts of kunit,
but I'm hoping that it may be possible to have that be data-driven (depending on
what's being mocked), to make it easier to test more things using the same code.

Finally, I think the issue of testing speed is orthogonal to whether a test is self-enclosed
or data-driven.  Definitely fuzzers, which are experimenting with system interaction
in a non-deterministic way, have speed problems.

Just my 2 cents.
 -- Tim
Shuah Khan Oct. 17, 2019, 10:49 p.m. UTC | #15
On 10/17/19 6:08 AM, Theodore Y. Ts'o wrote:
> On Wed, Oct 16, 2019 at 05:26:29PM -0600, Shuah Khan wrote:
>>
>> I don't really buy the argument that unit tests should be deterministic
>> Possibly, but I would opt for having the ability to feed test data.
> 
> I strongly believe that unit tests should be deterministic.
> Non-deterministic tests are essentially fuzz tests.  And fuzz tests
> should be different from unit tests.
> 

Having the ability to take test data doesn't make it non-deterministic
though. It just means that if a user wants to test with a different set
of data, there is no need to recompile the test. This could be helpful
to test cases the test writer didn't think about.

You could make the data in this test the default and add ability to
pass in data as needed.
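
To make that concrete --- this is only a sketch of what I mean, not anything
in the patch --- the override could be as small as one module/boot parameter
sitting next to the built-in table:

#include <linux/moduleparam.h>

/* hypothetical knob: if >= 0, also decode this extra_bits value on top of
 * the built-in table (the exact parameter prefix depends on how the object
 * gets built in) */
static int extra_bits_override = -1;
module_param(extra_bits_override, int, 0444);
MODULE_PARM_DESC(extra_bits_override,
		 "Optional extra_bits value to decode in addition to the table");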

> We want unit tests to run quickly.  Fuzz tests need to be run for a
> large number of passes (perhaps hours) in order to be sure that we've
> hit any possible bad cases.  We want to be able to easily bisect fuzz
> tests --- preferably, automatically.  And any kind of flakey test is
> hell to bisect.
> 

Absolutely.

> It's bad enough when a test is flakey because of the underlying code.
> But when a test is flakey because the test inputs are
> non-deterministic, it's even worse.
> 

That is fine. You can achieve both by making the test data included in
the test the default for deterministic behavior and allow users to
supply another set of data.

thanks,
-- Shuah
Theodore Ts'o Oct. 17, 2019, 10:56 p.m. UTC | #16
On Thu, Oct 17, 2019 at 10:25:35PM +0000, Tim.Bird@sony.com wrote:
> 
> I'm not sure I have the entire context here, but I think deterministic
> might not be the right word, or it might not capture the exact meaning
> intended.
> 
> I think there are multiple issues here:
>  1. Does the test enclose all its data, including working data and expected results?
> Or, does the test allow someone to provide working data?  This
> alternative implies that either the some of testcases or the results
> might be different depending on the data that is provided.  IMHO the
> test would be deterministic if it always produced the same results
> based on the same data inputs.  And if the input data was
> deterministic.  I would call this a data-driven test.

Do you mean that the tester would provide the test data at the time
when they launch the test?  I don't think that would *ever* be
considered a unit test.

My definition of a unit test is something which runs quickly so you
can do make some quick edits, and then run something like "make
check", and find out whether or not you've made any screw ups.  Or it
might get used in an automated way, such that immediately after I push
to a Gerrit server, tests *automatically* get run, and within a minute
or two, I get notified if there are any test failures, and a -1 review
is automatically posted to the proposed change in the Web UI.

So this is not the sort of thing where the human will be providing
working data to change how the test behaves.  The idea is that you
type "make check", and hundreds of tests run, sanity checking basic
functional soundness of the changes which were just made.

Unit tests also tend to run on small pieces of code --- for example,
in how we encode 64-bit timestamps in ext4, to provide both 100ns
granularity timestamps as well as post-2038 encoding, where the "low"
32-bits are backwards compatible with traditional 32-bit time_t's.  It
turns out doing this is complicated, and it's easy to screw it up (and in
fact, we have).  So now we have a set of unit tests for it.
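
(For context, the decode those tests exercise boils down to roughly the
following --- a simplified sketch of ext4_decode_extra_time() based on the
inodes.rst table; the real helper in fs/ext4/ext4.h also handles the
little-endian conversion:)

#include <linux/time64.h>
#include <linux/types.h>

/* bits 0-1 of the extra field extend the epoch, bits 2-31 hold nanoseconds */
#define SKETCH_EPOCH_BITS	2
#define SKETCH_EPOCH_MASK	((1 << SKETCH_EPOCH_BITS) - 1)
#define SKETCH_NSEC_MASK	(~0U << SKETCH_EPOCH_BITS)

static void sketch_decode_extra_time(struct timespec64 *ts, u32 extra)
{
	/* the two epoch bits extend the legacy signed 32-bit seconds */
	ts->tv_sec += (u64)(extra & SKETCH_EPOCH_MASK) << 32;
	/* the remaining 30 bits carry the nanoseconds */
	ts->tv_nsec = (extra & SKETCH_NSEC_MASK) >> SKETCH_EPOCH_BITS;
}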

The unit tests use a lot of reusable infrastructure, so that while
there are a series of tests to test each of the various different
boundary conditions, they are *driven* by test data.  So for example,
this is a test...

		{
			.test_case_name =
	  "2174-02-25 Lower bound of 32bit <0 timestamp, hi extra sec bit on.",
			.msb_set = true,
			.lower_bound = true,
			.extra_bits =  2,
			.expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
		},


... and this is a different test....

		{
			.test_case_name =
"2310-04-04 Upper bound of 32bit>=0 timestamp, hi extra sec bit 1. 1 ns bit 1.",
			.msb_set = false,
			.lower_bound = false,
			.extra_bits = 6,
			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
		},

So yes, it's very data-driven.  *But* all of the code, data, and
expected results are all encapsulated in fs/ext4/inode-test.c.

So when you say, "does the test allow someone to provide working
data", it's unclear what you mean by that.  How do you expect a user
to provide the working data?  And how do you make the test easy to
run?

> 2. Does the test automatically detect some attribute of the system,
> and adjust its operation based on that (does the test probe?)  This
> is actually quite common if you include things like when a test
> requires root access to run.  Sometimes such tests, when run without
> root privilege, run as many testcases as possible not as root, and
> skip the testcases that require root.

And if the test needs to do this, it's *automatically* not a unit
test.  It might be an integration test.  xfstests fall very squarely
into that category.  Integration tests are heavier-weight.
You're not going to run them as lightly as you run unit tests, because
they take more resources, and more wall clock time before you get
results.

One of the main points of unit tests is that they are testing units of
code smaller than "the whole kernel".  When I'm testing how 64-bit
timestamps are encoding, "root" has no meaning, because the scope of
the test doesn't even really include the concept of a process, or user
privileges.

One of the other advantages of unit tests is that sometimes it's much
easier to test the corner cases of some internal abstraction because
you can have test mocks which can simulate conditions which would
be very hard to simulate if you are running userspace code which can
talk to the kernel only via the system call interface.

> The reason I want to get clarity on the issue of data-driven tests is
> that I think data-driven tests and tests that probe are very much
> desirable.  This allows a test to be able to be more generalized and
> allows for specialization of the test for more scenarios without
> re-coding it.  I'm not sure if this still qualifies as unit testing,
> but it's very useful as a means to extend the value of a test.  We
> haven't trod into the mocking parts of kunit, but I'm hoping that it
> may be possible to have that be data-driven (depending on what's
> being mocked), to make it easier to test more things using the same
> code.

So does this distinction help?

If you need to do things which involve system calls and whether the
system calls are run as root, it seems to me that the existing
kselftest infrastructure is the right thing to do.  If you are
interested in testing code below (and in some cases *well* below) the
system call interface, then kunit is the right approach.

I'd encourage you to take a look at Iurii's fs/ext4/inode-test.c.  As
you can see, it is very data-driven.  But it is also, at the same
time, very self-contained, and which can be easily run, and run very
quickly.

Cheers,

						- Ted
Iurii Zaikin Oct. 17, 2019, 11:07 p.m. UTC | #17
> Having the ability to take test data doesn't make it non-deterministic
> though. It just means that if a user wants to test with a different set
> of data, there is no need to recompile the test. This could be helpful
> to test cases the test writer didn't think about.
>
Again, unit tests are not meant to be babysat. They are intended to become
a part of the codebase and be run against every proposed change to ensure
the change doesn't break anything.
The whole process is supposed to be fully automated.
Imagine a KUnit test run for all tests that gets kicked off as soon as a patch
shows up in Patchwork and the maintainers getting the test run results.
If you can think of a test for a new corner case that the change author didn't,
then you as the maintainer ask the change author to add such a test.
Or if some corner case comes up as a result of a bug then the new case is
submitted with the fix.
This is how unit testing is deployed in the larger software world. In the most
enlightened places a change will not be accepted unless it's accompanied by
the unit tests that offer good coverage for the possible inputs and code paths.
A change that breaks existing tests is either incorrect and has to be fixed or
the existing tests need to be updated for the behavior change.
Shuah Khan Oct. 17, 2019, 11:12 p.m. UTC | #18
On 10/17/19 5:07 PM, Iurii Zaikin wrote:
>> Having the ability to take test data doesn't make it non-deterministic
>> though. It just means that if a user wants to test with a different set
>> of data, there is no need to recompile the test. This could be helpful
>> to test cases the test writer didn't think about.
>>
> Again, unit tests are not meant to be babysat. They are intended to become
> a part of the codebase and be run against every proposed change to ensure
> the change doesn't break anything.
> The whole process is supposed to be fully automated.
> Imagine a KUnit test run for all tests that gets kicked off as soon as a patch
> shows up in Patchwork and the maintainers getting the test run results.
> If you can think of a test that the change author didn't for a new corner case,
> then you as the maintainer ask the change author to add such test.
> Or if some corner case comes up as a result of a bug then the new case is
> submitted with the fix.
> This is how unit testing is deployed in the larger software world. In the most
> enlightened places a change will not be accepted unless it's accompanied by
> the unit tests that offer good coverage for the possible inputs and code paths.
> A change that breaks existing tests is either incorrect and has to be fixed or
> the existing tests need to be updated for the behavior change.
> 

Okay. I am asking for an option to override the test data. You didn't
address that part.

You can do all of this and allow users to supply another set of data.
It doesn't have to be one or the other.

thanks,
-- Shuah
Iurii Zaikin Oct. 17, 2019, 11:27 p.m. UTC | #19
> You can do all of this and allow users to supply another set of data.
> It doesn't have to be one or the other.
>
What is the use case for running a unit test on a different data set than
what it comes with?
Bird, Tim Oct. 17, 2019, 11:40 p.m. UTC | #20
> -----Original Message-----
> From: Theodore Y. Ts'o 
> 
> On Thu, Oct 17, 2019 at 10:25:35PM +0000, Tim.Bird@sony.com wrote:
> >
> > I'm not sure I have the entire context here, but I think deterministic
> > might not be the right word, or it might not capture the exact meaning
> > intended.
> >
> > I think there are multiple issues here:
> >  1. Does the test enclose all its data, including working data and expected
> results?
> > Or, does the test allow someone to provide working data?  This
> > alternative implies that either some of the testcases or the results
> > might be different depending on the data that is provided.  IMHO the
> > test would be deterministic if it always produced the same results
> > based on the same data inputs.  And if the input data was
> > deterministic.  I would call this a data-driven test.
> 
> Do you mean that the tester would provide the test data at the time
> when they launch the test?  

No. Well, the data might be provided at some time independent
of the test compilation time, but it would not be made up on the fly.
So the data might be provided at run time, but that shouldn't imply
that the data is random, or that there is some lengthy fabrication
process that happens at test execution time.

> My definition of a unit test is something which runs quickly so you
> can do make some quick edits, and then run something like "make
> check", and find out whether or not you've made any screw ups.  Or it
> might get used in an automated way, such that immediately after I push
> to a Gerrit server, tests *automatically* get run, and within a minute
> or two, I get notified if there are any test failures, and a -1 review
> is automatically posted to the proposed change in the Web UI.
> 
> So this is not the sort of thing where the human will be providing
> working data to change how the test behaves.  The idea is that you
> type "make check", and hundreds of tests run, sanity checking basic
> functional soundness of the changes which were just made.
I'm not talking about any kind of system where a human is required
to think something up when the test runs.

> 
> Unit tests also tend to run on small pieces of code --- for example,
> in how we encode 64-bit timestamps in ext4, to provide both 100ns
> granularity timestamps as well as post-2038 encoding, where the "low"
> 32-bits are backwards compatible with traditional 32-bit time_t's.  It
> turns doing this is complicated, and it's easy to screw it up (and in
> fact, we have).  So now we have a set of unit tests for it.
> 
> The unit tests use a lot of reusable infrastructure, so that while
> there are a series of tests to test each of the various different
> boundary conditions, they are *driven* by test data.  So for example,
> this is a test...
> 
> 		{
> 			.test_case_name =
> 	  "2174-02-25 Lower bound of 32bit <0 timestamp, hi extra sec bit
> on.",
> 			.msb_set = true,
> 			.lower_bound = true,
> 			.extra_bits =  2,
> 			.expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
> 		},
> 
> 
> ... and this is a different test....
> 
> 		{
> 			.test_case_name =
> "2310-04-04 Upper bound of 32bit>=0 timestamp, hi extra sec bit 1. 1 ns bit
> 1.",
> 			.msb_set = false,
> 			.lower_bound = false,
> 			.extra_bits = 6,
> 			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
> 		},
> 
> So yes, it's very data-driven.  *But* all of the code, data, and
> expected results are all encapsulated in fs/ext4/inode-test.c.

Agreed.  By data-driven, I should have more specifically said
something like external-data driven.

> 
> So when you say, "does the test allow someone to provide working
> data", it's unclear what you mean by that.  

I mean some piece of data that allows the test to test more conditions.
More specifically, I mean data that already exists at test execution
time.

> How do you expect a user
> to provide the working data?  And how do you make the test easy to
> run?

In this case, the data is encoded in C structures and requires re-compiling
the test when a new data point is added.  One could envision reading
the list of data points from a file, and processing them.
Each new data point becomes a new test case.  This allows someone
to add a data point - for example maybe Iurii missed an interesting one,
or the algorithm changed and a new test case becomes worth testing.

In this case, the cost of parsing the data file does add some overhead,
but it's not onerous.  I'm not sure how, or whether, kunit handles
the issue of reading data from a file at test time.  But it doesn't have
to be a file read.  I'm just talking about separating data from code.

This might not be the best example, because the C code for expressing
the data points is pretty simple, so someone can add another data
point pretty easily.  What cannot be done with the data points embedded
in the code, however, is to send a data file indicating a new problem data point,
without other people having to re-compile the code to check it out
on their system.
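
As a purely illustrative sketch of that idea (the record format, the parse
helper, and the question of how the text would ever reach the kernel are all
assumptions here, not anything KUnit or the posted patch provides), one
external record per data point could map onto the patch's
struct timestamp_expectation roughly like this:

/*
 * Hypothetical sketch only: one external record per test case, mapped onto
 * the struct timestamp_expectation from the posted patch.  The record
 * format, the parser, and the transport of the file into the kernel are
 * assumptions made up for illustration.
 *
 * Example record: "2038-01-19 0 0 0 2147483647 0"
 *                  name       msb lower extra  sec  nsec
 */
static int parse_timestamp_record(const char *line,
				  struct timestamp_expectation *exp)
{
	unsigned int msb, lower;
	long long sec;
	long nsec;

	/* the test_case_name field is skipped to keep the sketch short */
	if (sscanf(line, "%*s %u %u %u %lld %ld",
		   &msb, &lower, &exp->extra_bits, &sec, &nsec) != 5)
		return -EINVAL;

	exp->msb_set = msb != 0;
	exp->lower_bound = lower != 0;
	exp->expected.tv_sec = sec;
	exp->expected.tv_nsec = nsec;
	return 0;
}

Even in this sketch, the parser is already more code than the struct literal
it would replace, which is part of the trade-off being discussed here.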

> 
> > 2. Does the test automatically detect some attribute of the system,
> > and adjust its operation based on that (does the test probe?)  This
> > is actually quite common if you include things like when a test
> > requires root access to run.  Sometimes such tests, when run without
> > root privilege, run as many testcases as possible not as root, and
> > skip the testcases that require root.
> 
> And if the test needs to do this, it's *automatically* not a unit
> test.

Not necessarily.  Maybe the root privilege example is not a good one.
How about a test that probes the kernel config, and executes
some variation of the tests based on the config values it detects?

This has nothing to do with the scope of the test or the speed of
the test.

> It might be an integration test.  xfstests fall very squarely
> into into that category.  Integration tests are heavier-weight.
> You're not going to run them as lightly as you run unit tests, because
> they take more resources, and more wall clock time before you get
> results.
> 
> One of the main point of unit tests is that they are testing units of
> code smaller than "the whole kernel".  When I'm testing how 64-bit
> timestamps are encoding, "root" has no meaning, because the scope of
> the test doesn't even really include the concept of a process, or user
> privileges.
Agreed.

> 
> One of the other advantages of unit tests is that sometimes it's much
> easier to test the corner cases of some internal abstraction because
> you can have test mocks which can simulate conditions which would
> be very hard to simulate if you are running userspace code which can
> talk to the kernel only via the system call interface.
Agreed.

> 
> > The reason I want get clarity on the issue of data-driven tests is
> > that I think data-driven tests and tests that probe are very much
> > desirable.  This allows a test to be able to be more generalized and
> > allows for specialization of the test for more scenarios without
> > re-coding it.  I'm not sure if this still qualifies as unit testing,
> > but it's very useful as a means to extend the value of a test.  We
> > haven't trod into the mocking parts of kunit, but I'm hoping that it
> > may be possible to have that be data-driven (depending on what's
> > being mocked), to make it easier to test more things using the same
> > code.
> 
> So does this distinction help?
> 
> If you need to do things which involve system calls and whether the
> system calls are run as root, it seems to me that the existing
> kselftest infrastructure is the right thing to do.  If you are
> interested in testing code below (and in some cases *well* below) the
> system call interface, then kunit is the right approach.
> 
> I'd encourage you to take a look at Iurii's fs/ext4/inode-test.c.  As
> you can see, it is very data-driven.  But it is also, at the same
> time, very self-contained, and which can be easily run, and run very
> quickly.

I'm familiar with the test.  I provided input to it.  In using it as an example
I am not advocating that it is a good candidate for being data driven (with
data external to the test program).
But other tests - particularly ones where hardware needs to be mocked -
are.

 -- Tim
Shuah Khan Oct. 17, 2019, 11:42 p.m. UTC | #21
On 10/17/19 5:27 PM, Iurii Zaikin wrote:
>> You can do all of this and allow users to supply another set of data.
>> It doesn't have to be one or the other.
>>
> What is the use case for running a unit test on a different data set than
> what it comes with?
> 

Let me ask the question in a different way. I would like to see the
ability to supply test data at run-time. It can be the same data every
single time the test runs.

I am not seeing the reasons to embed the test data in the test itself.

thanks,
-- Shuah
Bird, Tim Oct. 17, 2019, 11:54 p.m. UTC | #22
> -----Original Message-----
> From: Iurii Zaikin
> 
> > You can do all of this and allow users to supply another set of data.
> > It doesn't have to be one or the other.
> >
> What is the use case for running a unit test on a different data set than
> what it comes with?

I just gave some ideas in another message (our emails crossed),
but one use case is to allow someone besides the test author
to inject additional data points, and to do so without having to re-compile
the code.

They might do this for multiple reasons:
 - to experiment with additional data points
 - to try to diagnose a problem they are seeing
 - to fill gaps they see in existing data points

Whether this makes sense depends on a lot of factors.  I suspect
the timestamp test code is not a good candidate for this, as the code
is simple enough that adding a new test case is pretty trivial.  For some
other types of tests, adding the data via an external file could be easier
than changing the code of the test.
 -- Tim
Shuah Khan Oct. 17, 2019, 11:59 p.m. UTC | #23
On 10/17/19 5:54 PM, Tim.Bird@sony.com wrote:
> 
> 
>> -----Original Message-----
>> From: Iurii Zaikin
>>
>>> You can do all of this and allow users to supply another set of data.
>>> It doesn't have to be one or the other.
>>>
>> What is the use case for running a unit test on a different data set than
>> what it comes with?
> 
> I just gave some ideas in another message (our emails crossed),
> but one use case is to allow someone besides the test author
> to inject additional data points, and to do so without having to re-compile
> the code.
> 

Right. The test author might not think about all the possible ways to
test.

> They might do this for multiple reasons:
>   - to experiment with additional data points
>   - to try to diagnose a problem they are seeing
>   - to fill gaps they see in existing data points
> 

Thanks for explaining the scenarios.

> Whether this makes sense depends on a lot of factors.  I suspect
> the timestamp test code is not a good candidate for this, as the code
> is simple enough that adding a new test case is pretty trivial.  For some
> other types of tests, adding the data via an external file could be easier
> than changing the code of the test.

I agree. Even if the author thinks of all the different ways (I am convinced
of that), taking test data at run-time still makes the unit test
more effective.

thanks,
-- Shuah
Iurii Zaikin Oct. 18, 2019, 12:11 a.m. UTC | #24
On Thu, Oct 17, 2019 at 4:54 PM <Tim.Bird@sony.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Iurii Zaikin
> >
> > > You can do all of this and allow users to supply another set of data.
> > > It doesn't have to be one or the other.
> > >
> > What is the use case for running a unit test on a different data set than
> > what it comes with?
>
> I just gave some ideas in another message (our emails crossed),
> but one use case is to allow someone besides the test author
> to inject additional data points, and to do so without having to re-compile
> the code.
>
> They might do this for multiple reasons:
>  - to experiment with additional data points
>  - to try to diagnose a problem they are seeing
>  - to fill gaps they see in existing data points
>
> Whether this makes sense depends on a lot of factors.  I suspect
> the timestamp test code is not a good candidate for this, as the code
> is simple enough that adding a new test case is pretty trivial.  For some
> other types of tests, adding the data via an external file could be easier
> than changing the code of the test.

I think feeding test data without recompiling looks attractive right now
because in order to run a single test you need to compile and link the whole
kernel. I believe KUnit's strategic goal is the ability to only compile the
relevant bits, which is admittedly very far off.
Normally, in application programming the amount of code that needs to be
recompiled in order to run a test suite is small enough that the added
complexity of enabling the test to get its data from external sources is not
warranted. Typically, external files are used when something is not practical
to include in the source file directly due to size or complexity, e.g. a
large snippet of text, an image file, some binary data, etc. Such needs are
typically addressed by the test author rather than the core test framework.
Now, in application programming you can do a lot of things, like reading a
file, which are trickier in the kernel. But again we've come to supporting a
use case for test data which has to be fabricated through some involved
process or is otherwise not easily included in the source file.
And if you come up with an additional test case, why not just add it and
leave it there? Unit tests are cheap; even if a case proves to be redundant,
the mere fact that the code under test made you think of such a case is
sufficient to permanently include it in the test suite.
Bird, Tim Oct. 18, 2019, 12:38 a.m. UTC | #25
> -----Original Message-----
> From: Iurii Zaikin
> 
> On Thu, Oct 17, 2019 at 4:54 PM <Tim.Bird@sony.com> wrote:
> >
> > > -----Original Message-----
> > > From: Iurii Zaikin
> > >
> > > > You can do all of this and allow users to supply another set of data.
> > > > It doesn't have to be one or the other.
> > > >
> > > What is the use case for running a unit test on a different data set than
> > > what it comes with?
> >
> > I just gave some ideas in another message (our emails crossed),
> > but one use case is to allow someone besides the test author
> > to inject additional data points, and to do so without having to re-compile
> > the code.
> >
> > They might do this for multiple reasons:
> >  - to experiment with additional data points
> >  - to try to diagnose a problem they are seeing
> >  - to fill gaps they see in existing data points
> >
> > Whether this makes sense depends on a lot of factors.  I suspect
> > the timestamp test code is not a good candidate for this, as the code
> > is simple enough that adding a new test case is pretty trivial.  For some
> > other types of tests, adding the data via an external file could be easier
> > than changing the code of the test.
> 
> I think feeding test data without recompiling looks attractive right now
> because
> in order to run a single test you need to compile and link the whole kernel.
> I believe KUnit's strategic goal is the ability to only compile the
> relevant bits,
> which is admittedly very far off.
> Normally, in application programming the amount of code that needs to be
> recompiled in order to run a test suite is small enough that the added
> complexity
> of enabling the test to get the data from external sources is not
> warranted. Typically,
> external files are used when something is not practical to include in
> the source file
> directly due to size or complexity, i.e. a large snippet of text, an
> image file, some
> binary data etc. Such needs are typically addressed by the test author
> rather than
> the core test framework.
> Now, in application programming you can do a lot of things like
> reading a file which
> is trickier in kernel. But again we've come to supporting a use case
> for test data which
> has to be fabricated through some involved process or otherwise not
> easily included
> in the source file.
I strongly agree with everything you say here.

> And if you come up with an additional test case, why not just add it
> and leave it there?
You should do exactly that.   But the part you glossed over is the 
"coming up with an additional test case" part.

Having a data-driven test can, in some circumstances, allow one
to more easily come up with additional interesting test cases.
This is where Ted is exactly right about fuzzers.  You don't want to
execute a fuzzer as part of your unit testing.  But you might want to 
execute a fuzzer to come up with additional unit test cases to add.
And fuzzers are easier to write for data files than for C code.
(I run a grave risk of de-railing the conversation by referring back to fuzzers,
just when I believe we are coming to agreement about a number of ideas. :-).
Just to be clear, I'm not promoting fuzzers for unit testing.

Regards,
 -- Tim

> Unit tests are cheap, even if a case proves to be redundant, the mere
> fact that the
> code under test made you think of such a case is sufficient to
> permanently include
> the test case into the test suite.
Iurii Zaikin Oct. 18, 2019, 1:06 a.m. UTC | #26
> Having a data-driven test can, in some circumstances, allow one
> to more easily come up with additional interesting test cases.
Let's try to break this request down:
1. You would like first-class support for parameterized tests, as they are
commonly called, instead of the ad-hoc thing I did, because this is all
upstream KUnit has at the moment. Me too!
2. You would like a way to pass the data to the parameterized test other
than hardcoding it. Depending on how parameterization is designed, the input
part may or may not belong in KUnit. As an example, we have a test which has
custom code that does essentially "ls dir" and passes the list of input files
as the list of parameters to the test framework, and then each individual
test has the code to read the file passed as the parameterized test's
parameter (a rough sketch of that pattern follows below). This is not the
most elegant example, but it's the quickest I could come up with offhand, the
point being to demonstrate what can potentially belong in the core test
framework and what can be external to it.
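
A rough userspace sketch of that "ls dir" pattern (plain POSIX C, not KUnit,
and not taken from any real framework; run_one_case() is a stand-in for
whatever the individual test would do with each file):

#include <dirent.h>
#include <stdio.h>

static int run_one_case(const char *dir, const char *name)
{
	printf("parameterized case: %s/%s\n", dir, name);
	/* ... open the file and drive the code under test with it ... */
	return 0;
}

int main(int argc, char **argv)
{
	const char *dir = argc > 1 ? argv[1] : "testdata";
	struct dirent *de;
	DIR *d = opendir(dir);

	if (!d)
		return 1;
	while ((de = readdir(d)) != NULL) {
		if (de->d_name[0] == '.')	/* skip "." and ".." */
			continue;
		run_one_case(dir, de->d_name);
	}
	closedir(d);
	return 0;
}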
Brendan Higgins Oct. 18, 2019, 1:12 a.m. UTC | #27
On Thu, Oct 17, 2019 at 3:25 PM <Tim.Bird@sony.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Theodore Y. Ts'o on October 17, 2019 2:09 AM
> >
> > On Wed, Oct 16, 2019 at 05:26:29PM -0600, Shuah Khan wrote:
> > >
> > > I don't really buy the argument that unit tests should be deterministic
> > > Possibly, but I would opt for having the ability to feed test data.
> >
> > I strongly believe that unit tests should be deterministic.
> > Non-deterministic tests are essentially fuzz tests.  And fuzz tests
> > should be different from unit tests.
>
> I'm not sure I have the entire context here, but I think deterministic
> might not be the right word, or it might not capture the exact meaning
> intended.
>
> I think there are multiple issues here:
>  1. Does the test enclose all its data, including working data and expected results?
> Or, does the test allow someone to provide working data?  This alternative
> implies that either some of the testcases or the results might be different depending on
> the data that is provided.  IMHO the test would be deterministic if it always produced
> the same results based on the same data inputs.  And if the input data was deterministic.
> I would call this a data-driven test.
>
> Since the results would be dependent on the data provided, the results
> from tests using different data would not be comparable.  Essentially,
> changing the input data changes the test so maybe it's best to consider
> this a different test.  Like 'test-with-data-A' and 'test-with-data-B'.

That kind of sounds like parameterized tests[1]; it was a feature I was
thinking about adding to KUnit, but I think the general idea of
parameterized tests has fallen out of favor; I am not sure why. In any
case, I have used parameterized tests before and have found them
useful in certain circumstances.

> 2. Does the test automatically detect some attribute of the system, and adjust
> its operation based on that (does the test probe?)  This is actually quite common
> if you include things like when a test requires root access to run.  Sometimes such tests,
> when run without root privilege, run as many testcases as possible not as root, and skip
> the testcases that require root.
>
> In general, altering the test based on probed data is a form of data-driven test,
> except the data is not provided by the user.  Whether
> this is deterministic in the sense of (1) depends on whether the data that
> is probed is deterministic.  In the case or requiring root, then it should
> not change from run to run (and it should probably be reflected in the characterization
> of the results).
>
> Maybe neither of the above cases fall in the category of unit tests, but
> they are not necessarily fuzzing tests.  IMHO a fuzzing test is one which randomizes

Kind of sounds remotely similar to Haskell's QuickCheck[2]; it's sort
of a mix of unit testing and fuzz testing. I have used this style of
testing for other projects and it can be pretty useful. I actually
have a little experiment somewhere trying to port the idea to KUnit.
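
As a sketch of what that QuickCheck-flavoured idea could look like against
the code under discussion (this is not part of the posted patch; the seed
constant and the case name are made up), one could feed deterministic
pseudo-random extra_bits values to ext4_decode_extra_time() and check an
invariant rather than exact expected values:

/*
 * Sketch only, not part of the posted patch: a property-style case that
 * decodes deterministic pseudo-random extra_bits values and checks that the
 * result always satisfies an invariant.
 */
static void inode_test_xtimestamp_nsec_range(struct kunit *test)
{
	u32 state = 0x12345678;	/* fixed seed keeps every run identical */
	struct timespec64 ts;
	int i;

	for (i = 0; i < 1000; i++) {
		state = state * 1664525U + 1013904223U;	/* simple LCG step */

		ts.tv_sec = (s32)state;	/* any 32-bit on-disk seconds value */
		ext4_decode_extra_time(&ts, cpu_to_le32(state));

		/* decoded nanoseconds must always land in [0, MAX_NANOSECONDS] */
		KUNIT_EXPECT_TRUE(test, ts.tv_nsec >= 0);
		KUNIT_EXPECT_TRUE(test, ts.tv_nsec <= MAX_NANOSECONDS);
	}
}

Because the seed is fixed, every run sees the same inputs, so the case stays
deterministic in the sense discussed earlier in the thread.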

> the data for a data-driven test (hence using non-deterministic data).  Once the fuzzer
> has found a bug, and the data and code for a test is fixed into a reproducer program,
> then at that point it should be deterministic (modulo what I say about race condition
> tests below).
>
> >
> > We want unit tests to run quickly.  Fuzz tests need to be run for a
> > large number of passes (perhaps hours) in order to be sure that we've
> > hit any possible bad cases.  We want to be able to easily bisect fuzz
> > tests --- preferably, automatically.  And any kind of flakey test is
> > hell to bisect.
> Agreed.
>
> > It's bad enough when a test is flakey because of the underlying code.
> > But when a test is flakey because the test inputs are
> > non-deterministic, it's even worse.
> I very much agree on this as well.
>
> I'm not sure how one classes a program that seeks to invoke a race condition.
> This can take variable time, so in that sense it is not deterministic.   But it should
> produce the same result if the probabilities required for the race condition
> to be hit are fulfilled.  Probably (see what I did there :-), one needs to take
> a probabilistic approach to reproducing and bisecting such bugs.  The duration
> or iterations required to reproduce the bug (to some confidence level) may
> need to be included with the reproducer program.  I'm not sure if the syskaller
> reproducers do this or not, or if they just run forever.  One I looked at ran forever.
> But you would want to limit this in order to produce results with some confidence
> level (and not waste testing resources).
>
> ---
> The reason I want get clarity on the issue of data-driven tests is that I think
> data-driven tests and tests that probe are very much desirable.  This allows a
> test to be able to be more generalized and allows for specialization of the
> test for more scenarios without re-coding it.
> I'm not sure if this still qualifies as unit testing, but it's very useful as a means to
> extend the value of a test.  We haven't trod into the mocking parts of kunit,
> but I'm hoping that it may be possible to have that be data-driven (depending on
> what's being mocked), to make it easier to test more things using the same code.

I imagine it wouldn't be that hard to add that on as a feature of a
parameterized testing implementation.

> Finally, I think the issue of testing speed is orthogonal to whether a test is self-enclosed
> or data-driven.  Definitely fuzzers, which are experimenting with system interaction
> in a non-deterministic way, have speed problems.

[1] https://dzone.com/articles/junit-parameterized-test
[2] http://hackage.haskell.org/package/QuickCheck

Cheers!
Bird, Tim Oct. 18, 2019, 1:30 a.m. UTC | #28
> -----Original Message-----
> From: Brendan Higgins 
> 
> On Thu, Oct 17, 2019 at 3:25 PM <Tim.Bird@sony.com> wrote:
> >
> > > -----Original Message-----
> > > From: Theodore Y. Ts'o on October 17, 2019 2:09 AM
> > >
> > > On Wed, Oct 16, 2019 at 05:26:29PM -0600, Shuah Khan wrote:
> > > >
> > > > I don't really buy the argument that unit tests should be deterministic
> > > > Possibly, but I would opt for having the ability to feed test data.
> > >
> > > I strongly believe that unit tests should be deterministic.
> > > Non-deterministic tests are essentially fuzz tests.  And fuzz tests
> > > should be different from unit tests.
> >
> > I'm not sure I have the entire context here, but I think deterministic
> > might not be the right word, or it might not capture the exact meaning
> > intended.
> >
> > I think there are multiple issues here:
> >  1. Does the test enclose all its data, including working data and expected
> results?
> > Or, does the test allow someone to provide working data?  This alternative
> > implies that either some of the testcases or the results might be different
> depending on
> > the data that is provided.  IMHO the test would be deterministic if it always
> produced
> > the same results based on the same data inputs.  And if the input data was
> deterministic.
> > I would call this a data-driven test.
> >
> > Since the results would be dependent on the data provided, the results
> > from tests using different data would not be comparable.  Essentially,
> > changing the input data changes the test so maybe it's best to consider
> > this a different test.  Like 'test-with-data-A' and 'test-with-data-B'.
> 
> That kind of sounds like parameterized tests[1];

...

> 
> [1] https://dzone.com/articles/junit-parameterized-test

Both Iurii and you pointed me at parameterized tests.  This sounds like
what I am talking about.  We have a lot of infrastructure for these concepts in Fuego,
but it's called something different.

I'm not sure if I should feel smart for having developed some of these concepts
independently or dumb for not having known about this (apparently) standard
nomenclature. ;-)

Thanks for the reference!
 -- Tim
Theodore Ts'o Oct. 18, 2019, 1:40 a.m. UTC | #29
On Thu, Oct 17, 2019 at 11:40:01PM +0000, Tim.Bird@sony.com wrote:
> 
> No. Well, the data might be provided at some time independent
> of the test compilation time, but it would not be made up on the fly.
> So the data might be provided at run time, but that shouldn't imply
> that the data is random, or that there is some lengthy fabrication
> process that happens at test execution time.

So how would the data be provided?  Via a mounted file system?  There
is no mounted file system when we're running a kunit test.  One of the
reasons why kunit is fast is because we're not running init scripts,
and we're not mounting a file system.

The fabrication process isn't really lengthy, though.  If I modify
fs/ext4/inode-test.c to add or remove a test, it takes:

Elapsed time: 2.672s total, 0.001s configuring, 2.554s building, 0.116s running

Compare and contrast this with running "kvm-xfstests -c 4k generic/001"

The actual time to run the test generic/001 is 3 seconds.  But there
is a 9 second overhead in starting the VM, for a total test time of 12
seconds.  So sure, with kvm-xfstests I can drop a config file in
/tmp/kvm-xfstests-tytso, which is mounted as /vtmp using 9p, so you
could provide "user provided data" via a text file.  But the overhead
of starting up a full KVM, mounting a file system, starting userspace,
etc., is 9 seconds.

Contrast this with 2.5 seconds to recompile and relink
fs/ext4/inode-test.c into the kunit library.  I wouldn't call that a
"length fabrication process".  Is it really worth it to add in some
super-complex way to feed a data text file into a Kunit test, when
editing the test file and rerunning the test really doesn't take that
long?

> In this case, the cost of parsing the data file does add some overhead,
> but it's not onerous.  I'm not sure how, or whether, kunit handles
> the issue of reading data from a file at test time.  But it doesn't have
> to be a file read.  I'm just talking separating data from code.

It's not the cost of parsing the data file that's the issue; it's how
to *feed* the data file into the test.  How exactly are we supposed to do it?  9p?
Some other mounted file system?  That's where all the complexity and
overhead is going to be.

> Not necessarily.  Maybe the root privilege example is not a good one.
> How about a test that probes the kernel config, and executes
> some variation of the tests based on the config values it detects?

But that's even easier.  We can put "#ifdef CONFIG_xxx" into the
fs/ext4/inode-test.c file.  Again, it doesn't take that long to
recompile and relink the test .c file.
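
A minimal sketch of that approach, with entries taken from the posted patch
and CONFIG_EXT4_DEBUG used purely as an example symbol:

/*
 * Sketch of the "#ifdef CONFIG_xxx" idea: only the table entries vary with
 * the kernel configuration, the test body does not.
 */
static const struct timestamp_expectation test_data[] = {
	{
		.test_case_name = "1970-01-01",
		.msb_set = false,
		.lower_bound = true,
		.extra_bits = 0,
		.expected = {0LL, 0L},
	},
#ifdef CONFIG_EXT4_DEBUG	/* extra coverage only when debugging is built in */
	{
		.test_case_name = "2038-01-19",
		.msb_set = false,
		.lower_bound = false,
		.extra_bits = 0,
		.expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
	},
#endif
};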

Apologies, but this really seems like complexity in search of a
problem....

						- Ted
Bird, Tim Oct. 18, 2019, 2:40 a.m. UTC | #30
> -----Original Message-----
> From: Theodore Y. Ts'o 
> 
> On Thu, Oct 17, 2019 at 11:40:01PM +0000, Tim.Bird@sony.com wrote:
> >
> > No. Well, the data might be provided at some time independent
> > of the test compilation time, but it would not be made up on the fly.
> > So the data might be provided at run time, but that shouldn't imply
> > that the data is random, or that there is some lengthy fabrication
> > process that happens at test execution time.
> 
> So how would the data be provided?  Via a mounted file system?  There
> is no mounted file system when we're running a kunit test.  One of the
> reasons why kunit is fast is because we're not running init scripts,
> and we're not mounting a file system.
> 
> The fabrication process isn't really lengthy, though.  If I modify
> fs/ext4/inode-test.c to add or remove a test, it takes:
> 
> Elapsed time: 2.672s total, 0.001s configuring, 2.554s building, 0.116s running
> 
> Compare and contrast this with running "kvm-xfstests -c 4k generic/001"
> 
> The actual time to run the test generic/001 is 3 seconds.  But there
> is a 9 second overhead in starting the VM, for a total test time of 12
> seconds.  So sure, with kvm-xfstests I can drop a config file in
> /tmp/kvm-xfstests-tytso, which is mounted as /vtmp using 9p, so you
> could provide "user provided data" via a text file.  But the overhead
> of starting up a full KVM, mounting a file system, starting userspace,
> etc., is 9 seconds.
> 
> Contrast this with 2.5 seconds to recompile and relink
> fs/ext4/inode-test.c into the kunit library.  I wouldn't call that a
> "length fabrication process". 

I'm not sure I understand your point here at all.  I never said that
compiling the code was a lengthy fabrication process.  I said that
I was NOT envisioning a lengthy fabrication process at runtime
for the creation of the external data.  Indeed, I wasn't envisioning fabricating
the data at test runtime at all.  I was trying to clarify that I didn't envision
a human or fuzzer in the loop at test runtime, but apparently I didn't make that clear.
The clarification was based on an assumption I made about what you were asking
in your question.  Maybe that assumption was bad.

> Is it really worth it to add in some
> super-complex way to feed a data text file into a Kunit test, when
> editing the test file and rerunning the test really doesn't take that
> long?
> 
> > In this case, the cost of parsing the data file does add some overhead,
> > but it's not onerous.  I'm not sure how, or whether, kunit handles
> > the issue of reading data from a file at test time.  But it doesn't have
> > to be a file read.  I'm just talking separating data from code.
> 
> It's not the cost of parsing the data file that's the issue; it's how
> to *feed* the data file into the test.  How exactly are we supposed to do it?  9p?
> Some other mounted file system?  That's where all the complexity and
> overhead is going to be.
> 
> > Not necessarily.  Maybe the root privilege example is not a good one.
> > How about a test that probes the kernel config, and executes
> > some variation of the tests based on the config values it detects?
> 
> But that's even easier.  We can put "#ifdef CONFIG_xxx" into the
> fs/ext4/inode-test.c file.  Again, it doesn't take that long to
> recompile and relink the test .c file.
> 
> Apologies, but this really seems like complexity in search of a
> problem....

We're just talking past each other.  My original e-mail was a rebuttal
to your assertion that any test that was data-driven or non-deterministic
was a fuzzer.  I still believe that's just not the case.  This is independent
of the mechanics or speed of how the data is input.

I also conceded (multiple times) that externally data-driven techniques are probably
more aptly applied to non-unit tests. I've heard your pitch about speed, and I'm sympathetic.
My point is that I believe there is a place for data-driven tests.

I can live with having failed to convince you, which I'll put down to my inadequacy to
accurately communicate my ideas. :-)

Regards,
 -- Tim
Theodore Ts'o Oct. 18, 2019, 3:27 p.m. UTC | #31
On Fri, Oct 18, 2019 at 02:40:50AM +0000, Tim.Bird@sony.com wrote:
> We're just talking past each other.  My original e-mail was a rebuttal
> to your assertion that any test that was data-driven or non-deterministic
> was a fuzzer.  I still believe that's just not the case.  This is independent
> of the mechanics or speed of how the data is input.

Apologies, I was still focused on the original context of this thread,
which was about suggested improvements to Iurii's ext4 kunit test, or
perhaps adding new features to Kunit.

> I also conceded (multiple times) that externally data-driven
> techniques are probably more aptly applied to non-unit tests. I've
> heard your pitch about speed, and I'm sympathetic.  My point is that
> I believe there is a place for data-driven tests.

I guess I would put it differently.  The key goal is that it should be
really easy for developers to run, create, and extend tests.
Data-driven tests is certainly one technique to make it easier to
extend tests, and indeed fs/ext4/inode-test.c is data-driven with the
goal to make it easier to add additional tests.

Having the data for the test be external is certainly one option, and
there will be cases where it will make sense.  However, the overhead
in creating the parser for the data, and additional complexity
required to get the test data to be fed to the test program means that
the benefits need to be pretty large in order to balance the
additional costs of having an external data file, especially for
Kunit.

In terms of the abstract question, is there a place for data-driven
tests, I'm in complete agreement with you.  I've used this many times
personally, especially when writing tests which are implemented in
terms of shell scripts.  Examples of this include e2fsprogs's
regression test suite and xfstests.  I don't consider that a terribly
interesting question though; I view that as on the same order as "is
the sky blue?" or "are apple pies yummy?"

The more interesting, and more concrete question is whether there is a
place for external data-driven tests in Kunit, and there I am *much*
more skeptical.  Sure, I could imagine adding some infrastructure
where user-mode linux could read files from its "host" system.  But
there are those people who want to run KUnit tests by cross-compiling
an ARM kernel and then shipping the resulting kernel to a test board,
and then reading the output from the ARM board's serial console.  In
that case, we can't make the assumption that Kunit tests will be
running under user-mode linux, so the complexity of how we get the
external data file into the KUnit test just went up by another order
of magnitude.  It's going to be ***so*** much simpler if the test data
is embedded in the kernel built by KUnit.  That way, it works both in
the ARCH=um case, and in the "build an ARM kernel and push it to a
test board" case.

Cheers,

					- Ted
Shuah Khan Oct. 18, 2019, 8:24 p.m. UTC | #32
On 10/18/19 9:27 AM, Theodore Y. Ts'o wrote:
> On Fri, Oct 18, 2019 at 02:40:50AM +0000, Tim.Bird@sony.com wrote:
>> We're just talking past each other.  My original e-mail was a rebuttal
>> to your assertion that any test that was data-driven or non-deterministic
>> was a fuzzer.  I still believe that's just not the case.  This is independent
>> of the mechanics or speed of how the data is input.
> 
> Apologies, I was still focused on the original context of this thread,
> which was about suggested improvements to Iurii's ext4 kunit test, or
> perhaps adding new features to Kunit.
> 
>> I also conceded (multiple times) that externally data-driven
>> techniques are probably more aptly applied to non-unit tests. I've
>> heard your pitch about speed, and I'm sympathetic.  My point is that
>> I believe there is a place for data-driven tests.
> 

As such, what this current test does is data-driven, right? What we are
discussing is how the data is supplied; in this case it is embedded.

> I guess I would put it differently.  The key goal is it should be
> really easy for developers to run, create, and extend tests.
> Data-driven tests is certainly one technique to make it easier to
> extend tests, and indeed fs/ext4/inode-test.c is data-driven with the
> goal to make it easier to add additional tests.
> 

Again, I would make the distinction of "how the data is supplied":
embedded in the test vs. having the flexibility to accept external
test data. From what I can tell, I didn't see anybody say that the
embedded data is all that is needed.

Instead of adding the ability to read data, the suggestion is to keep
modifying the embedded data. That gets really tedious, and I don't think
anybody will take the time to do it. On the other hand, if there are a
few test data files to run with, it makes it easier to exercise
different cases.

> Having the data for the test be external is certainly one option, and
> there will be cases where it will make sense.  However, the overhead
> in creating the parser for the data, and additional complexity
> required to get the test data to be fed to the test program means that
> that benefits need to be pretty large in order to balance the
> additional costs of having an external data file, especially for
> Kunit.
> 

Let's explore it further before deciding whether it is useful or not.

> In terms of the abstract question, is there a place for data-driven
> tests, I'm in complete agreement with you.  I've used this many times
> personally, especially when writing tests which are implemented in
> terms of shell scripts.  Examples of this include e2fsprogs's
> regression test suite and xfstests.  I don't consider that a terribly
> interesting question though; I view that as on the same order as "is
> the sky blue?" or "are apple pies yummy?"
> 
> The more interesting, and more concrete question is whether there is a
> place for external data-driven tests in Kunit, and there I am *much*
> more skeptical. 

This is what I am interested in exploring. I do think it will add value.
I can see some use-cases that could benefit from this.

I am not suggesting that this should happen soon. This is something that
can be looked into for the future. I have a few use-cases in mind that
could benefit.

btw. I am by no means suggesting that this test going in is dependent
on the external data. I already sent my Reviewed-by for the v6 and am
planning to pull it into the linux-kselftest test branch for 5.5-rc1.

thanks,
-- Shuah
Brendan Higgins Oct. 24, 2019, 1:30 a.m. UTC | #33
On Fri, Oct 18, 2019 at 1:24 PM Shuah Khan <skhan@linuxfoundation.org> wrote:
>
> On 10/18/19 9:27 AM, Theodore Y. Ts'o wrote:
> > On Fri, Oct 18, 2019 at 02:40:50AM +0000, Tim.Bird@sony.com wrote:
> >> We're just talking past each other.  My original e-mail was a rebuttal
> >> to your assertion that any test that was data-driven or non-deterministic
> >> was a fuzzer.  I still believe that's just not the case.  This is independent
> >> of the mechanics or speed of how the data is input.
> >
> > Apologies, I was still focused on the original context of this thread,
> > which was about suggested improvements to Iurii's ext4 kunit test, or
> > perhaps adding new features to Kunit.
> >
> >> I also conceded (multiple times) that externally data-driven
> >> techniques are probably more aptly applied to non-unit tests. I've
> >> heard your pitch about speed, and I'm sympathetic.  My point is that
> >> I believe there is a place for data-driven tests.
> >
>
> As such what this current test does is data driven right. What we are
> discussing is how the data is supplied? In this case it is embedded.
>
> > I guess I would put it differently.  The key goal is it should be
> > really easy for developers to run, create, and extend tests.
> > Data-driven tests is certainly one technique to make it easier to
> > extend tests, and indeed fs/ext4/inode-test.c is data-driven with the
> > goal to make it easier to add additional tests.
> >
>
> Again I would make the distinction that "how the data supplied".
> Embedded in the test vs. having the flexibility to accept external
> test data. From what I can tell,  I didn't see anybody say that the
> embedded data is it and nothing more needed.
>
> Instead of adding the ability to read, the suggestion is for modifying
> the data. This gets real tedious and don't think anybody will take the
> time to do it. On the other hand, is there a few test data files to run
> with, it makes it easier to exercise different cases.
>
> > Having the data for the test be external is certainly one option, and
> > there will be cases where it will make sense.  However, the overhead
> > in creating the parser for the data, and additional complexity
> > required to get the test data to be fed to the test program means that
> > that benefits need to be pretty large in order to balance the
> > additional costs of having an external data file, especially for
> > Kunit.
> >
>
> Let's explore it further before deciding whether it is useful or not.
>
> > In terms of the abstract question, is there a place for data-driven
> > tests, I'm in complete agreement with you.  I've used this many times
> > personally, especially when writing tests which are implemented in
> > terms of shell scripts.  Examples of this include e2fsprogs's
> > regression test suite and xfstests.  I don't consider that a terribly
> > interesting question though; I view that as on the same order as "is
> > the sky blue?" or "are apple pies yummy?"
> >
> > The more interesting, and more concrete question is whether there is a
> > place for external data-driven tests in Kunit, and there I am *much*
> > more skeptical.
>
> This is what I am interested in exploring. I do think it will add value.
> I can see some use-cases that could benefit from this.
>
> I am not suggesting that this should happen soon. This is something that
> can be looked into for the future. I have a few use-cases in mind that
> could benefit.
>
> btw. I am by no means suggesting that this test going in is dependent
> on the external data. I already sent my Reviewed-by for the v6 and
> planning to pull it into linux-kselftest test for 5.5-rc1.

I am not opposed to exploring this further.

How about we make this a follow up task to making a generic mechanism
for parameterized tests? I seem to recall that Tim and Iurii both
thought that a generic mechanism for parameterized tests was a good
idea, and I am certainly not opposed.

An implementation like JUnit's[1] I think would lend itself to an
extension that allows loading additional test cases in a running
kernel. Does that sound like a good starting point?

Basically, I imagine having a mechanism not dissimilar to JUnit's
parameterized tests that allows test parameters to be specified in the
test suite, but in a way that is decoupled from the test case. It
should be relatively straightforward to allow the data structure
containing the test parameters to be overridden at runtime without
really affecting how the test is implemented in any way. It should be
transparent to the test maintainer.

Thoughts?

[1] https://dzone.com/articles/junit-parameterized-test
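
A sketch of that decoupling follows.  KUNIT_PARAMETERIZED_CASE and the
two-argument case signature below are invented, hypothetical constructs that
do not exist in KUnit as of this thread; get_32bit_time(),
struct timestamp_expectation and test_data are the ones from the posted
patch, assumed to be moved to file scope:

/*
 * Hypothetical sketch of a decoupled, parameterized form of the same test.
 * The macro and the (test, param) signature are invented for illustration.
 */
static void inode_test_one_timestamp(struct kunit *test, const void *param)
{
	const struct timestamp_expectation *t = param;
	struct timespec64 timestamp;

	timestamp.tv_sec = get_32bit_time(t);
	ext4_decode_extra_time(&timestamp, cpu_to_le32(t->extra_bits));

	KUNIT_EXPECT_EQ(test, t->expected.tv_sec, timestamp.tv_sec);
	KUNIT_EXPECT_EQ(test, t->expected.tv_nsec, timestamp.tv_nsec);
}

static struct kunit_case ext4_inode_test_cases[] = {
	/* hypothetical: binds the case to a default parameter table */
	KUNIT_PARAMETERIZED_CASE(inode_test_one_timestamp, test_data),
	{}
};

With the parameter table separated from the test body like this, a future
mechanism could, at least in principle, substitute a different table at
runtime without touching the test case itself.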
diff mbox series

Patch

diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index cbb5ca830e57..cb0b52753674 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -106,3 +106,15 @@  config EXT4_DEBUG
 	  If you select Y here, then you will be able to turn on debugging
 	  with a command such as:
 		echo 1 > /sys/module/ext4/parameters/mballoc_debug
+
+config EXT4_KUNIT_TESTS
+	bool "KUnit test for ext4 inode"
+	depends on EXT4_FS
+	depends on KUNIT
+	help
+	  This builds the ext4 inode sysctl unit test, which runs on boot.
+	  Tests the encoding correctness of ext4 inode.
+	  For more information on KUnit and unit tests in general please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+	  If unsure, say N.
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index b17ddc229ac5..a0588fd2eea6 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -13,4 +13,5 @@  ext4-y	:= balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \

 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
+ext4-$(CONFIG_EXT4_KUNIT_TESTS)	+= inode-test.o
 ext4-$(CONFIG_FS_VERITY)		+= verity.o
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
new file mode 100644
index 000000000000..43bc6cb547cd
--- /dev/null
+++ b/fs/ext4/inode-test.c
@@ -0,0 +1,221 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
+ * timestamps in ext4 inode structs are decoded correctly.
+ * These tests are derived from the table under
+ * Documentation/filesystems/ext4/inodes.rst Inode Timestamps
+ */
+
+#include <kunit/test.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+#include "ext4.h"
+
+/* binary: 00000000 00000000 00000000 00000000 */
+#define LOWER_MSB_0 0L
+/* binary: 01111111 11111111 11111111 11111111 */
+#define UPPER_MSB_0 0x7fffffffL
+/* binary: 10000000 00000000 00000000 00000000 */
+#define LOWER_MSB_1 (-0x80000000L)
+/* binary: 11111111 11111111 11111111 11111111 */
+#define UPPER_MSB_1 (-1L)
+/* binary: 00111111   11111111 11111111 11111111 */
+#define MAX_NANOSECONDS ((1L << 30) - 1)
+
+#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
+
+struct timestamp_expectation {
+	const char *test_case_name;
+	struct timespec64 expected;
+	u32 extra_bits;
+	bool msb_set;
+	bool lower_bound;
+};
+
+static time64_t get_32bit_time(const struct timestamp_expectation * const test)
+{
+	if (test->msb_set) {
+		if (test->lower_bound)
+			return LOWER_MSB_1;
+
+		return UPPER_MSB_1;
+	}
+
+	if (test->lower_bound)
+		return LOWER_MSB_0;
+	return UPPER_MSB_0;
+}
+
+
+static void inode_test_xtimestamp_decoding(struct kunit *test)
+{
+	const struct timestamp_expectation test_data[] = {
+		{
+			.test_case_name = "1901-12-13",
+			.msb_set = true,
+			.lower_bound = true,
+			.extra_bits = 0,
+			.expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "1969-12-31",
+			.msb_set = true,
+			.lower_bound = false,
+			.extra_bits = 0,
+			.expected = {.tv_sec = -1LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "1970-01-01",
+			.msb_set = false,
+			.lower_bound = true,
+			.extra_bits = 0,
+			.expected = {0LL, 0L},
+		},
+
+		{
+			.test_case_name = "2038-01-19",
+			.msb_set = false,
+			.lower_bound = false,
+			.extra_bits = 0,
+			.expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2038-01-19",
+			.msb_set = true,
+			.lower_bound = true,
+			.extra_bits = 1,
+			.expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2106-02-07",
+			.msb_set = true,
+			.lower_bound = false,
+			.extra_bits = 1,
+			.expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2106-02-07",
+			.msb_set = false,
+			.lower_bound = true,
+			.extra_bits = 1,
+			.expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2174-02-25",
+			.msb_set = false,
+			.lower_bound = false,
+			.extra_bits = 1,
+			.expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2174-02-25",
+			.msb_set = true,
+			.lower_bound = true,
+			.extra_bits =  2,
+			.expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2242-03-16",
+			.msb_set = true,
+			.lower_bound = false,
+			.extra_bits = 2,
+			.expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2242-03-16",
+			.msb_set = false,
+			.lower_bound = true,
+			.extra_bits = 2,
+			.expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = " 2310-04-04",
+			.msb_set = false,
+			.lower_bound = false,
+			.extra_bits = 2,
+			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = " 2310-04-04 00:00:00.1",
+			.msb_set = false,
+			.lower_bound = false,
+			.extra_bits = 6,
+			.expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
+		},
+
+		{
+			.test_case_name = "2378-04-22 00:00:00.MAX_NSEC",
+			.msb_set = false,
+			.lower_bound = true,
+			.extra_bits = 0xFFFFFFFF,
+			.expected = {.tv_sec = 0x300000000LL,
+				     .tv_nsec = MAX_NANOSECONDS},
+		},
+
+		{
+			.test_case_name = "2378-04-22",
+			.msb_set = false,
+			.lower_bound = true,
+			.extra_bits = 3,
+			.expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
+		},
+
+		{
+			.test_case_name = "2446-05-10",
+			.msb_set = false,
+			.lower_bound = false,
+			.extra_bits = 3,
+			.expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
+		}
+	};
+
+	struct timespec64 timestamp;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
+		timestamp.tv_sec = get_32bit_time(&test_data[i]);
+		ext4_decode_extra_time(&timestamp,
+				       cpu_to_le32(test_data[i].extra_bits));
+
+		KUNIT_EXPECT_EQ_MSG(test,
+				    test_data[i].expected.tv_sec,
+				    timestamp.tv_sec,
+				    CASE_NAME_FORMAT,
+				    test_data[i].test_case_name,
+				    test_data[i].msb_set,
+				    test_data[i].lower_bound,
+				    test_data[i].extra_bits);
+		KUNIT_EXPECT_EQ_MSG(test,
+				    test_data[i].expected.tv_nsec,
+				    timestamp.tv_nsec,
+				    CASE_NAME_FORMAT,
+				    test_data[i].test_case_name,
+				    test_data[i].msb_set,
+				    test_data[i].lower_bound,
+				    test_data[i].extra_bits);
+	}
+}
+
+static struct kunit_case ext4_inode_test_cases[] = {
+	KUNIT_CASE(inode_test_xtimestamp_decoding),
+	{}
+};
+
+static struct kunit_suite ext4_inode_test_suite = {
+	.name = "ext4_inode_test",
+	.test_cases = ext4_inode_test_cases,
+};
+
+kunit_test_suite(ext4_inode_test_suite);