diff mbox series

[1/3] xfs: add memory failure test for dax mode

Message ID 20220513031411.2369314-2-ruansy.fnst@fujitsu.com (mailing list archive)
State New, archived
Headers show
Series xfs: add memory failure tests for dax mode | expand

Commit Message

Shiyang Ruan May 13, 2022, 3:14 a.m. UTC
Make sure the memory failure mechanism works when the filesystem is mounted
with the dax option.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 .gitignore                      |   1 +
 src/Makefile                    |   3 +-
 src/t_mmap_cow_memory_failure.c | 157 ++++++++++++++++++++++++++++++++
 tests/xfs/900                   |  48 ++++++++++
 tests/xfs/900.out               |   9 ++
 5 files changed, 217 insertions(+), 1 deletion(-)
 create mode 100644 src/t_mmap_cow_memory_failure.c
 create mode 100755 tests/xfs/900
 create mode 100644 tests/xfs/900.out

Comments

Zorro Lang May 18, 2022, 6:38 p.m. UTC | #1
On Fri, May 13, 2022 at 11:14:09AM +0800, Shiyang Ruan wrote:
> Make sure memory failure mechanism works when filesystem is mounted with
> dax option.
> 
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> ---
>  .gitignore                      |   1 +
>  src/Makefile                    |   3 +-
>  src/t_mmap_cow_memory_failure.c | 157 ++++++++++++++++++++++++++++++++
>  tests/xfs/900                   |  48 ++++++++++
>  tests/xfs/900.out               |   9 ++
>  5 files changed, 217 insertions(+), 1 deletion(-)
>  create mode 100644 src/t_mmap_cow_memory_failure.c
>  create mode 100755 tests/xfs/900
>  create mode 100644 tests/xfs/900.out
> 
> diff --git a/.gitignore b/.gitignore
> index ba0c572b..1d26b28a 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -146,6 +146,7 @@ tags
>  /src/t_holes
>  /src/t_immutable
>  /src/t_mmap_collision
> +/src/t_mmap_cow_memory_failure
>  /src/t_mmap_cow_race
>  /src/t_mmap_dio
>  /src/t_mmap_fallocate
> diff --git a/src/Makefile b/src/Makefile
> index 111ce1d9..d702e200 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -18,7 +18,8 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
>  	t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
>  	t_ofd_locks t_mmap_collision mmap-write-concurrent \
>  	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
> -	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale
> +	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
> +	t_mmap_cow_memory_failure
>  
>  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>  	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
> diff --git a/src/t_mmap_cow_memory_failure.c b/src/t_mmap_cow_memory_failure.c
> new file mode 100644
> index 00000000..4b2c1b8a
> --- /dev/null
> +++ b/src/t_mmap_cow_memory_failure.c
> @@ -0,0 +1,157 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2022 Fujitsu Corporation. */
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <libgen.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <semaphore.h>
> +#include <sys/mman.h>
> +#include <sys/wait.h>
> +#include <sys/sem.h>
> +#include <time.h>
> +#include <unistd.h>
> +
> +sem_t *sem;
> +
> +void sigbus_handler(int signal)
> +{
> +	printf("Process is killed by signal: %d\n", signal);
> +	sem_post(sem);
> +}
> +
> +void mmap_read_file(char *filename, off_t offset, size_t size)
> +{
> +	int fd;
> +	char *map, *dummy;
> +	struct timespec ts;
> +
> +	fd = open(filename, O_RDWR);
> +	map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
> +	dummy = malloc(size);
> +
> +	/* make sure page fault happens */
> +	memcpy(dummy, map, size);
> +
> +	/* ready */
> +	sem_post(sem);
> +
> +	usleep(200000);
> +
> +	clock_gettime(CLOCK_REALTIME, &ts);
> +	ts.tv_sec += 3;
> +	/* wait for injection done */
> +	sem_timedwait(sem, &ts);
> +
> +	free(dummy);
> +	munmap(map, size);
> +	close(fd);
> +}
> +
> +void mmap_read_file_then_poison(char *filename, off_t offset, size_t size,
> +		off_t poisonOffset, size_t poisonSize)
> +{
> +	int fd, error;
> +	char *map, *dummy;
> +
> +	/* wait for parent preparation done */
> +	sem_wait(sem);
> +
> +	fd = open(filename, O_RDWR);
> +	map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
> +	dummy = malloc(size);
> +
> +	/* make sure page fault happens */
> +	memcpy(dummy, map, size);
> +
> +	printf("Inject poison...\n");
> +	error = madvise(map + poisonOffset, poisonSize, MADV_HWPOISON);
> +	if (error)
> +		printf("madvise() has fault: %d, errno: %d\n", error, errno);
> +
> +	free(dummy);
> +	munmap(map, size);
> +	close(fd);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	char *pReadFile = NULL, *pPoisonFile = NULL;
> +	size_t mmapSize, poisonSize;
> +	off_t mmapOffset = 0, poisonOffset = 0;
> +	long pagesize = sysconf(_SC_PAGESIZE);
> +	int c;
> +	pid_t pid;
> +
> +	if (pagesize < 1) {
> +		fprintf(stderr, "sysconf(_SC_PAGESIZE): failed to get page size\n");
> +		abort();
> +	}
> +
> +	/* default mmap / poison size, in unit of System Page Size */
> +	mmapSize = poisonSize = pagesize;
> +
> +	while ((c = getopt(argc, argv, "o::s::O::S::R:P:")) != -1) {
> +		switch (c) {
> +		/* mmap offset */
> +		case 'o':
> +			mmapOffset = atoi(optarg) * pagesize;
> +			break;
> +		/* mmap size */
> +		case 's':
> +			mmapSize = atoi(optarg) * pagesize;
> +			break;
> +		/* madvice offset */
> +		case 'O':
> +			poisonOffset = atoi(optarg) * pagesize;
> +			break;
> +		/* madvice size */
> +		case 'S':
> +			poisonSize = atoi(optarg) * pagesize;
> +			break;
> +		/* filename for mmap read */
> +		case 'R':
> +			pReadFile = optarg;
> +			break;
> +		/* filename for poison read */
> +		case 'P':
> +			pPoisonFile = optarg;
> +			break;
> +		default:
> +			printf("Unknown option: %c\n", c);
> +			exit(1);
> +		}
> +	}
> +
> +	if (!pReadFile || !pPoisonFile) {
> +		printf("Usage: \n"
> +		       "  %s [-o mmapOffset] [-s mmapSize] [-O mmapOffset] [-S mmapSize] -R readFile -P poisonFile\n"
> +		       "  (offset and size are both in unit of System Page Size: %ld)\n",
> +				basename(argv[0]), pagesize);
> +		exit(0);
> +	}
> +	if (poisonSize < mmapSize)
> +		mmapSize = poisonSize;
> +
> +	/* fork and mmap files */
> +	pid = fork();
> +	if (pid == 0) {
> +		/* handle SIGBUS */
> +		signal(SIGBUS, sigbus_handler);
> +		sem = sem_open("sync", O_CREAT, 0666, 0);
> +
> +		/* mread & do memory failure on poison file */
> +		mmap_read_file_then_poison(pPoisonFile, mmapOffset, mmapSize,
> +				poisonOffset, poisonSize);
> +
> +		sem_close(sem);
> +	} else {
> +		sem = sem_open("sync", O_CREAT, 0666, 0);
> +
> +		/* mread read file, wait for child process to be killed */
> +		mmap_read_file(pReadFile, mmapOffset, mmapSize);
> +		sem_close(sem);
> +	}
> +	exit(0);
> +}
> diff --git a/tests/xfs/900 b/tests/xfs/900
> new file mode 100755
> index 00000000..da11230a
> --- /dev/null
> +++ b/tests/xfs/900
> @@ -0,0 +1,48 @@
> +#! /bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# FS QA Test No. 900
> +#
> +# Test memory failure mechanism when dax enabled
> +#
> +. ./common/preamble
> +_begin_fstest auto quick dax
> +
> +# Import common functions.
> +. ./common/filter
> +. ./common/reflink
> +
> +# real QA test starts here
> +_require_check_dmesg
> +_require_scratch_reflink
> +_require_cp_reflink
> +_require_xfs_scratch_rmapbt
> +_require_scratch_dax_mountopt "dax"
> +_require_test_program "t_mmap_cow_memory_failure"
> +
> +echo "Format and mount"
> +_scratch_mkfs > $seqres.full 2>&1
> +_scratch_mount "-o dax" >> $seqres.full 2>&1
> +
> +testdir=$SCRATCH_MNT/test-$seq
> +mkdir $testdir
> +
> +echo "Create the original files"
> +filesize=65536
> +_pwrite_byte 0x61 0 $filesize $testdir/testfile >> $seqres.full
> +_scratch_cycle_mount "dax"
> +
> +echo "Inject memory failure (1 page)"
> +# create two processes:
> +#  process1: mread 1 page to cause page fault, and wait
> +#  process2: mread 1 page to cause page fault, then inject poison on this page
> +$here/src/t_mmap_cow_memory_failure -s1 -S1 -R $testdir/testfile -P $testdir/testfile
> +
> +echo "Inject memory failure (2 pages)"
> +$here/src/t_mmap_cow_memory_failure -s2 -S2 -R $testdir/testfile -P $testdir/testfile
> +
> +_check_dmesg_for "Sending SIGBUS to t_mmap_cow_memo" || echo "Memory failure didn't kill the process"
> +_check_dmesg_for "recovery action for dax page: Recovered" || echo "Failured page didn't recovered"
> +
> +# success, all done
> +status=0
> diff --git a/tests/xfs/900.out b/tests/xfs/900.out
> new file mode 100644
> index 00000000..d861bf1f
> --- /dev/null
> +++ b/tests/xfs/900.out
> @@ -0,0 +1,9 @@
> +QA output created by 900
> +Format and mount
> +Create the original files
> +Inject memory failure (1 page)
> +Inject posion...

As your program is: printf("Inject poison...\n");

So I think this place should be "poison".

> +Process is killed by signal: 7
> +Inject memory failure (2 pages)
> +Inject posion...

same as above. And other 2 patches might have same issue.

Thanks,
Zorro

> +Process is killed by signal: 7
> -- 
> 2.35.1
> 
> 
>
Shiyang Ruan May 19, 2022, 1:53 a.m. UTC | #2
在 2022/5/19 2:38, Zorro Lang 写道:
> On Fri, May 13, 2022 at 11:14:09AM +0800, Shiyang Ruan wrote:
>> Make sure memory failure mechanism works when filesystem is mounted with
>> dax option.
>>
>> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
>> ---
>>   .gitignore                      |   1 +
>>   src/Makefile                    |   3 +-
>>   src/t_mmap_cow_memory_failure.c | 157 ++++++++++++++++++++++++++++++++
>>   tests/xfs/900                   |  48 ++++++++++
>>   tests/xfs/900.out               |   9 ++
>>   5 files changed, 217 insertions(+), 1 deletion(-)
>>   create mode 100644 src/t_mmap_cow_memory_failure.c
>>   create mode 100755 tests/xfs/900
>>   create mode 100644 tests/xfs/900.out
>>
...
>> diff --git a/tests/xfs/900.out b/tests/xfs/900.out
>> new file mode 100644
>> index 00000000..d861bf1f
>> --- /dev/null
>> +++ b/tests/xfs/900.out
>> @@ -0,0 +1,9 @@
>> +QA output created by 900
>> +Format and mount
>> +Create the original files
>> +Inject memory failure (1 page)
>> +Inject posion...
> 
> As your program is: printf("Inject poison...\n");
> 
> So I think this place should be "poison".

My bad. I forgot to fix this typo in .out file.

> 
>> +Process is killed by signal: 7
>> +Inject memory failure (2 pages)
>> +Inject posion...
> 
> same as above. And other 2 patches might have same issue.

Thanks!  Will fix them.


--
Ruan.

> 
> Thanks,
> Zorro
> 
>> +Process is killed by signal: 7
>> -- 
>> 2.35.1
>>
>>
>>
>
Zorro Lang May 19, 2022, 4:56 a.m. UTC | #3
On Thu, May 19, 2022 at 09:53:23AM +0800, Shiyang Ruan wrote:
> 
> 
> 在 2022/5/19 2:38, Zorro Lang 写道:
> > On Fri, May 13, 2022 at 11:14:09AM +0800, Shiyang Ruan wrote:
> > > Make sure memory failure mechanism works when filesystem is mounted with
> > > dax option.
> > > 
> > > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > > ---
> > >   .gitignore                      |   1 +
> > >   src/Makefile                    |   3 +-
> > >   src/t_mmap_cow_memory_failure.c | 157 ++++++++++++++++++++++++++++++++
> > >   tests/xfs/900                   |  48 ++++++++++
> > >   tests/xfs/900.out               |   9 ++
> > >   5 files changed, 217 insertions(+), 1 deletion(-)
> > >   create mode 100644 src/t_mmap_cow_memory_failure.c
> > >   create mode 100755 tests/xfs/900
> > >   create mode 100644 tests/xfs/900.out
> > > 
> ...
> > > diff --git a/tests/xfs/900.out b/tests/xfs/900.out
> > > new file mode 100644
> > > index 00000000..d861bf1f
> > > --- /dev/null
> > > +++ b/tests/xfs/900.out
> > > @@ -0,0 +1,9 @@
> > > +QA output created by 900
> > > +Format and mount
> > > +Create the original files
> > > +Inject memory failure (1 page)
> > > +Inject posion...
> > 
> > As your program is: printf("Inject poison...\n");
> > 
> > So I think this place should be "poison".
> 
> My bad. I forgot to fix this typo in .out file.

Never mind. I'd like to merge these tests only after your kernel patchset
"Add reflink&dedupe support for fsdax" has been merged, in case you need to
add or change something. I also need to see the test work on an official
kernel, at least, before merging it. So feel free to ping me if I forget
about this patchset by then :)

Thanks,
Zorro

> 
> > 
> > > +Process is killed by signal: 7
> > > +Inject memory failure (2 pages)
> > > +Inject posion...
> > 
> > same as above. And other 2 patches might have same issue.
> 
> Thanks!  Will fix them.
> 
> 
> --
> Ruan.
> 
> > 
> > Thanks,
> > Zorro
> > 
> > > +Process is killed by signal: 7
> > > -- 
> > > 2.35.1
> > > 
> > > 
> > > 
> > 
> 
>
diff mbox series

Patch

diff --git a/.gitignore b/.gitignore
index ba0c572b..1d26b28a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -146,6 +146,7 @@  tags
 /src/t_holes
 /src/t_immutable
 /src/t_mmap_collision
+/src/t_mmap_cow_memory_failure
 /src/t_mmap_cow_race
 /src/t_mmap_dio
 /src/t_mmap_fallocate
diff --git a/src/Makefile b/src/Makefile
index 111ce1d9..d702e200 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -18,7 +18,8 @@  TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
 	t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
 	t_ofd_locks t_mmap_collision mmap-write-concurrent \
 	t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
-	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale
+	t_mmap_writev_overlap checkpoint_journal mmap-rw-fault allocstale \
+	t_mmap_cow_memory_failure
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
diff --git a/src/t_mmap_cow_memory_failure.c b/src/t_mmap_cow_memory_failure.c
new file mode 100644
index 00000000..4b2c1b8a
--- /dev/null
+++ b/src/t_mmap_cow_memory_failure.c
@@ -0,0 +1,157 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Fujitsu Corporation. */
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <semaphore.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/sem.h>
+#include <time.h>
+#include <unistd.h>
+
+sem_t *sem;
+
+void sigbus_handler(int signal)
+{
+	printf("Process is killed by signal: %d\n", signal);
+	sem_post(sem);
+}
+
+void mmap_read_file(char *filename, off_t offset, size_t size)
+{
+	int fd;
+	char *map, *dummy;
+	struct timespec ts;
+
+	fd = open(filename, O_RDWR);
+	map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
+	dummy = malloc(size);
+
+	/* make sure page fault happens */
+	memcpy(dummy, map, size);
+
+	/* ready */
+	sem_post(sem);
+
+	usleep(200000);
+
+	clock_gettime(CLOCK_REALTIME, &ts);
+	ts.tv_sec += 3;
+	/* wait for injection done */
+	sem_timedwait(sem, &ts);
+
+	free(dummy);
+	munmap(map, size);
+	close(fd);
+}
+
+void mmap_read_file_then_poison(char *filename, off_t offset, size_t size,
+		off_t poisonOffset, size_t poisonSize)
+{
+	int fd, error;
+	char *map, *dummy;
+
+	/* wait for parent preparation done */
+	sem_wait(sem);
+
+	fd = open(filename, O_RDWR);
+	map = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, offset);
+	dummy = malloc(size);
+
+	/* make sure page fault happens */
+	memcpy(dummy, map, size);
+
+	printf("Inject poison...\n");
+	error = madvise(map + poisonOffset, poisonSize, MADV_HWPOISON);
+	if (error)
+		printf("madvise() has fault: %d, errno: %d\n", error, errno);
+
+	free(dummy);
+	munmap(map, size);
+	close(fd);
+}
+
+int main(int argc, char *argv[])
+{
+	char *pReadFile = NULL, *pPoisonFile = NULL;
+	size_t mmapSize, poisonSize;
+	off_t mmapOffset = 0, poisonOffset = 0;
+	long pagesize = sysconf(_SC_PAGESIZE);
+	int c;
+	pid_t pid;
+
+	if (pagesize < 1) {
+		fprintf(stderr, "sysconf(_SC_PAGESIZE): failed to get page size\n");
+		abort();
+	}
+
+	/* default mmap / poison size, in unit of System Page Size */
+	mmapSize = poisonSize = pagesize;
+
+	while ((c = getopt(argc, argv, "o::s::O::S::R:P:")) != -1) {
+		switch (c) {
+		/* mmap offset */
+		case 'o':
+			mmapOffset = atoi(optarg) * pagesize;
+			break;
+		/* mmap size */
+		case 's':
+			mmapSize = atoi(optarg) * pagesize;
+			break;
+		/* madvise offset */
+		case 'O':
+			poisonOffset = atoi(optarg) * pagesize;
+			break;
+		/* madvise size */
+		case 'S':
+			poisonSize = atoi(optarg) * pagesize;
+			break;
+		/* filename for mmap read */
+		case 'R':
+			pReadFile = optarg;
+			break;
+		/* filename for poison read */
+		case 'P':
+			pPoisonFile = optarg;
+			break;
+		default:
+			printf("Unknown option: %c\n", c);
+			exit(1);
+		}
+	}
+
+	if (!pReadFile || !pPoisonFile) {
+		printf("Usage: \n"
+		       "  %s [-o mmapOffset] [-s mmapSize] [-O poisonOffset] [-S poisonSize] -R readFile -P poisonFile\n"
+		       "  (offset and size are both in unit of System Page Size: %ld)\n",
+				basename(argv[0]), pagesize);
+		exit(0);
+	}
+	if (poisonSize < mmapSize)
+		mmapSize = poisonSize;
+
+	/* fork and mmap files */
+	pid = fork();
+	if (pid == 0) {
+		/* handle SIGBUS */
+		signal(SIGBUS, sigbus_handler);
+		sem = sem_open("sync", O_CREAT, 0666, 0);
+
+		/* mread & do memory failure on poison file */
+		mmap_read_file_then_poison(pPoisonFile, mmapOffset, mmapSize,
+				poisonOffset, poisonSize);
+
+		sem_close(sem);
+	} else {
+		sem = sem_open("sync", O_CREAT, 0666, 0);
+
+		/* mread read file, wait for child process to be killed */
+		mmap_read_file(pReadFile, mmapOffset, mmapSize);
+		sem_close(sem);
+	}
+	exit(0);
+}
diff --git a/tests/xfs/900 b/tests/xfs/900
new file mode 100755
index 00000000..da11230a
--- /dev/null
+++ b/tests/xfs/900
@@ -0,0 +1,48 @@ 
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# FS QA Test No. 900
+#
+# Test memory failure mechanism when dax enabled
+#
+. ./common/preamble
+_begin_fstest auto quick dax
+
+# Import common functions.
+. ./common/filter
+. ./common/reflink
+
+# real QA test starts here
+_require_check_dmesg
+_require_scratch_reflink
+_require_cp_reflink
+_require_xfs_scratch_rmapbt
+_require_scratch_dax_mountopt "dax"
+_require_test_program "t_mmap_cow_memory_failure"
+
+echo "Format and mount"
+_scratch_mkfs > $seqres.full 2>&1
+_scratch_mount "-o dax" >> $seqres.full 2>&1
+
+testdir=$SCRATCH_MNT/test-$seq
+mkdir $testdir
+
+echo "Create the original files"
+filesize=65536
+_pwrite_byte 0x61 0 $filesize $testdir/testfile >> $seqres.full
+_scratch_cycle_mount "dax"
+
+echo "Inject memory failure (1 page)"
+# create two processes:
+#  process1: mread 1 page to cause page fault, and wait
+#  process2: mread 1 page to cause page fault, then inject poison on this page
+$here/src/t_mmap_cow_memory_failure -s1 -S1 -R $testdir/testfile -P $testdir/testfile
+
+echo "Inject memory failure (2 pages)"
+$here/src/t_mmap_cow_memory_failure -s2 -S2 -R $testdir/testfile -P $testdir/testfile
+
+_check_dmesg_for "Sending SIGBUS to t_mmap_cow_memo" || echo "Memory failure didn't kill the process"
+_check_dmesg_for "recovery action for dax page: Recovered" || echo "Failed page wasn't recovered"
+
+# success, all done
+status=0
diff --git a/tests/xfs/900.out b/tests/xfs/900.out
new file mode 100644
index 00000000..d861bf1f
--- /dev/null
+++ b/tests/xfs/900.out
@@ -0,0 +1,9 @@ 
+QA output created by 900
+Format and mount
+Create the original files
+Inject memory failure (1 page)
+Inject poison...
+Process is killed by signal: 7
+Inject memory failure (2 pages)
+Inject poison...
+Process is killed by signal: 7