diff mbox series

[RFC,6/6] mm: huge_memory: enable debugfs to split huge pages to any order.

Message ID 20201111204008.21332-7-zi.yan@sent.com (mailing list archive)
State New, archived
Headers show
Series Split huge pages to any lower order pages. | expand

Commit Message

Zi Yan Nov. 11, 2020, 8:40 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

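Extend the split_huge_pages_in_range_pid debugfs interface with an optional
fourth field, the target order, so that split_huge_page_to_list_to_order()
can be tested on pagecache THPs. Also add selftests that exercise
split_huge_page_to_list_to_order() both via debugfs and by truncating a file.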

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/huge_memory.c                              |  13 +--
 .../selftests/vm/split_huge_page_test.c       | 102 +++++++++++++++++-
 2 files changed, 105 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 88f50da40c9b..b7470607a08b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2974,7 +2974,7 @@  static ssize_t split_huge_pages_in_range_pid_write(struct file *file,
 	static DEFINE_MUTEX(mutex);
 	ssize_t ret;
 	char input_buf[80]; /* hold pid, start_vaddr, end_vaddr */
-	int pid;
+	int pid, to_order = 0;
 	unsigned long vaddr_start, vaddr_end, addr;
 	nodemask_t task_nodes;
 	struct mm_struct *mm;
@@ -2990,8 +2990,9 @@  static ssize_t split_huge_pages_in_range_pid_write(struct file *file,
 		goto out;
 
-	input_buf[80] = '\0';
-	ret = sscanf(input_buf, "%d,%lx,%lx", &pid, &vaddr_start, &vaddr_end);
-	if (ret != 3) {
+	input_buf[sizeof(input_buf) - 1] = '\0';
+	ret = sscanf(input_buf, "%d,%lx,%lx,%d", &pid, &vaddr_start, &vaddr_end, &to_order);
+	/* a THP cannot be split to order-1; negative orders are meaningless */
+	if ((ret != 3 && ret != 4) || to_order < 0 || to_order == 1) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -2999,8 +3000,8 @@  static ssize_t split_huge_pages_in_range_pid_write(struct file *file,
 	vaddr_end &= PAGE_MASK;
 
 	ret = strlen(input_buf);
-	pr_debug("split huge pages in pid: %d, vaddr: [%lx - %lx]\n",
-		 pid, vaddr_start, vaddr_end);
+	pr_debug("split huge pages in pid: %d, vaddr: [%lx - %lx], to order: %d\n",
+		 pid, vaddr_start, vaddr_end, to_order);
 
 	mm = find_mm_struct(pid, &task_nodes);
 	if (IS_ERR(mm)) {
@@ -3038,7 +3039,7 @@  static ssize_t split_huge_pages_in_range_pid_write(struct file *file,
 		addr += page_size(page) - PAGE_SIZE;
 
 		/* reset addr if split fails */
-		if (split_huge_page(page))
+		if (split_huge_page_to_list_to_order(page, NULL, to_order))
 			addr -= (page_size(page) - PAGE_SIZE);
 
 		unlock_page(page);
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index c8a32ae9e13a..bcbc5a9d327c 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -16,6 +16,7 @@ 
 #include <sys/wait.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 
 #define PAGE_4KB (4096UL)
 #define PAGE_2MB (512UL*PAGE_4KB)
@@ -31,6 +32,7 @@ 
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages_in_range_pid"
 #define SMAP_PATH "/proc/self/smaps"
+#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
 static int write_file(const char *path, const char *buf, size_t buflen)
@@ -50,13 +52,13 @@  static int write_file(const char *path, const char *buf, size_t buflen)
 	return (unsigned int) numwritten;
 }
 
-static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end)
+static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end, int order)
 {
 	char input[INPUT_MAX];
 	int ret;
 
-	ret = snprintf(input, INPUT_MAX, "%d,%lx,%lx", pid, vaddr_start,
-			vaddr_end);
+	ret = snprintf(input, INPUT_MAX, "%d,%lx,%lx,%d", pid, vaddr_start,
+			vaddr_end, order);
 	if (ret >= INPUT_MAX) {
 		printf("%s: Debugfs input is too long\n", __func__);
 		exit(EXIT_FAILURE);
@@ -139,7 +141,7 @@  void split_pmd_thp(void)
 	}
 
 	/* split all possible huge pages */
-	write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len);
+	write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len, 0);
 
 	*one_page = 0;
 
@@ -153,9 +155,129 @@  void split_pmd_thp(void)
 	free(one_page);
 }
 
+/*
+ * Create a fd_size-byte file of random data at THP_FS_PATH "/test", sync and
+ * drop the page cache, then map the file shared with MADV_HUGEPAGE and read
+ * every byte of the mapping to fault the file back in.
+ * On return *fd is the open file and *addr the mapping; any failure exits.
+ */
+void create_pagecache_thp_and_fd(size_t fd_size, int *fd, char **addr)
+{
+	const char testfile[] = THP_FS_PATH "/test";
+	/* volatile sink: keeps the read-back loop from being optimized away */
+	volatile unsigned char dummy = 0;
+	size_t i;
+
+	srand(time(NULL));
+
+	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
+	if (*fd == -1) {
+		perror("open test file");
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < fd_size; i++) {
+		unsigned char byte = rand();
+
+		if (write(*fd, &byte, sizeof(byte)) != (ssize_t)sizeof(byte)) {
+			perror("write to test file");
+			exit(EXIT_FAILURE);
+		}
+	}
+	close(*fd);
+	sync();
+	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+	if (*fd == -1) {
+		perror("open drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	if (write(*fd, "3", 1) != 1) {
+		perror("write to drop_caches");
+		exit(EXIT_FAILURE);
+	}
+	close(*fd);
+
+	*fd = open(testfile, O_RDWR);
+	if (*fd == -1) {
+		perror("reopen test file");
+		exit(EXIT_FAILURE);
+	}
+
+	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (*addr == MAP_FAILED) {
+		perror("cannot mmap");
+		exit(EXIT_FAILURE);
+	}
+	/* advisory hint; failure is not treated as fatal */
+	madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+	for (i = 0; i < fd_size; i++)
+		dummy += *(*addr + i);
+}
+
+/* Ask debugfs to split the huge pages backing a fresh file mapping to @order. */
+void split_thp_in_pagecache_to_order(int order)
+{
+	int fd;
+	char *addr;
+	size_t fd_size = 2 * PAGE_2MB, i;
+
+	create_pagecache_thp_and_fd(fd_size, &fd, &addr);
+
+	printf("split %zu kB pagecache page to order %d ... ", fd_size >> 10, order);
+	write_debugfs(getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+	/* write every byte of the mapping after the split request */
+	for (i = 0; i < fd_size; i++)
+		*(addr + i) = (char)i;
+
+	munmap(addr, fd_size);
+	close(fd);
+	printf("done\n");
+}
+
+/* Truncate the test file to (PAGE_4KB << order) bytes, then write what remains. */
+void truncate_thp_in_pagecache_to_order(int order)
+{
+	int fd;
+	char *addr;
+	size_t fd_size = 2 * PAGE_2MB, i;
+
+	create_pagecache_thp_and_fd(fd_size, &fd, &addr);
+
+	printf("truncate %zu kB pagecache page to size %lu kB ... ", fd_size >> 10, 4UL << order);
+	if (ftruncate(fd, PAGE_4KB << order) == -1) {
+		perror("ftruncate");
+		exit(EXIT_FAILURE);
+	}
+
+	/* only the bytes before the new end of file are written */
+	for (i = 0; i < (PAGE_4KB << order); i++)
+		*(addr + i) = (char)i;
+
+	munmap(addr, fd_size);
+	close(fd);
+	printf("done\n");
+}
+
 int main(int argc, char **argv)
 {
+	int i;
+
+	if (geteuid() != 0) {
+		printf("Please run the benchmark as root\n");
+		exit(EXIT_FAILURE);
+	}
+
 	split_pmd_thp();
 
+	for (i = 8; i >= 0; i--)
+		if (i != 1)
+			split_thp_in_pagecache_to_order(i);
+
+	for (i = 8; i >= 0; i--)
+		if (i != 1)
+			truncate_thp_in_pagecache_to_order(i);
+
 	return 0;
 }