@@ -14,12 +14,21 @@
#include <signal.h>
#include <time.h>
#include <sys/statfs.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
#define TWOMEG (2<<20)
#define RUNTIME (60)
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+#ifndef MADV_SPLIT
+#define MADV_SPLIT 26
+#endif
+
FIXTURE(migration)
{
pthread_t *threads;
@@ -265,4 +274,141 @@ TEST_F_TIMEOUT(migration, shared_hugetlb, 2*RUNTIME)
close(fd);
}
+#ifdef __NR_userfaultfd
+/*
+ * Map [mem, mem + length) in getpagesize()-sized pieces via userfaultfd.
+ *
+ * The range is first passed to madvise(MADV_SPLIT), then registered with a
+ * freshly created userfaultfd in MISSING | MINOR mode, and finally each
+ * getpagesize()-sized piece is installed with its own UFFDIO_CONTINUE.
+ *
+ * @mem:    start of the mapping to operate on.
+ * @length: size of the mapping in bytes; a trailing partial page is ignored.
+ *
+ * Returns 0 on success and a negative value on any failure, after reporting
+ * the failing step on stderr. The userfaultfd is always closed before
+ * returning.
+ */
+static int map_at_high_granularity(char *mem, size_t length)
+{
+	struct uffdio_api api = { .api = UFFD_API };
+	struct uffdio_register uffd_reg = {
+		.range = {
+			.start = (unsigned long)mem,
+			.len = length,
+		},
+		.mode = UFFDIO_REGISTER_MODE_MISSING |
+			UFFDIO_REGISTER_MODE_MINOR,
+	};
+	size_t pagesize = getpagesize();
+	size_t offset;
+	/* Pessimistic default so that every "goto out" reports a failure. */
+	int ret = -1;
+	int uffd;
+
+	uffd = syscall(__NR_userfaultfd, 0);
+	if (uffd < 0) {
+		perror("couldn't create uffd");
+		return -1;
+	}
+
+	if (ioctl(uffd, UFFDIO_API, &api) || api.api != UFFD_API) {
+		perror("UFFDIO_API failed");
+		goto out;
+	}
+
+	if (madvise(mem, length, MADV_SPLIT) == -1) {
+		perror("MADV_SPLIT failed");
+		goto out;
+	}
+
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffd_reg)) {
+		perror("UFFDIO_REGISTER failed");
+		goto out;
+	}
+
+	/* UFFDIO_CONTINUE each base-page-sized segment of the range. */
+	for (offset = 0; offset + pagesize <= length; offset += pagesize) {
+		struct uffdio_continue cont = {
+			.range = {
+				.start = (unsigned long)mem + offset,
+				.len = pagesize,
+			},
+			.mode = 0,
+		};
+
+		if (ioctl(uffd, UFFDIO_CONTINUE, &cont)) {
+			/* __u64 is not unsigned long long on every arch. */
+			unsigned long long first = cont.range.start;
+			unsigned long long last = first + cont.range.len;
+
+			fprintf(stderr, "UFFDIO_CONTINUE failed "
+				"for %llx -> %llx: %d\n",
+				first, last, errno);
+			goto out;
+		}
+	}
+	ret = 0;
+out:
+	close(uffd);
+	return ret;
+}
+#else
+/*
+ * Fallback built when __NR_userfaultfd is not defined: nothing can be
+ * mapped, so say so on stderr and report failure (-1) to the caller.
+ */
+static int map_at_high_granularity(char *mem, size_t length)
+{
+	fputs("Userfaultfd missing\n", stderr);
+	return -1;
+}
+#endif /* __NR_userfaultfd */
+
+/*
+ * Tests the high-granularity hugetlb migration entry paths.
+ *
+ * A MFD_HUGETLB memfd is sized to the block size reported by fstatfs()
+ * (presumably one huge page), mapped shared, split into base-page mappings
+ * with map_at_high_granularity(), and filled with a known pattern. The
+ * mapping is then handed to migrate() with self->n1 and self->n2 while the
+ * remaining threads run access_mem() on it. Afterwards the pattern must
+ * still be intact.
+ */
+TEST_F_TIMEOUT(migration, shared_hugetlb_hgm, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	unsigned long nr_words;
+	unsigned long w;
+	unsigned long sz;
+	struct statfs filestat;
+	int fd;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	fd = memfd_create("tmp_hugetlb", MFD_HUGETLB);
+	if (fd < 0)
+		SKIP(return, "Couldn't create hugetlb memfd");
+
+	if (fstatfs(fd, &filestat) < 0) {
+		close(fd);
+		SKIP(return, "Couldn't fstatfs hugetlb file");
+	}
+
+	sz = filestat.f_bsize;
+	nr_words = sz / sizeof(*ptr);
+
+	if (ftruncate(fd, sz)) {
+		close(fd);
+		SKIP(return, "Couldn't allocate hugetlb pages");
+	}
+
+	if (fallocate(fd, 0, 0, sz) < 0) {
+		/* Report errno before close() gets a chance to change it. */
+		perror("fallocate failed");
+		close(fd);
+		SKIP(return, "fallocate failed");
+	}
+
+	ptr = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (ptr == MAP_FAILED) {
+		close(fd);
+		SKIP(return, "Could not allocate hugetlb pages");
+	}
+
+	/*
+	 * We have to map_at_high_granularity before we memset, otherwise
+	 * memset will map everything at the hugepage size.
+	 */
+	if (map_at_high_granularity((char *)ptr, sz) < 0) {
+		munmap(ptr, sz);
+		close(fd);
+		SKIP(return, "Could not map HugeTLB range at high granularity");
+	}
+
+	/* Populate the page we're migrating. */
+	for (w = 0; w < nr_words; ++w)
+		ptr[w] = w;
+
+	/*
+	 * NOTE(review): a pthread_create() failure is only reported here; the
+	 * cancel/join loop below still visits that slot.
+	 */
+	for (i = 0; i < self->nthreads - 1; i++)
+		if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2, 10), 0);
+	for (i = 0; i < self->nthreads - 1; i++) {
+		ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+		pthread_join(self->threads[i], NULL);
+	}
+
+	/*
+	 * Check that the contents didn't change. ASSERT_EQ stops the test at
+	 * the first mismatch, so no explicit early exit is needed.
+	 */
+	for (w = 0; w < nr_words; ++w)
+		ASSERT_EQ(ptr[w], w);
+
+	munmap(ptr, sz);
+	ftruncate(fd, 0);
+	close(fd);
+}
+
TEST_HARNESS_MAIN
This is mostly the same as the shared HugeTLB case, but instead of mapping the page with a regular page fault, we map it with lots of UFFDIO_CONTINUE operations. We also verify that the contents haven't changed after the migration; they would appear changed if the post-migration PTEs pointed to the wrong page.

Signed-off-by: James Houghton <jthoughton@google.com>