Message ID | 20230405142535.493854-1-david@redhat.com (mailing list archive) |
---|---|
Headers | show |
Series | mm/userfaultfd: fix and cleanup for migration entries with uffd-wp | expand |
On Wed, Apr 05, 2023 at 04:25:33PM +0200, David Hildenbrand wrote: > One fix (I have a simple reproducer but it's too long to paste it into > the commit) I hope the recent rework of the unit test can help get more of these unit tests into the kselftests directly. Currently it's still ugly even after the rework patchset - we'll need to reference e.g. area_dst in unit tests for the allocated memory region (with specified type of either MEM_ANON, MEM_SHMEM, ...), but there's a plan to make it even better. Thanks for fixing those already.
/*
 * On 05.04.23 17:17, Peter Xu wrote:
 * > On Wed, Apr 05, 2023 at 04:25:33PM +0200, David Hildenbrand wrote:
 * >> One fix (I have a simple reproducer but it's too long to paste it into
 * >> the commit)
 * >
 * > I hope the recent rework of the unit test can help having more of these
 * > unit tests into the kselftests directly.
 * >
 * > Currently it's still ugly even after the rework patchset - we'll need to
 * > reference e.g. area_dst in unit tests for the allocated memory region (with
 * > specified type of either MEM_ANON, MEM_SHMEM, ...), but there's plan to
 * > make it even better.
 *
 * Yes, I refrained from messing with the selftest for now while you rework
 *
 * Here is the hacky reproducer:
 */

/*
 * Reproducer: write-protect a THP with userfaultfd (uffd-wp), migrate it via
 * mbind(MPOL_MF_MOVE) and verify through /proc/self/pagemap that the uffd-wp
 * bit is still reported afterwards. Prints "uffd-wp lost" and returns
 * non-zero if the bit is gone after migration.
 */

/* Needed for pread(), syscall(), madvise() and MAP_ANON under strict -std=cNN. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#include <linux/mempolicy.h>

static int uffd;
static int pagemap_fd;
static size_t pagesize;

/*
 * Print "<what> failed: <errno>" to stderr and return -errno.
 *
 * errno is captured before fprintf() runs, because stdio calls are allowed
 * to modify it; the value printed and the value returned are therefore
 * guaranteed to be the one left by the failing call.
 */
static int report_errno(const char *what)
{
	const int err = errno;

	fprintf(stderr, "%s failed: %d\n", what, err);
	return -err;
}

/*
 * Read the 64-bit pagemap entry describing the page that contains @start.
 *
 * @fd:    open descriptor for a pagemap file
 * @start: virtual address inside the page of interest
 *
 * Exits the process with status 1 if the entry cannot be read in full.
 */
static uint64_t pagemap_get_entry(int fd, char *start)
{
	/* pagemap is indexed by virtual page number, one uint64_t per page. */
	const unsigned long vpn = (unsigned long)start / pagesize;
	uint64_t entry;
	ssize_t ret;

	ret = pread(fd, &entry, sizeof(entry), (off_t)(vpn * sizeof(entry)));
	if (ret != (ssize_t)sizeof(entry)) {
		fprintf(stderr, "pread() failed\n");
		exit(1);
	}
	return entry;
}

#define BIT_ULL(nr)	(1ULL << (nr))
#define PM_UFFD_WP	BIT_ULL(57)
#define PM_SWAP		BIT_ULL(62)
#define PM_PRESENT	BIT_ULL(63)

/* Return true if the pagemap entry for @start has PM_UFFD_WP set. */
static bool pagemap_is_uffd_wp(int fd, char *start)
{
	uint64_t entry = pagemap_get_entry(fd, start);

	return entry & PM_UFFD_WP;
}

/* Return true if the pagemap entry for @start has PM_SWAP or PM_PRESENT set. */
static bool pagemap_is_populated(int fd, char *start)
{
	uint64_t entry = pagemap_get_entry(fd, start);

	return entry & (PM_SWAP | PM_PRESENT);
}

/*
 * Create the global userfaultfd and register [@mem, @mem + @size) in
 * write-protect mode.
 *
 * Returns 0 on success, -errno if a syscall/ioctl fails, or -ENOSYS if the
 * kernel does not report UFFD_FEATURE_PAGEFAULT_FLAG_WP.
 */
static int setup_uffd(char *mem, size_t size)
{
	struct uffdio_api uffdio_api = {
		.api = UFFD_API,
		.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP,
	};
	struct uffdio_register uffdio_register = {
		.range = {
			.start = (unsigned long)mem,
			.len = size,
		},
		.mode = UFFDIO_REGISTER_MODE_WP,
	};

	uffd = syscall(__NR_userfaultfd,
		       O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
	if (uffd < 0)
		return report_errno("syscall()");

	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0)
		return report_errno("UFFDIO_API");

	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
		fprintf(stderr, "UFFD_FEATURE_PAGEFAULT_FLAG_WP missing\n");
		return -ENOSYS;
	}

	/* Register UFFD-WP */
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0)
		return report_errno("UFFDIO_REGISTER");

	return 0;
}

/*
 * Returns 0 if the uffd-wp bit survives migration. Failure paths return the
 * same non-zero values as before (1, -1, -EBUSY or -errno).
 */
int main(int argc, char **argv)
{
	const size_t thpsize = 2 * 1024 * 1024;
	const size_t mmap_size = 2 * thpsize;
	char *mem;
	int err;

	(void)argc;
	(void)argv;

	pagesize = getpagesize();
	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap_fd < 0) {
		fprintf(stderr, "open() failed\n");
		exit(1);
	}

	mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANON, -1, 0);
	if (mem == MAP_FAILED) {
		err = errno;
		fprintf(stderr, "mmap() failed\n");
		return -err;
	}

	/*
	 * Advance to the next thpsize-aligned address. The mapping is twice
	 * thpsize, so an aligned thpsize-sized range always fits inside it.
	 */
	mem = (char *)(((uintptr_t)mem + thpsize) & ~(thpsize - 1));

	if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
		err = errno;
		fprintf(stderr, "MADV_HUGEPAGE failed\n");
		return -err;
	}

	/*
	 * Populate a THP: only the first page is touched, so the last page of
	 * the range being populated as well indicates a huge page was used.
	 */
	memset(mem, 0, pagesize);
	if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
		fprintf(stderr, "Did not get a THP populated\n");
		return -EBUSY;
	}

	/* Verify uffd-wp is not set. */
	if (pagemap_is_uffd_wp(pagemap_fd, mem)) {
		fprintf(stderr, "uffd-wp unexpectedly set\n");
		return -1;
	}

	/* Setup UFFD and protect the page. */
	if (setup_uffd(mem, thpsize))
		return 1;

	struct uffdio_writeprotect uffd_writeprotect = {
		.range = {
			.start = (unsigned long)mem,
			.len = thpsize,
		},
		.mode = UFFDIO_WRITEPROTECT_MODE_WP,
	};

	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect))
		return report_errno("UFFDIO_WRITEPROTECT");

	/* Verify uffd-wp is set. */
	if (!pagemap_is_uffd_wp(pagemap_fd, mem)) {
		fprintf(stderr, "uffd-wp not set\n");
		return -1;
	}

	/* Migrate the THP. */
	if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
		    MPOL_MF_MOVE)) {
		err = errno;
		fprintf(stderr, "mbind() failed\n");
		return -err;
	}

	/* Verify uffd-wp is still set. */
	if (!pagemap_is_uffd_wp(pagemap_fd, mem)) {
		fprintf(stderr, "uffd-wp lost\n");
		return -1;
	}

	return 0;
}