Message ID | 68cf1d70f3fb8ce4e3c1a4899c19df4f6c382a13.1662078528.git-series.apopple@nvidia.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [v4,1/4] mm/migrate_device.c: Flush TLB while holding PTL | expand |
On 9/1/22 17:35, Alistair Popple wrote: > We were not correctly copying PTE dirty bits to pages during > migrate_vma_setup() calls. This could potentially lead to data loss, so > add a test for this. > > Signed-off-by: Alistair Popple <apopple@nvidia.com> > --- > tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++- > 1 file changed, 124 insertions(+) > > diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c > index 529f53b..70fdb49 100644 > --- a/tools/testing/selftests/vm/hmm-tests.c > +++ b/tools/testing/selftests/vm/hmm-tests.c > @@ -1200,6 +1200,130 @@ TEST_F(hmm, migrate_multiple) > } > } > > +static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX"; > +static int write_cgroup_param(char *cgroup_path, char *param, long value) > +{ > + int ret; > + FILE *f; > + char *filename; > + > + if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0) > + return -1; > + > + f = fopen(filename, "w"); > + if (!f) { > + ret = -1; > + goto out; > + } > + > + ret = fprintf(f, "%ld\n", value); > + if (ret < 0) > + goto out1; > + > + ret = 0; > + > +out1: > + fclose(f); > +out: > + free(filename); > + > + return ret; > +} > + > +static int setup_cgroup(void) > +{ > + pid_t pid = getpid(); > + int ret; > + > + if (!mkdtemp(cgroup)) > + return -1; > + > + ret = write_cgroup_param(cgroup, "cgroup.procs", pid); > + if (ret) > + return ret; > + > + return 0; > +} > + > +static int destroy_cgroup(void) > +{ > + pid_t pid = getpid(); > + int ret; > + > + ret = write_cgroup_param("/sys/fs/cgroup/cgroup.procs", > + "cgroup.proc", pid); > + if (ret) > + return ret; > + > + if (rmdir(cgroup)) > + return -1; > + > + return 0; > +} > + > +/* > + * Try and migrate a dirty page that has previously been swapped to disk. This > + * checks that we don't loose dirty bits. 
s/loose/lose/ > + */ > +TEST_F(hmm, migrate_dirty_page) > +{ > + struct hmm_buffer *buffer; > + unsigned long npages; > + unsigned long size; > + unsigned long i; > + int *ptr; > + int tmp = 0; > + > + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; > + ASSERT_NE(npages, 0); > + size = npages << self->page_shift; > + > + buffer = malloc(sizeof(*buffer)); > + ASSERT_NE(buffer, NULL); > + > + buffer->fd = -1; > + buffer->size = size; > + buffer->mirror = malloc(size); > + ASSERT_NE(buffer->mirror, NULL); > + > + ASSERT_EQ(setup_cgroup(), 0); > + > + buffer->ptr = mmap(NULL, size, > + PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, > + buffer->fd, 0); > + ASSERT_NE(buffer->ptr, MAP_FAILED); > + > + /* Initialize buffer in system memory. */ > + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) > + ptr[i] = 0; > + > + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); > + > + /* Fault pages back in from swap as clean pages */ > + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) > + tmp += ptr[i]; > + > + /* Dirty the pte */ > + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) > + ptr[i] = i; > + > + /* > + * Attempt to migrate memory to device, which should fail because > + * hopefully some pages are backed by swap storage. > + */ > + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages)); Are you really sure that you want to assert on that? Because doing so guarantees a test failure if and when we ever upgrade the kernel to be able to migrate swap-backed pages. And I seem to recall that this current inability to migrate swap-backed pages is considered a flaw to be fixed, right? > + > + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); > + > + /* Check we still see the updated data after restoring from swap. 
*/ > + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) > + ASSERT_EQ(ptr[i], i); > + > + hmm_buffer_free(buffer); > + destroy_cgroup(); > +} > + > /* > * Read anonymous memory multiple times. > */ thanks,
John Hubbard <jhubbard@nvidia.com> writes: > On 9/1/22 17:35, Alistair Popple wrote: [...] >> +/* >> + * Try and migrate a dirty page that has previously been swapped to disk. This >> + * checks that we don't loose dirty bits. > > s/loose/lose/ Thanks. >> + */ >> +TEST_F(hmm, migrate_dirty_page) >> +{ >> + struct hmm_buffer *buffer; >> + unsigned long npages; >> + unsigned long size; >> + unsigned long i; >> + int *ptr; >> + int tmp = 0; >> + >> + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; >> + ASSERT_NE(npages, 0); >> + size = npages << self->page_shift; >> + >> + buffer = malloc(sizeof(*buffer)); >> + ASSERT_NE(buffer, NULL); >> + >> + buffer->fd = -1; >> + buffer->size = size; >> + buffer->mirror = malloc(size); >> + ASSERT_NE(buffer->mirror, NULL); >> + >> + ASSERT_EQ(setup_cgroup(), 0); >> + >> + buffer->ptr = mmap(NULL, size, >> + PROT_READ | PROT_WRITE, >> + MAP_PRIVATE | MAP_ANONYMOUS, >> + buffer->fd, 0); >> + ASSERT_NE(buffer->ptr, MAP_FAILED); >> + >> + /* Initialize buffer in system memory. */ >> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) >> + ptr[i] = 0; >> + >> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); >> + >> + /* Fault pages back in from swap as clean pages */ >> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) >> + tmp += ptr[i]; >> + >> + /* Dirty the pte */ >> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) >> + ptr[i] = i; >> + >> + /* >> + * Attempt to migrate memory to device, which should fail because >> + * hopefully some pages are backed by swap storage. >> + */ >> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages)); > > Are you really sure that you want to assert on that? Because doing so > guarantees a test failure if and when we every upgrade the kernel to > be able to migrate swap-backed pages. And I seem to recall that this > current inability to migrate swap-backed pages is considered a flaw > to be fixed, right? 
Right, that's a good point. I was using failure (ASSERT_TRUE) here as a way of detecting that at least some pages are swap-backed, because if no pages end up being swap-backed the test is invalid. I'm not really sure what to do about it though. It's likely the fix for swap-backed migration may make this bug impossible to hit anyway, because the obvious fix is to just drop the pages from the swapcache during migration which would force writeback during subsequent reclaim. So I'm inclined to leave this here even if it only serves to remind us about it when we do fix migration of swap-backed pages, because we will of course run hmm-tests before submitting that fix :-) We can then either fix the test or drop it if we think it's no longer possible to hit. >> + >> + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); >> + >> + /* Check we still see the updated data after restoring from swap. */ >> + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) >> + ASSERT_EQ(ptr[i], i); >> + >> + hmm_buffer_free(buffer); >> + destroy_cgroup(); >> +} >> + >> /* >> * Read anonymous memory multiple times. >> */ > > thanks,
On 9/7/22 04:13, Alistair Popple wrote: >>> + /* >>> + * Attempt to migrate memory to device, which should fail because >>> + * hopefully some pages are backed by swap storage. >>> + */ >>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages)); >> >> Are you really sure that you want to assert on that? Because doing so >> guarantees a test failure if and when we every upgrade the kernel to >> be able to migrate swap-backed pages. And I seem to recall that this >> current inability to migrate swap-backed pages is considered a flaw >> to be fixed, right? > > Right, that's a good point. I was using failure (ASSERT_TRUE) here as a > way of detecting that at least some pages are swap-backed, because if no > pages end up being swap-backed the test is invalid. Yes. But "invalid" or "waived" is a much different test result than "failed". > > I'm not really sure what to do about it though. It's likely the fix for Remove the assert. If the test framework allows and you prefer, you can print a warning. > swap-backed migration may make this bug impossible to hit anyway, > because the obvious fix is to just drop the pages from the swapcache > during migration which would force writeback during subsequent reclaim. > > So I'm inclined to leave this here even if it only serves to remind us > about it when we do fix migration of swap-backed pages, because we will > of course run hmm-tests before submitting that fix :-) We can then > either fix the test or drop it if we think it's no longer possible to > hit. Oh no no no, please. This is not how to do tests. If you want a TODO list somewhere, there are other ways. But tests that require maintenance when you change something are an anti-pattern. thanks,
John Hubbard <jhubbard@nvidia.com> writes: > On 9/7/22 04:13, Alistair Popple wrote: >>>> + /* >>>> + * Attempt to migrate memory to device, which should fail because >>>> + * hopefully some pages are backed by swap storage. >>>> + */ >>>> + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages)); >>> >>> Are you really sure that you want to assert on that? Because doing so >>> guarantees a test failure if and when we every upgrade the kernel to >>> be able to migrate swap-backed pages. And I seem to recall that this >>> current inability to migrate swap-backed pages is considered a flaw >>> to be fixed, right? >> Right, that's a good point. I was using failure (ASSERT_TRUE) here as a >> way of detecting that at least some pages are swap-backed, because if no >> pages end up being swap-backed the test is invalid. > > Yes. But "invalid" or "waived" is a much different test result than > "failed". True. Unfortunately our test framework needs some love as I don't think it's possible to return a result of "invalid" or "waived". We can skip a test though, so that might be the best option here. >> I'm not really sure what to do about it though. It's likely the fix for > > Remove the assert. If the test framework allows and you prefer, you > can print a warning. > >> swap-backed migration may make this bug impossible to hit anyway, >> because the obvious fix is to just drop the pages from the swapcache >> during migration which would force writeback during subsequent reclaim. >> So I'm inclined to leave this here even if it only serves to remind us >> about it when we do fix migration of swap-backed pages, because we will >> of course run hmm-tests before submitting that fix :-) We can then >> either fix the test or drop it if we think it's no longer possible to >> hit. > > Oh no no no, please. This is not how to do tests. If you want a TODO > list somewhere, there are other ways. But tests that require maintenance > when you change something are an anti-pattern. 
Fair enough; I think what you're asking for is a higher-level test that doesn't rely on implementation side-effects. I wrote this test mostly to discover whether we could hit problems with the current implementation, which is why it's a bit messy. But I think I can fix this up without relying on implementation side-effects - really I just want to confirm that at least some pages got swapped to disk, which I can do by looking at /proc/self/pagemap. - Alistair > thanks,
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 529f53b..70fdb49 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -1200,6 +1200,130 @@ TEST_F(hmm, migrate_multiple) } } +static char cgroup[] = "/sys/fs/cgroup/hmm-test-XXXXXX"; +static int write_cgroup_param(char *cgroup_path, char *param, long value) +{ + int ret; + FILE *f; + char *filename; + + if (asprintf(&filename, "%s/%s", cgroup_path, param) < 0) + return -1; + + f = fopen(filename, "w"); + if (!f) { + ret = -1; + goto out; + } + + ret = fprintf(f, "%ld\n", value); + if (ret < 0) + goto out1; + + ret = 0; + +out1: + fclose(f); +out: + free(filename); + + return ret; +} + +static int setup_cgroup(void) +{ + pid_t pid = getpid(); + int ret; + + if (!mkdtemp(cgroup)) + return -1; + + ret = write_cgroup_param(cgroup, "cgroup.procs", pid); + if (ret) + return ret; + + return 0; +} + +static int destroy_cgroup(void) +{ + pid_t pid = getpid(); + int ret; + + ret = write_cgroup_param("/sys/fs/cgroup/cgroup.procs", + "cgroup.proc", pid); + if (ret) + return ret; + + if (rmdir(cgroup)) + return -1; + + return 0; +} + +/* + * Try and migrate a dirty page that has previously been swapped to disk. This + * checks that we don't loose dirty bits. 
+ */ +TEST_F(hmm, migrate_dirty_page) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int tmp = 0; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + ASSERT_EQ(setup_cgroup(), 0); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = 0; + + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); + + /* Fault pages back in from swap as clean pages */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + tmp += ptr[i]; + + /* Dirty the pte */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* + * Attempt to migrate memory to device, which should fail because + * hopefully some pages are backed by swap storage. + */ + ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages)); + + ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30)); + + /* Check we still see the updated data after restoring from swap. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); + destroy_cgroup(); +} + /* * Read anonymous memory multiple times. */
We were not correctly copying PTE dirty bits to pages during migrate_vma_setup() calls. This could potentially lead to data loss, so add a test for this. Signed-off-by: Alistair Popple <apopple@nvidia.com> --- tools/testing/selftests/vm/hmm-tests.c | 124 ++++++++++++++++++++++++++- 1 file changed, 124 insertions(+)