[v2] KVM: selftests: Fix dirty_log_page_splitting_test due to page migration

Message ID 20240122064053.2825097-1-tao1.su@linux.intel.com (mailing list archive)
State New, archived
Series [v2] KVM: selftests: Fix dirty_log_page_splitting_test due to page migration

Commit Message

Tao Su Jan. 22, 2024, 6:40 a.m. UTC
In dirty_log_page_splitting_test, vm_get_stat(vm, "pages_4k") can
gradually decrease before dirty logging is enabled. The reason is
that the backing sources of some pages (test code and per-vCPU
stacks) are not HugeTLB, so those pages may be migrated.

Requiring that NUMA balancing be disabled isn't going to fix the
underlying issue, it's just guarding against one of the more likely
culprits. Therefore, precisely validate only the test data pages,
i.e. ensure that no huge pages are left and that the number of 4k
pages is at least the number of 4k pages produced by splitting the
huge pages.

Reported-by: Yi Lai <yi1.lai@intel.com>
Signed-off-by: Tao Su <tao1.su@linux.intel.com>
Tested-by: Yi Lai <yi1.lai@intel.com>
---
Changelog:

v2:
  - Drop the requirement of NUMA balancing
  - Change the ASSERT conditions

v1:
  https://lore.kernel.org/all/20240117064441.2633784-1-tao1.su@linux.intel.com/
---
 .../kvm/x86_64/dirty_log_page_splitting_test.c     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)


base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d

Comments

Sean Christopherson Jan. 26, 2024, 8:39 p.m. UTC | #1
+David

On Mon, Jan 22, 2024, Tao Su wrote:
> In dirty_log_page_splitting_test, vm_get_stat(vm, "pages_4k") can
> gradually decrease before dirty logging is enabled. The reason is
> that the backing sources of some pages (test code and per-vCPU
> stacks) are not HugeTLB, so those pages may be migrated.
> 
> Requiring that NUMA balancing be disabled isn't going to fix the
> underlying issue, it's just guarding against one of the more likely
> culprits. Therefore, precisely validate only the test data pages,
> i.e. ensure that no huge pages are left and that the number of 4k
> pages is at least the number of 4k pages produced by splitting the
> huge pages.
> 
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Signed-off-by: Tao Su <tao1.su@linux.intel.com>
> Tested-by: Yi Lai <yi1.lai@intel.com>
> ---
> Changelog:
> 
> v2:
>   - Drop the requirement of NUMA balancing
>   - Change the ASSERT conditions
> 
> v1:
>   https://lore.kernel.org/all/20240117064441.2633784-1-tao1.su@linux.intel.com/
> ---
>  .../kvm/x86_64/dirty_log_page_splitting_test.c     | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> index 634c6bfcd572..63f9cd2b1e31 100644
> --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> @@ -92,7 +92,7 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	uint64_t host_num_pages;
>  	uint64_t pages_per_slot;
>  	int i;
> -	uint64_t total_4k_pages;
> +	uint64_t split_4k_pages;
>  	struct kvm_page_stats stats_populated;
>  	struct kvm_page_stats stats_dirty_logging_enabled;
>  	struct kvm_page_stats stats_dirty_pass[ITERATIONS];
> @@ -166,9 +166,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	memstress_destroy_vm(vm);
>  
>  	/* Make assertions about the page counts. */
> -	total_4k_pages = stats_populated.pages_4k;
> -	total_4k_pages += stats_populated.pages_2m * 512;
> -	total_4k_pages += stats_populated.pages_1g * 512 * 512;
> +	split_4k_pages = stats_populated.pages_2m * 512;
> +	split_4k_pages += stats_populated.pages_1g * 512 * 512;
>  
>  	/*
>  	 * Check that all huge pages were split. Since large pages can only
> @@ -180,11 +179,13 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	 */
>  	if (dirty_log_manual_caps) {
>  		TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
> -		TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
> +		TEST_ASSERT(stats_clear_pass[0].pages_4k >= split_4k_pages,
> +			    "The number of 4k pages should be at least equal to the split pages");
>  		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
>  	} else {
>  		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
> -		TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
> +		TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= split_4k_pages,
> +			    "The number of 4k pages should be at least equal to the split pages");
>  	}
>  
>  	/*
> @@ -192,7 +193,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	 * memory again, the page counts should be the same as they were
>  	 * right after initial population of memory.
>  	 */
> -	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
>  	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
>  	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);

Isn't it possible that something other than guest data could be mapped by a THP
hugepage, and that that hugepage could get shattered between the initial run and
the re-population run?

The test knows (or at least, darn well should know) exactly how much memory is
being dirty logged.  Rather than rely *only* on before/after heuristics, can't
we assert that the _delta_, i.e. the number of hugepages that are split, and then
the number of hugepages that are reconstituted, is greater than or equal to the
size of the memslots being dirty logged?

>  }
> 
> base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
> -- 
> 2.34.1
>
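
For illustration, a rough sketch of the delta check Sean describes, as a hypothetical
helper reusing the test's existing kvm_page_stats fields and TEST_ASSERT():

    /* Hypothetical helper; "before" is the stats snapshot taken right after
     * population, "after" the snapshot taken once all hugepages should have
     * been split.
     */
    static void assert_split_covers_test_mem(struct kvm_page_stats *before,
                                             struct kvm_page_stats *after,
                                             uint64_t test_data_pages)
    {
            /* 4KiB mappings gained by shattering 2MiB and 1GiB mappings. */
            uint64_t split_4k = (before->pages_2m - after->pages_2m) * 512 +
                                (before->pages_1g - after->pages_1g) * 512 * 512;

            TEST_ASSERT(split_4k >= test_data_pages,
                        "Only %lu 4KiB pages were split, expected at least %lu",
                        split_4k, test_data_pages);
    }

e.g. invoked as assert_split_covers_test_mem(&stats_populated,
&stats_dirty_logging_enabled, host_num_pages).
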
Tao Su Jan. 29, 2024, 8:21 a.m. UTC | #2
On Fri, Jan 26, 2024 at 12:39:49PM -0800, Sean Christopherson wrote:
> +David
> 

[ ... ]

> >  
> >  	/*
> > @@ -192,7 +193,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
> >  	 * memory again, the page counts should be the same as they were
> >  	 * right after initial population of memory.
> >  	 */
> > -	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
> >  	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
> >  	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
> 
> Isn't it possible that something other than guest data could be mapped by a THP
> hugepage, and that that hugepage could get shattered between the initial run and
> the re-population run?

Good catch, I found that if the backing source is specified as THP, all hugepages
can also be migrated.

> 
> The test knows (or at least, darn well should know) exactly how much memory is
> being dirty logged.  Rather than rely *only* on before/after heuristics, can't
> we assert that the _delta_, i.e. the number of hugepages that are split, and then
> the number of hugepages that are reconstituted, is greater than or equal to the
> size of the memslots being dirty logged?

Due to page migration, the values from get_page_stats() are not reliable (including
pages_2m and pages_1g), and dirty logging can only count the pages that have been
split. It may be possible to use the existing guest_num_pages to construct the
following assert condition:

        guest_num_pages <= the number of dirty pages

Do you think this assert condition is correct and enough?

Thanks,
Tao

> 
> >  }
> > 
> > base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
> > -- 
> > 2.34.1
> >
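
For illustration, the condition Tao proposes could be checked by summing the dirty
bitmaps the test already collects. A sketch only, assuming bitmap_weight() from
tools/include/linux/bitmap.h and the test's existing bitmaps, pages_per_slot and
guest_num_pages variables:

    uint64_t nr_dirty_pages = 0;

    for (i = 0; i < SLOTS; i++)
            nr_dirty_pages += bitmap_weight(bitmaps[i], pages_per_slot);

    TEST_ASSERT(guest_num_pages <= nr_dirty_pages,
                "Expected at least %lu dirty pages, found %lu",
                guest_num_pages, nr_dirty_pages);
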
Sean Christopherson Jan. 29, 2024, 5:32 p.m. UTC | #3
On Mon, Jan 29, 2024, Tao Su wrote:
> On Fri, Jan 26, 2024 at 12:39:49PM -0800, Sean Christopherson wrote:
> > +David
> > 
> 
> [ ... ]
> 
> > >  
> > >  	/*
> > > @@ -192,7 +193,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
> > >  	 * memory again, the page counts should be the same as they were
> > >  	 * right after initial population of memory.
> > >  	 */
> > > -	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
> > >  	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
> > >  	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
> > 
> > Isn't it possible that something other than guest data could be mapped by a THP
> > hugepage, and that that hugepage could get shattered between the initial run and
> > the re-population run?
> 
> Good catch, I found that if the backing source is specified as THP, all hugepages
> can also be migrated.

The backing source for the test slots?  Using THP is inherently fragile for this
test, and IMO is firmly out of scope.  I was talking about any memslots allocated
by the core library, e.g. for the test's code and page tables.  Those should be
forced to be MADV_NOHUGEPAGE, though if the allocations are smaller than 2MiB
(I forget how much we allocate), that would suffice for now.

If we ensure the other memslots can only use 4KiB, then it's only the *.pages_4k
checks that are problematic.  Then the hugepage counts can be precise.

Something like this is what I'm thinking (again, assuming the "other" memslot
created by the library can't use THP).

---
 .../x86_64/dirty_log_page_splitting_test.c    | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
index 634c6bfcd572..4864cf3fae57 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	uint64_t host_num_pages;
 	uint64_t pages_per_slot;
 	int i;
-	uint64_t total_4k_pages;
 	struct kvm_page_stats stats_populated;
 	struct kvm_page_stats stats_dirty_logging_enabled;
 	struct kvm_page_stats stats_dirty_pass[ITERATIONS];
@@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
 	pages_per_slot = host_num_pages / SLOTS;
+	TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+	TEST_ASSERT(!(host_num_pages % 512),
+		    "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
 
 	bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
 
@@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	memstress_free_bitmaps(bitmaps, SLOTS);
 	memstress_destroy_vm(vm);
 
-	/* Make assertions about the page counts. */
-	total_4k_pages = stats_populated.pages_4k;
-	total_4k_pages += stats_populated.pages_2m * 512;
-	total_4k_pages += stats_populated.pages_1g * 512 * 512;
+	TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+			stats_populated.pages_1g * 512 * 512), host_num_pages);
 
 	/*
 	 * Check that all huge pages were split. Since large pages can only
@@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	 */
 	if (dirty_log_manual_caps) {
 		TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-		TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+		TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+			    "Expected at least '%lu' 4KiB pages, found only '%lu'",
+			    host_num_pages, stats_clear_pass[0].pages_4k);
 		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
 	} else {
 		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-		TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+		TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+			    "Expected at least '%lu' 4KiB pages, found only '%lu'",
+			    host_num_pages, stats_dirty_logging_enabled.pages_4k);
 	}
 
 	/*
 	 * Once dirty logging is disabled and the vCPUs have touched all their
-	 * memory again, the page counts should be the same as they were
+	 * memory again, the hugepage counts should be the same as they were
 	 * right after initial population of memory.
 	 */
-	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
 	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
 	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
 }

base-commit: 0762cdfe8ee16e4035b0ad27418686ef0452932f
--
Tao Su Jan. 30, 2024, 8:04 a.m. UTC | #4
On Mon, Jan 29, 2024 at 09:32:15AM -0800, Sean Christopherson wrote:
> On Mon, Jan 29, 2024, Tao Su wrote:
> > On Fri, Jan 26, 2024 at 12:39:49PM -0800, Sean Christopherson wrote:
> > > +David
> > > 
> > 
> > [ ... ]
> > 
> > > >  
> > > >  	/*
> > > > @@ -192,7 +193,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
> > > >  	 * memory again, the page counts should be the same as they were
> > > >  	 * right after initial population of memory.
> > > >  	 */
> > > > -	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
> > > >  	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
> > > >  	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
> > > 
> > > Isn't it possible that something other than guest data could be mapped by a THP
> > > hugepage, and that that hugepage could get shattered between the initial run and
> > > the re-population run?
> > 
> > Good catch, I found that if the backing source is specified as THP, all hugepages
> > can also be migrated.
> 
> The backing source for the test slots?  Using THP is inherently fragile for this
> test, and IMO is firmly out of scope.

Yes, the user can optionally specify the backing source of test slots, e.g.,

    ./x86_64/dirty_log_page_splitting_test -s anonymous_thp

but we already have a hint: "This test will only work reliably with HugeTLB memory.
It can work with THP, but that is best effort."

> I was talking about any memslots allocated by the core library, e.g. for the
> test's code and page tables.  Those should be forced to be MADV_NOHUGEPAGE,
> though if the allocations are smaller than 2MiB (I forget how much we allocate),
> that would suffice for now.

The "other" memslot is larger than 2MiB, and in this test, the memory that the guest
can touch in the "other" memslot also exceeds 2MiB. It seems the existing code already
forces the memory of VM_MEM_SRC_ANONYMOUS to MADV_NOHUGEPAGE, e.g.
in vm_userspace_mem_region_add():

    ret = madvise(region->host_mem, npages * vm->page_size,
		          src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
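
As a minimal standalone illustration of that THP opt-out (a sketch, not code from
the thread):

    #include <assert.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 4 * 1024 * 1024;
            void *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            assert(mem != MAP_FAILED);
            /* Keep khugepaged from collapsing this range into a THP. */
            assert(!madvise(mem, len, MADV_NOHUGEPAGE));
            return 0;
    }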

> 
> If we ensure the other memslots can only use 4KiB, then it's only the *.pages_4k
> checks that are problematic.  Then the hugepage counts can be precise.

Yes, I see.

> 
> Something like this is what I'm thinking (again, assuming the "other" memslot
> created by the library can't use THP).

The code below looks good to me and the test passes on our machine.

Thanks,
Tao

> 
> ---
>  .../x86_64/dirty_log_page_splitting_test.c    | 21 +++++++++++--------
>  1 file changed, 12 insertions(+), 9 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> index 634c6bfcd572..4864cf3fae57 100644
> --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
> @@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	uint64_t host_num_pages;
>  	uint64_t pages_per_slot;
>  	int i;
> -	uint64_t total_4k_pages;
>  	struct kvm_page_stats stats_populated;
>  	struct kvm_page_stats stats_dirty_logging_enabled;
>  	struct kvm_page_stats stats_dirty_pass[ITERATIONS];
> @@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
>  	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
>  	pages_per_slot = host_num_pages / SLOTS;
> +	TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
> +	TEST_ASSERT(!(host_num_pages % 512),
> +		    "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
>  
>  	bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
>  
> @@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	memstress_free_bitmaps(bitmaps, SLOTS);
>  	memstress_destroy_vm(vm);
>  
> -	/* Make assertions about the page counts. */
> -	total_4k_pages = stats_populated.pages_4k;
> -	total_4k_pages += stats_populated.pages_2m * 512;
> -	total_4k_pages += stats_populated.pages_1g * 512 * 512;
> +	TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
> +			stats_populated.pages_1g * 512 * 512), host_num_pages);
>  
>  	/*
>  	 * Check that all huge pages were split. Since large pages can only
> @@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused)
>  	 */
>  	if (dirty_log_manual_caps) {
>  		TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
> -		TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
> +		TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
> +			    "Expected at least '%lu' 4KiB pages, found only '%lu'",
> +			    host_num_pages, stats_clear_pass[0].pages_4k);
>  		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
>  	} else {
>  		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
> -		TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
> +		TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
> +			    "Expected at least '%lu' 4KiB pages, found only '%lu'",
> +			    host_num_pages, stats_clear_pass[0].pages_4k);
>  	}
>  
>  	/*
>  	 * Once dirty logging is disabled and the vCPUs have touched all their
> -	 * memory again, the page counts should be the same as they were
> +	 * memory again, the hugepage counts should be the same as they were
>  	 * right after initial population of memory.
>  	 */
> -	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
>  	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
>  	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
>  }
> 
> base-commit: 0762cdfe8ee16e4035b0ad27418686ef0452932f
> -- 
>

Patch

diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
index 634c6bfcd572..63f9cd2b1e31 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -92,7 +92,7 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	uint64_t host_num_pages;
 	uint64_t pages_per_slot;
 	int i;
-	uint64_t total_4k_pages;
+	uint64_t split_4k_pages;
 	struct kvm_page_stats stats_populated;
 	struct kvm_page_stats stats_dirty_logging_enabled;
 	struct kvm_page_stats stats_dirty_pass[ITERATIONS];
@@ -166,9 +166,8 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	memstress_destroy_vm(vm);
 
 	/* Make assertions about the page counts. */
-	total_4k_pages = stats_populated.pages_4k;
-	total_4k_pages += stats_populated.pages_2m * 512;
-	total_4k_pages += stats_populated.pages_1g * 512 * 512;
+	split_4k_pages = stats_populated.pages_2m * 512;
+	split_4k_pages += stats_populated.pages_1g * 512 * 512;
 
 	/*
 	 * Check that all huge pages were split. Since large pages can only
@@ -180,11 +179,13 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	 */
 	if (dirty_log_manual_caps) {
 		TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
-		TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+		TEST_ASSERT(stats_clear_pass[0].pages_4k >= split_4k_pages,
+			    "The number of 4k pages should be at least equal to the split pages");
 		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
 	} else {
 		TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
-		TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+		TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= split_4k_pages,
+			    "The number of 4k pages should be at least equal to the split pages");
 	}
 
 	/*
@@ -192,7 +193,6 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 	 * memory again, the page counts should be the same as they were
 	 * right after initial population of memory.
 	 */
-	TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
 	TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
 	TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
 }