Message ID | 20220810175830.2175089-4-coltonlewis@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: selftests: Randomize memory access of dirty_log_perf_test | expand |
On Wed, Aug 10, 2022 at 05:58:30PM +0000, Colton Lewis wrote: > Add the ability to use random_table to randomize the order in which > pages are accessed. Add the -a argument to enable this new > behavior. This should make accesses less predictable and make for a > more realistic test. It includes the possibility that the same pages > may be hit multiple times during an iteration. > > Signed-off-by: Colton Lewis <coltonlewis@google.com> > --- > .../testing/selftests/kvm/dirty_log_perf_test.c | 11 +++++++++-- > .../selftests/kvm/include/perf_test_util.h | 2 ++ > .../testing/selftests/kvm/lib/perf_test_util.c | 17 ++++++++++++++++- > 3 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c > index dcc5d44fc757..265cb4f7e088 100644 > --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c > +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c > @@ -132,6 +132,7 @@ struct test_params { > bool partition_vcpu_memory_access; > enum vm_mem_backing_src_type backing_src; > int slots; > + bool random_access; > uint32_t random_seed; > }; > > @@ -227,6 +228,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) > p->partition_vcpu_memory_access); > > perf_test_set_wr_fract(vm, p->wr_fract); > + perf_test_set_random_access(vm, p->random_access); > > guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; > guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); > @@ -357,10 +359,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) > static void help(char *name) > { > puts(""); > - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " > + printf("usage: %s [-h] [-a] [-r random seed] [-i iterations] [-p offset] [-g] " > "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" > "[-x memslots]\n", name); > puts(""); > + printf(" -a: access memory randomly rather than in order.\n"); > printf(" -i: specify iteration 
counts (default: %"PRIu64")\n", > TEST_HOST_LOOP_N); > printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" > @@ -403,6 +406,7 @@ int main(int argc, char *argv[]) > .partition_vcpu_memory_access = true, > .backing_src = DEFAULT_VM_MEM_SRC, > .slots = 1, > + .random_access = false, > .random_seed = time(NULL), > }; > int opt; > @@ -414,8 +418,11 @@ int main(int argc, char *argv[]) > > guest_modes_append_default(); > > - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) { > + while ((opt = getopt(argc, argv, "aeghi:p:m:nb:f:v:or:s:x:")) != -1) { > switch (opt) { > + case 'a': > + p.random_access = true; > + break; > case 'e': > /* 'e' is for evil. */ > run_vcpus_while_disabling_dirty_logging = true; > diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h > index 597875d0c3db..6c6f81ce2216 100644 > --- a/tools/testing/selftests/kvm/include/perf_test_util.h > +++ b/tools/testing/selftests/kvm/include/perf_test_util.h > @@ -39,6 +39,7 @@ struct perf_test_args { > > /* Run vCPUs in L2 instead of L1, if the architecture supports it. 
*/ > bool nested; > + bool random_access; > > struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; > }; > @@ -56,6 +57,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, > void perf_test_destroy_vm(struct kvm_vm *vm); > > void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); > +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access); > > void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); > void perf_test_join_vcpu_threads(int vcpus); > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c > index 3c7b93349fef..9838d1ad9166 100644 > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c > @@ -52,6 +52,9 @@ void perf_test_guest_code(uint32_t vcpu_idx) > struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; > uint64_t gva; > uint64_t pages; > + uint64_t addr; > + bool random_access = pta->random_access; > + bool populated = false; > int i; > > gva = vcpu_args->gva; > @@ -62,7 +65,11 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > while (true) { > for (i = 0; i < pages; i++) { > - uint64_t addr = gva + (i * pta->guest_page_size); > + if (populated && random_access) Skipping the populate phase makes sense to ensure everything is populated I guess. What was your rationale? Either way I think this policy should be driven by the test, rather than hard-coded in perf_test_guest_code(). i.e. Move the call perf_test_set_random_access() in dirty_log_perf_test.c to just after the population phase. 
> + addr = gva + > + ((random_table[vcpu_idx][i] % pages) * pta->guest_page_size); > + else > + addr = gva + (i * pta->guest_page_size); > > if (random_table[vcpu_idx][i] % 100 < pta->wr_fract) > *(uint64_t *)addr = 0x0123456789ABCDEF; > @@ -70,6 +77,7 @@ void perf_test_guest_code(uint32_t vcpu_idx) > READ_ONCE(*(uint64_t *)addr); > } > > + populated = true; > GUEST_SYNC(1); > } > } > @@ -169,6 +177,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, > > /* By default vCPUs will write to memory. */ > pta->wr_fract = 100; > + pta->random_access = false; > > /* > * Snapshot the non-huge page size. This is used by the guest code to > @@ -276,6 +285,12 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) > sync_global_to_guest(vm, perf_test_args); > } > > +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access) > +{ > + perf_test_args.random_access = random_access; > + sync_global_to_guest(vm, perf_test_args); > +} > + > uint64_t __weak perf_test_nested_pages(int nr_vcpus) > { > return 0; > -- > 2.37.1.559.g78731f0fdb-goog >
On Wed, Aug 10, 2022 at 04:49:23PM -0700, David Matlack wrote: > On Wed, Aug 10, 2022 at 05:58:30PM +0000, Colton Lewis wrote: > > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c > > index 3c7b93349fef..9838d1ad9166 100644 > > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c > > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c > > @@ -52,6 +52,9 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; > > uint64_t gva; > > uint64_t pages; > > + uint64_t addr; > > + bool random_access = pta->random_access; > > + bool populated = false; > > int i; > > > > gva = vcpu_args->gva; > > @@ -62,7 +65,11 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > > > while (true) { > > for (i = 0; i < pages; i++) { > > - uint64_t addr = gva + (i * pta->guest_page_size); > > + if (populated && random_access) > > Skipping the populate phase makes sense to ensure everything is > populated I guess. What was your rational? That's it. Wanted to ensure everything was populated. Random population won't hit every page, but those unpopulated pages might be hit on subsequent iterations. I originally let population be random too and suspect this was driving an odd behavior I noticed early in testing where later iterations would be much faster than earlier ones. > Either way I think this policy should be driven by the test, rather than > harde-coded in perf_test_guest_code(). i.e. Move the call > perf_test_set_random_access() in dirty_log_perf_test.c to just after the > population phase. That makes sense. Will do.
On Fri, Aug 12, 2022 at 9:24 AM Colton Lewis <coltonlewis@google.com> wrote: > > On Wed, Aug 10, 2022 at 04:49:23PM -0700, David Matlack wrote: > > On Wed, Aug 10, 2022 at 05:58:30PM +0000, Colton Lewis wrote: > > > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c > > > index 3c7b93349fef..9838d1ad9166 100644 > > > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c > > > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c > > > @@ -52,6 +52,9 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > > struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; > > > uint64_t gva; > > > uint64_t pages; > > > + uint64_t addr; > > > + bool random_access = pta->random_access; > > > + bool populated = false; > > > int i; > > > > > > gva = vcpu_args->gva; > > > @@ -62,7 +65,11 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > > > > > while (true) { > > > for (i = 0; i < pages; i++) { > > > - uint64_t addr = gva + (i * pta->guest_page_size); > > > + if (populated && random_access) > > > > Skipping the populate phase makes sense to ensure everything is > > populated I guess. What was your rational? > > That's it. Wanted to ensure everything was populated. Random > population won't hit every page, but those unpopulated pages might be > hit on subsequent iterations. I originally let population be random > too and suspect this was driving an odd behavior I noticed early in > testing where later iterations would be much faster than earlier ones. > > > Either way I think this policy should be driven by the test, rather than > > harde-coded in perf_test_guest_code(). i.e. Move the call > > perf_test_set_random_access() in dirty_log_perf_test.c to just after the > > population phase. > > That makes sense. Will do. Ah but if you get rid of the table refill between iterations, each vCPU will access the same pages every iteration. 
At that point there's no reason to distinguish the populate phase from the other phases, so perhaps just drop the special case for the populate phase altogether?
On Fri, Aug 12, 2022 at 09:28:05AM -0700, David Matlack wrote: > On Fri, Aug 12, 2022 at 9:24 AM Colton Lewis <coltonlewis@google.com> wrote: > > > > On Wed, Aug 10, 2022 at 04:49:23PM -0700, David Matlack wrote: > > > On Wed, Aug 10, 2022 at 05:58:30PM +0000, Colton Lewis wrote: > > > > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c > > > > index 3c7b93349fef..9838d1ad9166 100644 > > > > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c > > > > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c > > > > @@ -52,6 +52,9 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > > > struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; > > > > uint64_t gva; > > > > uint64_t pages; > > > > + uint64_t addr; > > > > + bool random_access = pta->random_access; > > > > + bool populated = false; > > > > int i; > > > > > > > > gva = vcpu_args->gva; > > > > @@ -62,7 +65,11 @@ void perf_test_guest_code(uint32_t vcpu_idx) > > > > > > > > while (true) { > > > > for (i = 0; i < pages; i++) { > > > > - uint64_t addr = gva + (i * pta->guest_page_size); > > > > + if (populated && random_access) > > > > > > Skipping the populate phase makes sense to ensure everything is > > > populated I guess. What was your rational? > > > > That's it. Wanted to ensure everything was populated. Random > > population won't hit every page, but those unpopulated pages might be > > hit on subsequent iterations. I originally let population be random > > too and suspect this was driving an odd behavior I noticed early in > > testing where later iterations would be much faster than earlier ones. > > > > > Either way I think this policy should be driven by the test, rather than > > > harde-coded in perf_test_guest_code(). i.e. Move the call > > > perf_test_set_random_access() in dirty_log_perf_test.c to just after the > > > population phase. > > > > That makes sense. Will do. 
> > Ah but if you get rid of the table refill between iterations, each > vCPU will access the same pages every iteration. At that point there's > no reason to distinguish the populate phase from the other phases, so > perhaps just drop the special case for the populate phase altogether? You're right. Will do.
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index dcc5d44fc757..265cb4f7e088 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -132,6 +132,7 @@ struct test_params { bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; + bool random_access; uint32_t random_seed; }; @@ -227,6 +228,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) p->partition_vcpu_memory_access); perf_test_set_wr_fract(vm, p->wr_fract); + perf_test_set_random_access(vm, p->random_access); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -357,10 +359,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " + printf("usage: %s [-h] [-a] [-r random seed] [-i iterations] [-p offset] [-g] " "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" "[-x memslots]\n", name); puts(""); + printf(" -a: access memory randomly rather than in order.\n"); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n" @@ -403,6 +406,7 @@ int main(int argc, char *argv[]) .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, + .random_access = false, .random_seed = time(NULL), }; int opt; @@ -414,8 +418,11 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) { + while ((opt = getopt(argc, argv, "aeghi:p:m:nb:f:v:or:s:x:")) != -1) { switch (opt) { + case 'a': + p.random_access = true; + break; case 'e': /* 'e' is for evil. 
*/ run_vcpus_while_disabling_dirty_logging = true; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 597875d0c3db..6c6f81ce2216 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -39,6 +39,7 @@ struct perf_test_args { /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; + bool random_access; struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; @@ -56,6 +57,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, void perf_test_destroy_vm(struct kvm_vm *vm); void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access); void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); void perf_test_join_vcpu_threads(int vcpus); diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 3c7b93349fef..9838d1ad9166 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -52,6 +52,9 @@ void perf_test_guest_code(uint32_t vcpu_idx) struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; uint64_t gva; uint64_t pages; + uint64_t addr; + bool random_access = pta->random_access; + bool populated = false; int i; gva = vcpu_args->gva; @@ -62,7 +65,11 @@ void perf_test_guest_code(uint32_t vcpu_idx) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * pta->guest_page_size); + if (populated && random_access) + addr = gva + + ((random_table[vcpu_idx][i] % pages) * pta->guest_page_size); + else + addr = gva + (i * pta->guest_page_size); if (random_table[vcpu_idx][i] % 100 < pta->wr_fract) *(uint64_t *)addr = 0x0123456789ABCDEF; @@ -70,6 +77,7 @@ void perf_test_guest_code(uint32_t vcpu_idx) READ_ONCE(*(uint64_t *)addr); } + 
populated = true; GUEST_SYNC(1); } } @@ -169,6 +177,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, /* By default vCPUs will write to memory. */ pta->wr_fract = 100; + pta->random_access = false; /* * Snapshot the non-huge page size. This is used by the guest code to @@ -276,6 +285,12 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) sync_global_to_guest(vm, perf_test_args); } +void perf_test_set_random_access(struct kvm_vm *vm, bool random_access) +{ + perf_test_args.random_access = random_access; + sync_global_to_guest(vm, perf_test_args); +} + uint64_t __weak perf_test_nested_pages(int nr_vcpus) { return 0;
Add the ability to use random_table to randomize the order in which pages are accessed. Add the -a argument to enable this new behavior. This should make accesses less predictable and make for a more realistic test. It includes the possibility that the same pages may be hit multiple times during an iteration. Signed-off-by: Colton Lewis <coltonlewis@google.com> --- .../testing/selftests/kvm/dirty_log_perf_test.c | 11 +++++++++-- .../selftests/kvm/include/perf_test_util.h | 2 ++ .../testing/selftests/kvm/lib/perf_test_util.c | 17 ++++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-)