Message ID | 20240809103129.365029-3-dev.jain@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Improve migration by backing off earlier | expand |
On 8/9/24 04:31, Dev Jain wrote: > Do not fail the test for just a single instance of migration failure, > since migration is a best-effort service. The cover letter says: "Given that migration is a best-effort service, it is wrong to fail the test for just a single failure; hence, fail the test after 100 consecutive failures (where 100 is still a subjective choice)." You do want to mention the above here. The reason being, I would like to know what this does to the run-time of this test if migration fails and retried 100 times. > > Signed-off-by: Dev Jain <dev.jain@arm.com> > Suggested-by: David Hildenbrand <david@redhat.com> > Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> > Tested-by: Ryan Roberts <ryan.roberts@arm.com> > --- > tools/testing/selftests/mm/migration.c | 17 +++++++++++------ > 1 file changed, 11 insertions(+), 6 deletions(-) > > diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c > index 6908569ef406..64bcbb7151cf 100644 > --- a/tools/testing/selftests/mm/migration.c > +++ b/tools/testing/selftests/mm/migration.c > @@ -15,10 +15,10 @@ > #include <signal.h> > #include <time.h> > > -#define TWOMEG (2<<20) > -#define RUNTIME (20) > - > -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) > +#define TWOMEG (2<<20) > +#define RUNTIME (20) > +#define MAX_RETRIES 100 > +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) > > FIXTURE(migration) > { > @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) > int ret, tmp; > int status = 0; > struct timespec ts1, ts2; > + int failures = 0; > > if (clock_gettime(CLOCK_MONOTONIC, &ts1)) > return -1; > @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) > ret = move_pages(0, 1, (void **) &ptr, &n2, &status, > MPOL_MF_MOVE_ALL); > if (ret) { > - if (ret > 0) > + if (ret > 0) { > + /* Migration is best effort; try again */ > + if (++failures < MAX_RETRIES) > + continue; > printf("Didn't migrate %d pages\n", ret); > + } > else > perror("Couldn't migrate pages"); > return -2; > } > - > + failures = 0; > tmp = n2; > n2 = n1; > n1 = tmp; thanks, -- Shuah
On Fri, 9 Aug 2024, Shuah Khan wrote: > "Given that migration is a best-effort service, it is wrong to fail the > test for just a single failure; hence, fail the test after 100 consecutive > failures (where 100 is still a subjective choice)." > > You do want to mention the above here. > > The reason being, I would like to know what this does to the run-time of > this test if migration fails and retried 100 times. If we backoff earlier without engaging too much with the page then we can in turn affort to retry more times.
On 8/9/24 22:43, Shuah Khan wrote: > On 8/9/24 04:31, Dev Jain wrote: >> Do not fail the test for just a single instance of migration failure, >> since migration is a best-effort service. > > The cover letter says: > > "Given that migration is a best-effort service, it is wrong to fail the > test for just a single failure; hence, fail the test after 100 > consecutive > failures (where 100 is still a subjective choice)." > > You do want to mention the above here. Sure, shall update in v2. > > The reason being, I would like to know what this does to the run-time of > this test if migration fails and retried 100 times. Sure; just for the note, it won't affect the execution time of the test since that is controlled by a timeout mechanism. > >> >> Signed-off-by: Dev Jain <dev.jain@arm.com> >> Suggested-by: David Hildenbrand <david@redhat.com> >> Reviewed-by: Ryan Roberts <ryan.roberts@arm.com> >> Tested-by: Ryan Roberts <ryan.roberts@arm.com> >> --- >> tools/testing/selftests/mm/migration.c | 17 +++++++++++------ >> 1 file changed, 11 insertions(+), 6 deletions(-) >> >> diff --git a/tools/testing/selftests/mm/migration.c >> b/tools/testing/selftests/mm/migration.c >> index 6908569ef406..64bcbb7151cf 100644 >> --- a/tools/testing/selftests/mm/migration.c >> +++ b/tools/testing/selftests/mm/migration.c >> @@ -15,10 +15,10 @@ >> #include <signal.h> >> #include <time.h> >> -#define TWOMEG (2<<20) >> -#define RUNTIME (20) >> - >> -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) >> +#define TWOMEG (2<<20) >> +#define RUNTIME (20) >> +#define MAX_RETRIES 100 >> +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) >> FIXTURE(migration) >> { >> @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) >> int ret, tmp; >> int status = 0; >> struct timespec ts1, ts2; >> + int failures = 0; >> if (clock_gettime(CLOCK_MONOTONIC, &ts1)) >> return -1; >> @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) >> ret = move_pages(0, 1, (void **) &ptr, &n2, &status, >> MPOL_MF_MOVE_ALL); >> if (ret) { >> - if (ret > 0) >> + if (ret > 0) { >> + /* Migration is best effort; try again */ >> + if (++failures < MAX_RETRIES) >> + continue; >> printf("Didn't migrate %d pages\n", ret); >> + } >> else >> perror("Couldn't migrate pages"); >> return -2; >> } >> - >> + failures = 0; >> tmp = n2; >> n2 = n1; >> n1 = tmp; > > thanks, > -- Shuah
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 6908569ef406..64bcbb7151cf 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -15,10 +15,10 @@ #include <signal.h> #include <time.h> -#define TWOMEG (2<<20) -#define RUNTIME (20) - -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +#define TWOMEG (2<<20) +#define RUNTIME (20) +#define MAX_RETRIES 100 +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) FIXTURE(migration) { @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) int ret, tmp; int status = 0; struct timespec ts1, ts2; + int failures = 0; if (clock_gettime(CLOCK_MONOTONIC, &ts1)) return -1; @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) ret = move_pages(0, 1, (void **) &ptr, &n2, &status, MPOL_MF_MOVE_ALL); if (ret) { - if (ret > 0) + if (ret > 0) { + /* Migration is best effort; try again */ + if (++failures < MAX_RETRIES) + continue; printf("Didn't migrate %d pages\n", ret); + } else perror("Couldn't migrate pages"); return -2; } - + failures = 0; tmp = n2; n2 = n1; n1 = tmp;