diff mbox series

[2/3] mm/gup_benchmark: support pin_user_pages() and related calls

Message ID 20200125021115.731629-3-jhubbard@nvidia.com (mailing list archive)
State New
Headers show
Series mm/gup: track FOLL_PIN pages (follow on from v12) | expand

Commit Message

John Hubbard Jan. 25, 2020, 2:11 a.m. UTC
Up until now, gup_benchmark supported testing of the
following kernel functions:

* get_user_pages(): via the '-U' command line option
* get_user_pages_longterm(): via the '-L' command line option
* get_user_pages_fast(): as the default (no options required)

Add test coverage for the new corresponding pin_*() functions:

* pin_user_pages_fast(): via the '-a' command line option
* pin_user_pages():      via the '-b' command line option

Also, add an option for clarity: '-u' for what is now (still) the
default choice: get_user_pages_fast().

Also, for the commands that set FOLL_PIN, verify that the pages
really are dma-pinned, via the new is_dma_pinned() routine.
Those commands are:

    PIN_FAST_BENCHMARK     : calls pin_user_pages_fast()
    PIN_BENCHMARK          : calls pin_user_pages()

In between the calls to pin_*() and unpin_user_pages(),
check each page: if page_dma_pinned() returns false, then
WARN and return.

Do this outside of the benchmark timestamps, so that it doesn't
affect reported times.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 mm/gup_benchmark.c                         | 70 ++++++++++++++++++++--
 tools/testing/selftests/vm/gup_benchmark.c | 15 ++++-
 2 files changed, 79 insertions(+), 6 deletions(-)

Comments

Nathan Chancellor Jan. 27, 2020, 8:52 p.m. UTC | #1
Hi John,

On Fri, Jan 24, 2020 at 06:11:14PM -0800, John Hubbard wrote:
> Up until now, gup_benchmark supported testing of the
> following kernel functions:
> 
> * get_user_pages(): via the '-U' command line option
> * get_user_pages_longterm(): via the '-L' command line option
> * get_user_pages_fast(): as the default (no options required)
> 
> Add test coverage for the new corresponding pin_*() functions:
> 
> * pin_user_pages_fast(): via the '-a' command line option
> * pin_user_pages():      via the '-b' command line option
> 
> Also, add an option for clarity: '-u' for what is now (still) the
> default choice: get_user_pages_fast().
> 
> Also, for the commands that set FOLL_PIN, verify that the pages
> really are dma-pinned, via the new is_dma_pinned() routine.
> Those commands are:
> 
>     PIN_FAST_BENCHMARK     : calls pin_user_pages_fast()
>     PIN_BENCHMARK          : calls pin_user_pages()
> 
> In between the calls to pin_*() and unpin_user_pages(),
> check each page: if page_dma_pinned() returns false, then
> WARN and return.
> 
> Do this outside of the benchmark timestamps, so that it doesn't
> affect reported times.
> 
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> ---
>  mm/gup_benchmark.c                         | 70 ++++++++++++++++++++--
>  tools/testing/selftests/vm/gup_benchmark.c | 15 ++++-
>  2 files changed, 79 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
> index 8dba38e79a9f..3d5fb765e4e6 100644
> --- a/mm/gup_benchmark.c
> +++ b/mm/gup_benchmark.c
> @@ -8,6 +8,8 @@
>  #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
>  #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
>  #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
> +#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
> +#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
>  
>  struct gup_benchmark {
>  	__u64 get_delta_usec;
> @@ -19,6 +21,47 @@ struct gup_benchmark {
>  	__u64 expansion[10];	/* For future use */
>  };
>  
> +static void put_back_pages(int cmd, struct page **pages, unsigned long nr_pages)

We received a Clang build report on this patch because the use of
PIN_FAST_BENCHMARK and PIN_BENCHMARK in the switch statement below will
overflow int; this should be unsigned int to match the cmd parameter in
the ioctls.

The report can be read here if you care for it:

https://groups.google.com/d/msg/clang-built-linux/gyGayC_dnis/D1celSStEgAJ

Cheers,
Nathan

> +{
> +	int i;
> +
> +	switch (cmd) {
> +	case GUP_FAST_BENCHMARK:
> +	case GUP_LONGTERM_BENCHMARK:
> +	case GUP_BENCHMARK:
> +		for (i = 0; i < nr_pages; i++)
> +			put_page(pages[i]);
> +		break;
> +
> +	case PIN_FAST_BENCHMARK:
> +	case PIN_BENCHMARK:
> +		unpin_user_pages(pages, nr_pages);
> +		break;
> +	}
> +}
> +
> +static void verify_dma_pinned(int cmd, struct page **pages,
> +			      unsigned long nr_pages)
> +{
> +	int i;
> +	struct page *page;
> +
> +	switch (cmd) {
> +	case PIN_FAST_BENCHMARK:
> +	case PIN_BENCHMARK:
> +		for (i = 0; i < nr_pages; i++) {
> +			page = pages[i];
> +			if (WARN(!page_dma_pinned(page),
> +				 "pages[%d] is NOT dma-pinned\n", i)) {
> +
> +				dump_page(page, "gup_benchmark failure");
> +				break;
> +			}
> +		}
> +		break;
> +	}
> +}
> +
>  static int __gup_benchmark_ioctl(unsigned int cmd,
>  		struct gup_benchmark *gup)
>  {
> @@ -66,6 +109,14 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
>  			nr = get_user_pages(addr, nr, gup->flags, pages + i,
>  					    NULL);
>  			break;
> +		case PIN_FAST_BENCHMARK:
> +			nr = pin_user_pages_fast(addr, nr, gup->flags,
> +						 pages + i);
> +			break;
> +		case PIN_BENCHMARK:
> +			nr = pin_user_pages(addr, nr, gup->flags, pages + i,
> +					    NULL);
> +			break;
>  		default:
>  			kvfree(pages);
>  			ret = -EINVAL;
> @@ -78,15 +129,22 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
>  	}
>  	end_time = ktime_get();
>  
> +	/* Shifting the meaning of nr_pages: now it is actual number pinned: */
> +	nr_pages = i;
> +
>  	gup->get_delta_usec = ktime_us_delta(end_time, start_time);
>  	gup->size = addr - gup->addr;
>  
> +	/*
> +	 * Take an un-benchmark-timed moment to verify DMA pinned
> +	 * state: print a warning if any non-dma-pinned pages are found:
> +	 */
> +	verify_dma_pinned(cmd, pages, nr_pages);
> +
>  	start_time = ktime_get();
> -	for (i = 0; i < nr_pages; i++) {
> -		if (!pages[i])
> -			break;
> -		put_page(pages[i]);
> -	}
> +
> +	put_back_pages(cmd, pages, nr_pages);
> +
>  	end_time = ktime_get();
>  	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
>  
> @@ -105,6 +163,8 @@ static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
>  	case GUP_FAST_BENCHMARK:
>  	case GUP_LONGTERM_BENCHMARK:
>  	case GUP_BENCHMARK:
> +	case PIN_FAST_BENCHMARK:
> +	case PIN_BENCHMARK:
>  		break;
>  	default:
>  		return -EINVAL;
> diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
> index 389327e9b30a..43b4dfe161a2 100644
> --- a/tools/testing/selftests/vm/gup_benchmark.c
> +++ b/tools/testing/selftests/vm/gup_benchmark.c
> @@ -18,6 +18,10 @@
>  #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
>  #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
>  
> +/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
> +#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
> +#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
> +
>  /* Just the flags we need, copied from mm.h: */
>  #define FOLL_WRITE	0x01	/* check pte is writable */
>  
> @@ -40,8 +44,14 @@ int main(int argc, char **argv)
>  	char *file = "/dev/zero";
>  	char *p;
>  
> -	while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
> +	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
>  		switch (opt) {
> +		case 'a':
> +			cmd = PIN_FAST_BENCHMARK;
> +			break;
> +		case 'b':
> +			cmd = PIN_BENCHMARK;
> +			break;
>  		case 'm':
>  			size = atoi(optarg) * MB;
>  			break;
> @@ -63,6 +73,9 @@ int main(int argc, char **argv)
>  		case 'U':
>  			cmd = GUP_BENCHMARK;
>  			break;
> +		case 'u':
> +			cmd = GUP_FAST_BENCHMARK;
> +			break;
>  		case 'w':
>  			write = 1;
>  			break;
> -- 
> 2.25.0
>
John Hubbard Jan. 27, 2020, 9:32 p.m. UTC | #2
On 1/27/20 12:52 PM, Nathan Chancellor wrote:
...
>> --- a/mm/gup_benchmark.c
>> +++ b/mm/gup_benchmark.c
>> @@ -8,6 +8,8 @@
>>  #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
>>  #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
>>  #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
>> +#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
>> +#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
>>  
>>  struct gup_benchmark {
>>  	__u64 get_delta_usec;
>> @@ -19,6 +21,47 @@ struct gup_benchmark {
>>  	__u64 expansion[10];	/* For future use */
>>  };
>>  
>> +static void put_back_pages(int cmd, struct page **pages, unsigned long nr_pages)
> 
> We received a Clang build report on this patch because the use of
> PIN_FAST_BENCHMARK and PIN_BENCHMARK in the switch statement below will
> overflow int; this should be unsigned int to match the cmd parameter in
> the ioctls.


Thanks for the report! Yes, that should have been "unsigned int cmd", to match the
one in the ioctls, just as you said.

I'll apply this diff, for the next version of the series:

diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 3d5fb765e4e6..7d5573083ab3 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -21,7 +21,8 @@ struct gup_benchmark {
        __u64 expansion[10];    /* For future use */
 };
 
-static void put_back_pages(int cmd, struct page **pages, unsigned long nr_pages)
+static void put_back_pages(unsigned int cmd, struct page **pages,
+                          unsigned long nr_pages)
 {
        int i;
 
@@ -40,7 +41,7 @@ static void put_back_pages(int cmd, struct page **pages, unsigned long nr_pages)
        }
 }
 
-static void verify_dma_pinned(int cmd, struct page **pages,
+static void verify_dma_pinned(unsigned int cmd, struct page **pages,
                              unsigned long nr_pages)
 {
        int i;



thanks,
diff mbox series

Patch

diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 8dba38e79a9f..3d5fb765e4e6 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -8,6 +8,8 @@ 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
 #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
 #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
+#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
 
 struct gup_benchmark {
 	__u64 get_delta_usec;
@@ -19,6 +21,47 @@  struct gup_benchmark {
 	__u64 expansion[10];	/* For future use */
 };
 
+static void put_back_pages(int cmd, struct page **pages, unsigned long nr_pages)
+{
+	int i;
+
+	switch (cmd) {
+	case GUP_FAST_BENCHMARK:
+	case GUP_LONGTERM_BENCHMARK:
+	case GUP_BENCHMARK:
+		for (i = 0; i < nr_pages; i++)
+			put_page(pages[i]);
+		break;
+
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
+		unpin_user_pages(pages, nr_pages);
+		break;
+	}
+}
+
+static void verify_dma_pinned(int cmd, struct page **pages,
+			      unsigned long nr_pages)
+{
+	int i;
+	struct page *page;
+
+	switch (cmd) {
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
+		for (i = 0; i < nr_pages; i++) {
+			page = pages[i];
+			if (WARN(!page_dma_pinned(page),
+				 "pages[%d] is NOT dma-pinned\n", i)) {
+
+				dump_page(page, "gup_benchmark failure");
+				break;
+			}
+		}
+		break;
+	}
+}
+
 static int __gup_benchmark_ioctl(unsigned int cmd,
 		struct gup_benchmark *gup)
 {
@@ -66,6 +109,14 @@  static int __gup_benchmark_ioctl(unsigned int cmd,
 			nr = get_user_pages(addr, nr, gup->flags, pages + i,
 					    NULL);
 			break;
+		case PIN_FAST_BENCHMARK:
+			nr = pin_user_pages_fast(addr, nr, gup->flags,
+						 pages + i);
+			break;
+		case PIN_BENCHMARK:
+			nr = pin_user_pages(addr, nr, gup->flags, pages + i,
+					    NULL);
+			break;
 		default:
 			kvfree(pages);
 			ret = -EINVAL;
@@ -78,15 +129,22 @@  static int __gup_benchmark_ioctl(unsigned int cmd,
 	}
 	end_time = ktime_get();
 
+	/* Shifting the meaning of nr_pages: now it is actual number pinned: */
+	nr_pages = i;
+
 	gup->get_delta_usec = ktime_us_delta(end_time, start_time);
 	gup->size = addr - gup->addr;
 
+	/*
+	 * Take an un-benchmark-timed moment to verify DMA pinned
+	 * state: print a warning if any non-dma-pinned pages are found:
+	 */
+	verify_dma_pinned(cmd, pages, nr_pages);
+
 	start_time = ktime_get();
-	for (i = 0; i < nr_pages; i++) {
-		if (!pages[i])
-			break;
-		put_page(pages[i]);
-	}
+
+	put_back_pages(cmd, pages, nr_pages);
+
 	end_time = ktime_get();
 	gup->put_delta_usec = ktime_us_delta(end_time, start_time);
 
@@ -105,6 +163,8 @@  static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
 	case GUP_FAST_BENCHMARK:
 	case GUP_LONGTERM_BENCHMARK:
 	case GUP_BENCHMARK:
+	case PIN_FAST_BENCHMARK:
+	case PIN_BENCHMARK:
 		break;
 	default:
 		return -EINVAL;
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 389327e9b30a..43b4dfe161a2 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -18,6 +18,10 @@ 
 #define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
 #define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
 
+/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
+#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
+#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
+
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
 
@@ -40,8 +44,14 @@  int main(int argc, char **argv)
 	char *file = "/dev/zero";
 	char *p;
 
-	while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
+	while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
 		switch (opt) {
+		case 'a':
+			cmd = PIN_FAST_BENCHMARK;
+			break;
+		case 'b':
+			cmd = PIN_BENCHMARK;
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -63,6 +73,9 @@  int main(int argc, char **argv)
 		case 'U':
 			cmd = GUP_BENCHMARK;
 			break;
+		case 'u':
+			cmd = GUP_FAST_BENCHMARK;
+			break;
 		case 'w':
 			write = 1;
 			break;