diff mbox series

dma-mapping: benchmark: Add support for multi-pages map/unmap

Message ID 1616059770-194515-1-git-send-email-chenxiang66@hisilicon.com (mailing list archive)
State Accepted
Commit ca947482b0b30443e6da1f0f5ba7244e34a4f65a
Headers show
Series dma-mapping: benchmark: Add support for multi-pages map/unmap | expand

Commit Message

chenxiang March 18, 2021, 9:29 a.m. UTC
From: Xiang Chen <chenxiang66@hisilicon.com>

Currently it only support one page map/unmap once a time for dma-map
benchmark, but there are some other scenaries which need to support for
multi-page map/unmap: for those multi-pages interfaces such as
dma_alloc_coherent() and dma_map_sg(), the time spent on multi-pages
map/unmap is not the time of a single page * npages (not linear) as it
may use block description instead of page description when it is satified
with the size such as 2M/1G, and also it can send a single TLB invalidation
command to invalidate multi-pages instead of multi-times when RIL is
enabled (which will short the time of unmap). So it is necessary to add
support for multi-pages map/unmap.

Add a parameter "-g" to support multi-pages map/unmap.

Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
---
 kernel/dma/map_benchmark.c                      | 21 ++++++++++++++-------
 tools/testing/selftests/dma/dma_map_benchmark.c | 20 ++++++++++++++++----
 2 files changed, 30 insertions(+), 11 deletions(-)

Comments

Song Bao Hua (Barry Song) March 18, 2021, 9:37 a.m. UTC | #1
> -----Original Message-----
> From: chenxiang (M)
> Sent: Thursday, March 18, 2021 10:30 PM
> To: hch@lst.de; m.szyprowski@samsung.com; robin.murphy@arm.com; Song Bao Hua
> (Barry Song) <song.bao.hua@hisilicon.com>
> Cc: iommu@lists.linux-foundation.org; linuxarm@openeuler.org;
> linux-kselftest@vger.kernel.org; chenxiang (M) <chenxiang66@hisilicon.com>
> Subject: [PATCH] dma-mapping: benchmark: Add support for multi-pages map/unmap
> 
> From: Xiang Chen <chenxiang66@hisilicon.com>
> 
> Currently it only support one page map/unmap once a time for dma-map
> benchmark, but there are some other scenaries which need to support for
> multi-page map/unmap: for those multi-pages interfaces such as
> dma_alloc_coherent() and dma_map_sg(), the time spent on multi-pages
> map/unmap is not the time of a single page * npages (not linear) as it
> may use block description instead of page description when it is satified
> with the size such as 2M/1G, and also it can send a single TLB invalidation
> command to invalidate multi-pages instead of multi-times when RIL is
> enabled (which will short the time of unmap). So it is necessary to add
> support for multi-pages map/unmap.
> 
> Add a parameter "-g" to support multi-pages map/unmap.
> 
> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> ---

Acked-by: Barry Song <song.bao.hua@hisilicon.com>

>  kernel/dma/map_benchmark.c                      | 21 ++++++++++++++-------
>  tools/testing/selftests/dma/dma_map_benchmark.c | 20 ++++++++++++++++----
>  2 files changed, 30 insertions(+), 11 deletions(-)
> 
> diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
> index e0e64f8..a5c1b01 100644
> --- a/kernel/dma/map_benchmark.c
> +++ b/kernel/dma/map_benchmark.c
> @@ -38,7 +38,8 @@ struct map_benchmark {
>  	__u32 dma_bits; /* DMA addressing capability */
>  	__u32 dma_dir; /* DMA data direction */
>  	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> -	__u8 expansion[80];	/* For future use */
> +	__u32 granule;	/* how many PAGE_SIZE will do map/unmap once a time */
> +	__u8 expansion[76];	/* For future use */
>  };
> 
>  struct map_benchmark_data {
> @@ -58,9 +59,11 @@ static int map_benchmark_thread(void *data)
>  	void *buf;
>  	dma_addr_t dma_addr;
>  	struct map_benchmark_data *map = data;
> +	int npages = map->bparam.granule;
> +	u64 size = npages * PAGE_SIZE;
>  	int ret = 0;
> 
> -	buf = (void *)__get_free_page(GFP_KERNEL);
> +	buf = alloc_pages_exact(size, GFP_KERNEL);
>  	if (!buf)
>  		return -ENOMEM;
> 
> @@ -76,10 +79,10 @@ static int map_benchmark_thread(void *data)
>  		 * 66 means evertything goes well! 66 is lucky.
>  		 */
>  		if (map->dir != DMA_FROM_DEVICE)
> -			memset(buf, 0x66, PAGE_SIZE);
> +			memset(buf, 0x66, size);
> 
>  		map_stime = ktime_get();
> -		dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir);
> +		dma_addr = dma_map_single(map->dev, buf, size, map->dir);
>  		if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
>  			pr_err("dma_map_single failed on %s\n",
>  				dev_name(map->dev));
> @@ -93,7 +96,7 @@ static int map_benchmark_thread(void *data)
>  		ndelay(map->bparam.dma_trans_ns);
> 
>  		unmap_stime = ktime_get();
> -		dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
> +		dma_unmap_single(map->dev, dma_addr, size, map->dir);
>  		unmap_etime = ktime_get();
>  		unmap_delta = ktime_sub(unmap_etime, unmap_stime);
> 
> @@ -112,7 +115,7 @@ static int map_benchmark_thread(void *data)
>  	}
> 
>  out:
> -	free_page((unsigned long)buf);
> +	free_pages_exact(buf, size);
>  	return ret;
>  }
> 
> @@ -203,7 +206,6 @@ static long map_benchmark_ioctl(struct file *file, unsigned
> int cmd,
>  	struct map_benchmark_data *map = file->private_data;
>  	void __user *argp = (void __user *)arg;
>  	u64 old_dma_mask;
> -
>  	int ret;
> 
>  	if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
> @@ -234,6 +236,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned
> int cmd,
>  			return -EINVAL;
>  		}
> 
> +		if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
> +			pr_err("invalid granule size\n");
> +			return -EINVAL;
> +		}
> +
>  		switch (map->bparam.dma_dir) {
>  		case DMA_MAP_BIDIRECTIONAL:
>  			map->dir = DMA_BIDIRECTIONAL;
> diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c
> b/tools/testing/selftests/dma/dma_map_benchmark.c
> index fb23ce9..6f2caa6 100644
> --- a/tools/testing/selftests/dma/dma_map_benchmark.c
> +++ b/tools/testing/selftests/dma/dma_map_benchmark.c
> @@ -40,7 +40,8 @@ struct map_benchmark {
>  	__u32 dma_bits; /* DMA addressing capability */
>  	__u32 dma_dir; /* DMA data direction */
>  	__u32 dma_trans_ns; /* time for DMA transmission in ns */
> -	__u8 expansion[80];	/* For future use */
> +	__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
> +	__u8 expansion[76];	/* For future use */
>  };
> 
>  int main(int argc, char **argv)
> @@ -51,11 +52,13 @@ int main(int argc, char **argv)
>  	int threads = 1, seconds = 20, node = -1;
>  	/* default dma mask 32bit, bidirectional DMA */
>  	int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
> +	/* default granule 1 PAGESIZE */
> +	int granule = 1;
> 
>  	int cmd = DMA_MAP_BENCHMARK;
>  	char *p;
> 
> -	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
> +	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
>  		switch (opt) {
>  		case 't':
>  			threads = atoi(optarg);
> @@ -75,6 +78,9 @@ int main(int argc, char **argv)
>  		case 'x':
>  			xdelay = atoi(optarg);
>  			break;
> +		case 'g':
> +			granule = atoi(optarg);
> +			break;
>  		default:
>  			return -1;
>  		}
> @@ -110,6 +116,11 @@ int main(int argc, char **argv)
>  		exit(1);
>  	}
> 
> +	if (granule < 1 || granule > 1024) {
> +		fprintf(stderr, "invalid granule size\n");
> +		exit(1);
> +	}
> +
>  	fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
>  	if (fd == -1) {
>  		perror("open");
> @@ -123,14 +134,15 @@ int main(int argc, char **argv)
>  	map.dma_bits = bits;
>  	map.dma_dir = dir;
>  	map.dma_trans_ns = xdelay;
> +	map.granule = granule;
> 
>  	if (ioctl(fd, cmd, &map)) {
>  		perror("ioctl");
>  		exit(1);
>  	}
> 
> -	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
> -			threads, seconds, node, dir[directions]);
> +	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s
> granule: %d\n",
> +			threads, seconds, node, dir[directions], granule);
>  	printf("average map latency(us):%.1f standard deviation:%.1f\n",
>  			map.avg_map_100ns/10.0, map.map_stddev/10.0);
>  	printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
> --
> 2.8.1

Thanks
Barry
Christoph Hellwig April 2, 2021, 2:41 p.m. UTC | #2
Thanks, applied.
diff mbox series

Patch

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index e0e64f8..a5c1b01 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -38,7 +38,8 @@  struct map_benchmark {
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
-	__u8 expansion[80];	/* For future use */
+	__u32 granule;	/* how many PAGE_SIZE will do map/unmap once a time */
+	__u8 expansion[76];	/* For future use */
 };
 
 struct map_benchmark_data {
@@ -58,9 +59,11 @@  static int map_benchmark_thread(void *data)
 	void *buf;
 	dma_addr_t dma_addr;
 	struct map_benchmark_data *map = data;
+	int npages = map->bparam.granule;
+	u64 size = npages * PAGE_SIZE;
 	int ret = 0;
 
-	buf = (void *)__get_free_page(GFP_KERNEL);
+	buf = alloc_pages_exact(size, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -76,10 +79,10 @@  static int map_benchmark_thread(void *data)
 		 * 66 means evertything goes well! 66 is lucky.
 		 */
 		if (map->dir != DMA_FROM_DEVICE)
-			memset(buf, 0x66, PAGE_SIZE);
+			memset(buf, 0x66, size);
 
 		map_stime = ktime_get();
-		dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir);
+		dma_addr = dma_map_single(map->dev, buf, size, map->dir);
 		if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
 			pr_err("dma_map_single failed on %s\n",
 				dev_name(map->dev));
@@ -93,7 +96,7 @@  static int map_benchmark_thread(void *data)
 		ndelay(map->bparam.dma_trans_ns);
 
 		unmap_stime = ktime_get();
-		dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
+		dma_unmap_single(map->dev, dma_addr, size, map->dir);
 		unmap_etime = ktime_get();
 		unmap_delta = ktime_sub(unmap_etime, unmap_stime);
 
@@ -112,7 +115,7 @@  static int map_benchmark_thread(void *data)
 	}
 
 out:
-	free_page((unsigned long)buf);
+	free_pages_exact(buf, size);
 	return ret;
 }
 
@@ -203,7 +206,6 @@  static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 	struct map_benchmark_data *map = file->private_data;
 	void __user *argp = (void __user *)arg;
 	u64 old_dma_mask;
-
 	int ret;
 
 	if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
@@ -234,6 +236,11 @@  static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 			return -EINVAL;
 		}
 
+		if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
+			pr_err("invalid granule size\n");
+			return -EINVAL;
+		}
+
 		switch (map->bparam.dma_dir) {
 		case DMA_MAP_BIDIRECTIONAL:
 			map->dir = DMA_BIDIRECTIONAL;
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index fb23ce9..6f2caa6 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -40,7 +40,8 @@  struct map_benchmark {
 	__u32 dma_bits; /* DMA addressing capability */
 	__u32 dma_dir; /* DMA data direction */
 	__u32 dma_trans_ns; /* time for DMA transmission in ns */
-	__u8 expansion[80];	/* For future use */
+	__u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
+	__u8 expansion[76];	/* For future use */
 };
 
 int main(int argc, char **argv)
@@ -51,11 +52,13 @@  int main(int argc, char **argv)
 	int threads = 1, seconds = 20, node = -1;
 	/* default dma mask 32bit, bidirectional DMA */
 	int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
+	/* default granule 1 PAGESIZE */
+	int granule = 1;
 
 	int cmd = DMA_MAP_BENCHMARK;
 	char *p;
 
-	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
 		switch (opt) {
 		case 't':
 			threads = atoi(optarg);
@@ -75,6 +78,9 @@  int main(int argc, char **argv)
 		case 'x':
 			xdelay = atoi(optarg);
 			break;
+		case 'g':
+			granule = atoi(optarg);
+			break;
 		default:
 			return -1;
 		}
@@ -110,6 +116,11 @@  int main(int argc, char **argv)
 		exit(1);
 	}
 
+	if (granule < 1 || granule > 1024) {
+		fprintf(stderr, "invalid granule size\n");
+		exit(1);
+	}
+
 	fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
 	if (fd == -1) {
 		perror("open");
@@ -123,14 +134,15 @@  int main(int argc, char **argv)
 	map.dma_bits = bits;
 	map.dma_dir = dir;
 	map.dma_trans_ns = xdelay;
+	map.granule = granule;
 
 	if (ioctl(fd, cmd, &map)) {
 		perror("ioctl");
 		exit(1);
 	}
 
-	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
-			threads, seconds, node, dir[directions]);
+	printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
+			threads, seconds, node, dir[directions], granule);
 	printf("average map latency(us):%.1f standard deviation:%.1f\n",
 			map.avg_map_100ns/10.0, map.map_stddev/10.0);
 	printf("average unmap latency(us):%.1f standard deviation:%.1f\n",