diff mbox

tests/gem_gtt_hog: Fix for BDW

Message ID 1391723198-4577-1-git-send-email-rodrigo.vivi@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Rodrigo Vivi Feb. 6, 2014, 9:46 p.m. UTC
Update XY_COLOR_BLT command for Broadwell.

v2: stash devid and remove ugly double allocation. (by Chris).
v3: fix inverted blt command size and stash fd, devid and intel_gen.

Cc: Chris Wilson chris@chris-wilson.co.uk>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
---
 tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 20 deletions(-)

Comments

Chris Wilson Feb. 7, 2014, 9:51 a.m. UTC | #1
On Thu, Feb 06, 2014 at 07:46:38PM -0200, Rodrigo Vivi wrote:
> Update XY_COLOR_BLT command for Broadwell.
> 
> v2: stash devid and remove ugly double allocation. (by Chris).
> v3: fix inverted blt command size and stash fd, devid and intel_gen.
> 
> Cc: Chris Wilson chris@chris-wilson.co.uk>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
> ---
>  tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 39 insertions(+), 20 deletions(-)
> 
> diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c
> index 53cd7eb..3149ff4 100644
> --- a/tests/gem_gtt_hog.c
> +++ b/tests/gem_gtt_hog.c
> @@ -44,30 +44,43 @@
>  
>  static const uint32_t canary = 0xdeadbeef;
>  
> +typedef struct data {
> +	int fd;
> +	int devid;
> +	int intel_gen;
> +} data_t;
> +
>  static double elapsed(const struct timeval *start,
>  		      const struct timeval *end)
>  {
>  	return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec);
>  }
>  
> -static void busy(int fd, uint32_t handle, int size, int loops)
> +static void busy(data_t *data, uint32_t handle, int size, int loops)
>  {
>  	struct drm_i915_gem_relocation_entry reloc[20];
>  	struct drm_i915_gem_exec_object2 gem_exec[2];
>  	struct drm_i915_gem_execbuffer2 execbuf;
>  	struct drm_i915_gem_pwrite gem_pwrite;
>  	struct drm_i915_gem_create create;
> -	uint32_t buf[122], *b;
> -	int i;
> +	uint32_t buf[170], *b;
> +	int i, b_size;
>  
>  	memset(reloc, 0, sizeof(reloc));
>  	memset(gem_exec, 0, sizeof(gem_exec));
>  	memset(&execbuf, 0, sizeof(execbuf));
>  
>  	b = buf;
> +	b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122);
>  	for (i = 0; i < 20; i++) {
> -		*b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
> -			COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> +		if (data->intel_gen >= 8) {
> +			*b++ = MI_NOOP;

Not required, instead you just round up the buf to the next qword after
ending the batch.

> +			*b++ = XY_COLOR_BLT_CMD_NOLEN | 5 |
> +				COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> +		} else {
> +			*b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
> +				COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> +		}
>  		*b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
>  		*b++ = 0;
>  		*b++ = size >> 12 << 16 | 1024;
> @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops)
>  		reloc[i].read_domains = I915_GEM_DOMAIN_RENDER;
>  		reloc[i].write_domain = I915_GEM_DOMAIN_RENDER;
>  		*b++ = 0;
> +		if (data->intel_gen >= 8)
> +			*b++ = 0;
>  		*b++ = canary;
>  	}
>  	*b++ = MI_BATCH_BUFFER_END;
if ((b - buf) & 1))
 *b++ = 0;

> @@ -86,56 +101,55 @@ static void busy(int fd, uint32_t handle, int size, int loops)
>  
>  	create.handle = 0;
>  	create.size = 4096;
> -	drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
> +	drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
>  	gem_exec[1].handle = create.handle;
>  	gem_exec[1].relocation_count = 20;
>  	gem_exec[1].relocs_ptr = (uintptr_t)reloc;
>  
>  	execbuf.buffers_ptr = (uintptr_t)gem_exec;
>  	execbuf.buffer_count = 2;
> -	execbuf.batch_len = sizeof(buf);
> +	execbuf.batch_len = b_size;

I would have personally used (b - buf) * sizeof(buf[0]);

>  	execbuf.flags = 1 << 11;
> -	if (HAS_BLT_RING(intel_get_drm_devid(fd)))
> +	if (HAS_BLT_RING(data->devid))
>  		execbuf.flags |= I915_EXEC_BLT;
>  
>  	gem_pwrite.handle = gem_exec[1].handle;
>  	gem_pwrite.offset = 0;
> -	gem_pwrite.size = sizeof(buf);
> +	gem_pwrite.size = b_size;
and gem_pwrite.size = execbuf.batch_len;
-Chris
Rodrigo Vivi Feb. 7, 2014, 4:04 p.m. UTC | #2
On Fri, Feb 7, 2014 at 7:51 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, Feb 06, 2014 at 07:46:38PM -0200, Rodrigo Vivi wrote:
>> Update XY_COLOR_BLT command for Broadwell.
>>
>> v2: stash devid and remove ugly double allocation. (by Chris).
>> v3: fix inverted blt command size and stash fd, devid and intel_gen.
>>
>> Cc: Chris Wilson chris@chris-wilson.co.uk>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
>> ---
>>  tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------
>>  1 file changed, 39 insertions(+), 20 deletions(-)
>>
>> diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c
>> index 53cd7eb..3149ff4 100644
>> --- a/tests/gem_gtt_hog.c
>> +++ b/tests/gem_gtt_hog.c
>> @@ -44,30 +44,43 @@
>>
>>  static const uint32_t canary = 0xdeadbeef;
>>
>> +typedef struct data {
>> +     int fd;
>> +     int devid;
>> +     int intel_gen;
>> +} data_t;
>> +
>>  static double elapsed(const struct timeval *start,
>>                     const struct timeval *end)
>>  {
>>       return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec);
>>  }
>>
>> -static void busy(int fd, uint32_t handle, int size, int loops)
>> +static void busy(data_t *data, uint32_t handle, int size, int loops)
>>  {
>>       struct drm_i915_gem_relocation_entry reloc[20];
>>       struct drm_i915_gem_exec_object2 gem_exec[2];
>>       struct drm_i915_gem_execbuffer2 execbuf;
>>       struct drm_i915_gem_pwrite gem_pwrite;
>>       struct drm_i915_gem_create create;
>> -     uint32_t buf[122], *b;
>> -     int i;
>> +     uint32_t buf[170], *b;
>> +     int i, b_size;
>>
>>       memset(reloc, 0, sizeof(reloc));
>>       memset(gem_exec, 0, sizeof(gem_exec));
>>       memset(&execbuf, 0, sizeof(execbuf));
>>
>>       b = buf;
>> +     b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122);
>>       for (i = 0; i < 20; i++) {
>> -             *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
>> -                     COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
>> +             if (data->intel_gen >= 8) {
>> +                     *b++ = MI_NOOP;
>
> Not required, instead you just round up the buf to the next qword after
> ending the batch.

I'm afraid I didn't get here, but if possible I'd like to let it just
like other bdw patches are.

>
>> +                     *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 |
>> +                             COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
>> +             } else {
>> +                     *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
>> +                             COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
>> +             }
>>               *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
>>               *b++ = 0;
>>               *b++ = size >> 12 << 16 | 1024;
>> @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops)
>>               reloc[i].read_domains = I915_GEM_DOMAIN_RENDER;
>>               reloc[i].write_domain = I915_GEM_DOMAIN_RENDER;
>>               *b++ = 0;
>> +             if (data->intel_gen >= 8)
>> +                     *b++ = 0;
>>               *b++ = canary;
>>       }
>>       *b++ = MI_BATCH_BUFFER_END;
> if ((b - buf) & 1))
>  *b++ = 0;

is this related to your above's suggestion?

>
>> @@ -86,56 +101,55 @@ static void busy(int fd, uint32_t handle, int size, int loops)
>>
>>       create.handle = 0;
>>       create.size = 4096;
>> -     drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
>> +     drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
>>       gem_exec[1].handle = create.handle;
>>       gem_exec[1].relocation_count = 20;
>>       gem_exec[1].relocs_ptr = (uintptr_t)reloc;
>>
>>       execbuf.buffers_ptr = (uintptr_t)gem_exec;
>>       execbuf.buffer_count = 2;
>> -     execbuf.batch_len = sizeof(buf);
>> +     execbuf.batch_len = b_size;
>
> I would have personally used (b - buf) * sizeof(buf[0]);

much better for sure. Already changed here and will send this on next version.

>
>>       execbuf.flags = 1 << 11;
>> -     if (HAS_BLT_RING(intel_get_drm_devid(fd)))
>> +     if (HAS_BLT_RING(data->devid))
>>               execbuf.flags |= I915_EXEC_BLT;
>>
>>       gem_pwrite.handle = gem_exec[1].handle;
>>       gem_pwrite.offset = 0;
>> -     gem_pwrite.size = sizeof(buf);
>> +     gem_pwrite.size = b_size;
> and gem_pwrite.size = execbuf.batch_len;
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre

Thanks
Chris Wilson Feb. 7, 2014, 4:16 p.m. UTC | #3
On Fri, Feb 07, 2014 at 02:04:56PM -0200, Rodrigo Vivi wrote:
> >>       for (i = 0; i < 20; i++) {
> >> -             *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
> >> -                     COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> >> +             if (data->intel_gen >= 8) {
> >> +                     *b++ = MI_NOOP;
> >
> > Not required, instead you just round up the buf to the next qword after
> > ending the batch.
> 
> I'm afraid I didn't get here, but if possible I'd like to let it just
> like other bdw patches are.

This is just inserting a no-op between commands just to round each out
to a even number of dwords. Which is just lazy.
 
> >
> >> +                     *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 |
> >> +                             COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> >> +             } else {
> >> +                     *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
> >> +                             COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
> >> +             }
> >>               *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
> >>               *b++ = 0;
> >>               *b++ = size >> 12 << 16 | 1024;
> >> @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops)
> >>               reloc[i].read_domains = I915_GEM_DOMAIN_RENDER;
> >>               reloc[i].write_domain = I915_GEM_DOMAIN_RENDER;
> >>               *b++ = 0;
> >> +             if (data->intel_gen >= 8)
> >> +                     *b++ = 0;
> >>               *b++ = canary;
> >>       }
> >>       *b++ = MI_BATCH_BUFFER_END;
> > if ((b - buf) & 1))
> >  *b++ = 0;
> 
> is this related to your above's suggestion?

and for the (b-buf) transform later.
-Chris
diff mbox

Patch

diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c
index 53cd7eb..3149ff4 100644
--- a/tests/gem_gtt_hog.c
+++ b/tests/gem_gtt_hog.c
@@ -44,30 +44,43 @@ 
 
 static const uint32_t canary = 0xdeadbeef;
 
+typedef struct data {
+	int fd;
+	int devid;
+	int intel_gen;
+} data_t;
+
 static double elapsed(const struct timeval *start,
 		      const struct timeval *end)
 {
 	return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec);
 }
 
-static void busy(int fd, uint32_t handle, int size, int loops)
+static void busy(data_t *data, uint32_t handle, int size, int loops)
 {
 	struct drm_i915_gem_relocation_entry reloc[20];
 	struct drm_i915_gem_exec_object2 gem_exec[2];
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_pwrite gem_pwrite;
 	struct drm_i915_gem_create create;
-	uint32_t buf[122], *b;
-	int i;
+	uint32_t buf[170], *b;
+	int i, b_size;
 
 	memset(reloc, 0, sizeof(reloc));
 	memset(gem_exec, 0, sizeof(gem_exec));
 	memset(&execbuf, 0, sizeof(execbuf));
 
 	b = buf;
+	b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122);
 	for (i = 0; i < 20; i++) {
-		*b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
-			COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+		if (data->intel_gen >= 8) {
+			*b++ = MI_NOOP;
+			*b++ = XY_COLOR_BLT_CMD_NOLEN | 5 |
+				COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+		} else {
+			*b++ = XY_COLOR_BLT_CMD_NOLEN | 4 |
+				COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+		}
 		*b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096;
 		*b++ = 0;
 		*b++ = size >> 12 << 16 | 1024;
@@ -76,6 +89,8 @@  static void busy(int fd, uint32_t handle, int size, int loops)
 		reloc[i].read_domains = I915_GEM_DOMAIN_RENDER;
 		reloc[i].write_domain = I915_GEM_DOMAIN_RENDER;
 		*b++ = 0;
+		if (data->intel_gen >= 8)
+			*b++ = 0;
 		*b++ = canary;
 	}
 	*b++ = MI_BATCH_BUFFER_END;
@@ -86,56 +101,55 @@  static void busy(int fd, uint32_t handle, int size, int loops)
 
 	create.handle = 0;
 	create.size = 4096;
-	drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+	drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 	gem_exec[1].handle = create.handle;
 	gem_exec[1].relocation_count = 20;
 	gem_exec[1].relocs_ptr = (uintptr_t)reloc;
 
 	execbuf.buffers_ptr = (uintptr_t)gem_exec;
 	execbuf.buffer_count = 2;
-	execbuf.batch_len = sizeof(buf);
+	execbuf.batch_len = b_size;
 	execbuf.flags = 1 << 11;
-	if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+	if (HAS_BLT_RING(data->devid))
 		execbuf.flags |= I915_EXEC_BLT;
 
 	gem_pwrite.handle = gem_exec[1].handle;
 	gem_pwrite.offset = 0;
-	gem_pwrite.size = sizeof(buf);
+	gem_pwrite.size = b_size;
 	gem_pwrite.data_ptr = (uintptr_t)buf;
-	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) {
+	if (drmIoctl(data->fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) {
 		while (loops--)
-			drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+			drmIoctl(data->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
 	}
 
-	drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create.handle);
+	drmIoctl(data->fd, DRM_IOCTL_GEM_CLOSE, &create.handle);
 }
 
-static void run(int child)
+static void run(data_t *data, int child)
 {
 	const int size = 4096 * (256 + child * child);
 	const int tiling = child % 2;
 	const int write = child % 2;
-	int fd = drm_open_any();
-	uint32_t handle = gem_create(fd, size);
+	uint32_t handle = gem_create(data->fd, size);
 	uint32_t *ptr;
 	uint32_t x;
 
 	igt_assert(handle);
 
 	if (tiling != I915_TILING_NONE)
-		gem_set_tiling(fd, handle, tiling, 4096);
+		gem_set_tiling(data->fd, handle, tiling, 4096);
 
 	/* load up the unfaulted bo */
-	busy(fd, handle, size, 100);
+	busy(data, handle, size, 100);
 
 	/* Note that we ignore the API and rely on the implict
 	 * set-to-gtt-domain within the fault handler.
 	 */
 	if (write) {
-		ptr = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE);
+		ptr = gem_mmap(data->fd, handle, size, PROT_READ | PROT_WRITE);
 		ptr[rand() % (size / 4)] = canary;
 	} else
-		ptr = gem_mmap(fd, handle, size, PROT_READ);
+		ptr = gem_mmap(data->fd, handle, size, PROT_READ);
 	x = ptr[rand() % (size / 4)];
 	munmap(ptr, size);
 
@@ -147,15 +161,20 @@  igt_simple_main
 {
 	struct timeval start, end;
 	pid_t children[64];
+	data_t data = {};
 	int n;
 
 	igt_skip_on_simulation();
 
+	data.fd = drm_open_any();
+	data.devid = intel_get_drm_devid(data.fd);
+	data.intel_gen = intel_gen(data.devid);
+
 	gettimeofday(&start, NULL);
 	for (n = 0; n < ARRAY_SIZE(children); n++) {
 		switch ((children[n] = fork())) {
 		case -1: igt_assert(0);
-		case 0: run(n); break;
+		case 0: run(&data, n); break;
 		default: break;
 		}
 	}