Message ID | 1391723198-4577-1-git-send-email-rodrigo.vivi@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Feb 06, 2014 at 07:46:38PM -0200, Rodrigo Vivi wrote: > Update XY_COLOR_BLT command for Broadwell. > > v2: stash devid and remove ugly double allocation. (by Chris). > v3: fix inverted blt command size and stash fd, devid and intel_gen. > > Cc: Chris Wilson chris@chris-wilson.co.uk> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> > --- > tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------ > 1 file changed, 39 insertions(+), 20 deletions(-) > > diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c > index 53cd7eb..3149ff4 100644 > --- a/tests/gem_gtt_hog.c > +++ b/tests/gem_gtt_hog.c > @@ -44,30 +44,43 @@ > > static const uint32_t canary = 0xdeadbeef; > > +typedef struct data { > + int fd; > + int devid; > + int intel_gen; > +} data_t; > + > static double elapsed(const struct timeval *start, > const struct timeval *end) > { > return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec); > } > > -static void busy(int fd, uint32_t handle, int size, int loops) > +static void busy(data_t *data, uint32_t handle, int size, int loops) > { > struct drm_i915_gem_relocation_entry reloc[20]; > struct drm_i915_gem_exec_object2 gem_exec[2]; > struct drm_i915_gem_execbuffer2 execbuf; > struct drm_i915_gem_pwrite gem_pwrite; > struct drm_i915_gem_create create; > - uint32_t buf[122], *b; > - int i; > + uint32_t buf[170], *b; > + int i, b_size; > > memset(reloc, 0, sizeof(reloc)); > memset(gem_exec, 0, sizeof(gem_exec)); > memset(&execbuf, 0, sizeof(execbuf)); > > b = buf; > + b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122); > for (i = 0; i < 20; i++) { > - *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | > - COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > + if (data->intel_gen >= 8) { > + *b++ = MI_NOOP; Not required, instead you just round up the buf to the next qword after ending the batch. > + *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 | > + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > + } else { > + *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | > + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > + } > *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096; > *b++ = 0; > *b++ = size >> 12 << 16 | 1024; > @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops) > reloc[i].read_domains = I915_GEM_DOMAIN_RENDER; > reloc[i].write_domain = I915_GEM_DOMAIN_RENDER; > *b++ = 0; > + if (data->intel_gen >= 8) > + *b++ = 0; > *b++ = canary; > } > *b++ = MI_BATCH_BUFFER_END; if ((b - buf) & 1)) *b++ = 0; > @@ -86,56 +101,55 @@ static void busy(int fd, uint32_t handle, int size, int loops) > > create.handle = 0; > create.size = 4096; > - drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); > + drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create); > gem_exec[1].handle = create.handle; > gem_exec[1].relocation_count = 20; > gem_exec[1].relocs_ptr = (uintptr_t)reloc; > > execbuf.buffers_ptr = (uintptr_t)gem_exec; > execbuf.buffer_count = 2; > - execbuf.batch_len = sizeof(buf); > + execbuf.batch_len = b_size; I would have personally used (b - buf) * sizeof(buf[0]); > execbuf.flags = 1 << 11; > - if (HAS_BLT_RING(intel_get_drm_devid(fd))) > + if (HAS_BLT_RING(data->devid)) > execbuf.flags |= I915_EXEC_BLT; > > gem_pwrite.handle = gem_exec[1].handle; > gem_pwrite.offset = 0; > - gem_pwrite.size = sizeof(buf); > + gem_pwrite.size = b_size; and gem_pwrite.size = execbuf.batch_len; -Chris
On Fri, Feb 7, 2014 at 7:51 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Thu, Feb 06, 2014 at 07:46:38PM -0200, Rodrigo Vivi wrote: >> Update XY_COLOR_BLT command for Broadwell. >> >> v2: stash devid and remove ugly double allocation. (by Chris). >> v3: fix inverted blt command size and stash fd, devid and intel_gen. >> >> Cc: Chris Wilson chris@chris-wilson.co.uk> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> >> --- >> tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------ >> 1 file changed, 39 insertions(+), 20 deletions(-) >> >> diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c >> index 53cd7eb..3149ff4 100644 >> --- a/tests/gem_gtt_hog.c >> +++ b/tests/gem_gtt_hog.c >> @@ -44,30 +44,43 @@ >> >> static const uint32_t canary = 0xdeadbeef; >> >> +typedef struct data { >> + int fd; >> + int devid; >> + int intel_gen; >> +} data_t; >> + >> static double elapsed(const struct timeval *start, >> const struct timeval *end) >> { >> return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec); >> } >> >> -static void busy(int fd, uint32_t handle, int size, int loops) >> +static void busy(data_t *data, uint32_t handle, int size, int loops) >> { >> struct drm_i915_gem_relocation_entry reloc[20]; >> struct drm_i915_gem_exec_object2 gem_exec[2]; >> struct drm_i915_gem_execbuffer2 execbuf; >> struct drm_i915_gem_pwrite gem_pwrite; >> struct drm_i915_gem_create create; >> - uint32_t buf[122], *b; >> - int i; >> + uint32_t buf[170], *b; >> + int i, b_size; >> >> memset(reloc, 0, sizeof(reloc)); >> memset(gem_exec, 0, sizeof(gem_exec)); >> memset(&execbuf, 0, sizeof(execbuf)); >> >> b = buf; >> + b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122); >> for (i = 0; i < 20; i++) { >> - *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | >> - COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; >> + if (data->intel_gen >= 8) { >> + *b++ = MI_NOOP; > > Not required, instead you just round up the buf to the next qword after > ending the batch. I'm afraid I didn't get here, but if possible I'd like to let it just like other bdw patches are. > >> + *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 | >> + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; >> + } else { >> + *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | >> + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; >> + } >> *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096; >> *b++ = 0; >> *b++ = size >> 12 << 16 | 1024; >> @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops) >> reloc[i].read_domains = I915_GEM_DOMAIN_RENDER; >> reloc[i].write_domain = I915_GEM_DOMAIN_RENDER; >> *b++ = 0; >> + if (data->intel_gen >= 8) >> + *b++ = 0; >> *b++ = canary; >> } >> *b++ = MI_BATCH_BUFFER_END; > if ((b - buf) & 1)) > *b++ = 0; is this related to your above's suggestion? > >> @@ -86,56 +101,55 @@ static void busy(int fd, uint32_t handle, int size, int loops) >> >> create.handle = 0; >> create.size = 4096; >> - drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); >> + drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create); >> gem_exec[1].handle = create.handle; >> gem_exec[1].relocation_count = 20; >> gem_exec[1].relocs_ptr = (uintptr_t)reloc; >> >> execbuf.buffers_ptr = (uintptr_t)gem_exec; >> execbuf.buffer_count = 2; >> - execbuf.batch_len = sizeof(buf); >> + execbuf.batch_len = b_size; > > I would have personally used (b - buf) * sizeof(buf[0]); much better for sure. Already changed here and will send this on next version. > >> execbuf.flags = 1 << 11; >> - if (HAS_BLT_RING(intel_get_drm_devid(fd))) >> + if (HAS_BLT_RING(data->devid)) >> execbuf.flags |= I915_EXEC_BLT; >> >> gem_pwrite.handle = gem_exec[1].handle; >> gem_pwrite.offset = 0; >> - gem_pwrite.size = sizeof(buf); >> + gem_pwrite.size = b_size; > and gem_pwrite.size = execbuf.batch_len; > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre Thanks
On Fri, Feb 07, 2014 at 02:04:56PM -0200, Rodrigo Vivi wrote: > >> for (i = 0; i < 20; i++) { > >> - *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | > >> - COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > >> + if (data->intel_gen >= 8) { > >> + *b++ = MI_NOOP; > > > > Not required, instead you just round up the buf to the next qword after > > ending the batch. > > I'm afraid I didn't get here, but if possible I'd like to let it just > like other bdw patches are. This is just inserting a no-op between commands just to round each out to a even number of dwords. Which is just lazy. > > > >> + *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 | > >> + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > >> + } else { > >> + *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | > >> + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; > >> + } > >> *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096; > >> *b++ = 0; > >> *b++ = size >> 12 << 16 | 1024; > >> @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops) > >> reloc[i].read_domains = I915_GEM_DOMAIN_RENDER; > >> reloc[i].write_domain = I915_GEM_DOMAIN_RENDER; > >> *b++ = 0; > >> + if (data->intel_gen >= 8) > >> + *b++ = 0; > >> *b++ = canary; > >> } > >> *b++ = MI_BATCH_BUFFER_END; > > if ((b - buf) & 1)) > > *b++ = 0; > > is this related to your above's suggestion? and for the (b-buf) transform later. -Chris
diff --git a/tests/gem_gtt_hog.c b/tests/gem_gtt_hog.c index 53cd7eb..3149ff4 100644 --- a/tests/gem_gtt_hog.c +++ b/tests/gem_gtt_hog.c @@ -44,30 +44,43 @@ static const uint32_t canary = 0xdeadbeef; +typedef struct data { + int fd; + int devid; + int intel_gen; +} data_t; + static double elapsed(const struct timeval *start, const struct timeval *end) { return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_usec - start->tv_usec); } -static void busy(int fd, uint32_t handle, int size, int loops) +static void busy(data_t *data, uint32_t handle, int size, int loops) { struct drm_i915_gem_relocation_entry reloc[20]; struct drm_i915_gem_exec_object2 gem_exec[2]; struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_pwrite gem_pwrite; struct drm_i915_gem_create create; - uint32_t buf[122], *b; - int i; + uint32_t buf[170], *b; + int i, b_size; memset(reloc, 0, sizeof(reloc)); memset(gem_exec, 0, sizeof(gem_exec)); memset(&execbuf, 0, sizeof(execbuf)); b = buf; + b_size = sizeof(uint32_t) * (data->intel_gen >= 8 ? 170 : 122); for (i = 0; i < 20; i++) { - *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | - COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; + if (data->intel_gen >= 8) { + *b++ = MI_NOOP; + *b++ = XY_COLOR_BLT_CMD_NOLEN | 5 | + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; + } else { + *b++ = XY_COLOR_BLT_CMD_NOLEN | 4 | + COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; + } *b++ = 0xf0 << 16 | 1 << 25 | 1 << 24 | 4096; *b++ = 0; *b++ = size >> 12 << 16 | 1024; @@ -76,6 +89,8 @@ static void busy(int fd, uint32_t handle, int size, int loops) reloc[i].read_domains = I915_GEM_DOMAIN_RENDER; reloc[i].write_domain = I915_GEM_DOMAIN_RENDER; *b++ = 0; + if (data->intel_gen >= 8) + *b++ = 0; *b++ = canary; } *b++ = MI_BATCH_BUFFER_END; @@ -86,56 +101,55 @@ static void busy(int fd, uint32_t handle, int size, int loops) create.handle = 0; create.size = 4096; - drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + drmIoctl(data->fd, DRM_IOCTL_I915_GEM_CREATE, &create); gem_exec[1].handle = create.handle; gem_exec[1].relocation_count = 20; gem_exec[1].relocs_ptr = (uintptr_t)reloc; execbuf.buffers_ptr = (uintptr_t)gem_exec; execbuf.buffer_count = 2; - execbuf.batch_len = sizeof(buf); + execbuf.batch_len = b_size; execbuf.flags = 1 << 11; - if (HAS_BLT_RING(intel_get_drm_devid(fd))) + if (HAS_BLT_RING(data->devid)) execbuf.flags |= I915_EXEC_BLT; gem_pwrite.handle = gem_exec[1].handle; gem_pwrite.offset = 0; - gem_pwrite.size = sizeof(buf); + gem_pwrite.size = b_size; gem_pwrite.data_ptr = (uintptr_t)buf; - if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) { + if (drmIoctl(data->fd, DRM_IOCTL_I915_GEM_PWRITE, &gem_pwrite) == 0) { while (loops--) - drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); + drmIoctl(data->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); } - drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create.handle); + drmIoctl(data->fd, DRM_IOCTL_GEM_CLOSE, &create.handle); } -static void run(int child) +static void run(data_t *data, int child) { const int size = 4096 * (256 + child * child); const int tiling = child % 2; const int write = child % 2; - int fd = drm_open_any(); - uint32_t handle = gem_create(fd, size); + uint32_t handle = gem_create(data->fd, size); uint32_t *ptr; uint32_t x; igt_assert(handle); if (tiling != I915_TILING_NONE) - gem_set_tiling(fd, handle, tiling, 4096); + gem_set_tiling(data->fd, handle, tiling, 4096); /* load up the unfaulted bo */ - busy(fd, handle, size, 100); + busy(data, handle, size, 100); /* Note that we ignore the API and rely on the implict * set-to-gtt-domain within the fault handler. */ if (write) { - ptr = gem_mmap(fd, handle, size, PROT_READ | PROT_WRITE); + ptr = gem_mmap(data->fd, handle, size, PROT_READ | PROT_WRITE); ptr[rand() % (size / 4)] = canary; } else - ptr = gem_mmap(fd, handle, size, PROT_READ); + ptr = gem_mmap(data->fd, handle, size, PROT_READ); x = ptr[rand() % (size / 4)]; munmap(ptr, size); @@ -147,15 +161,20 @@ igt_simple_main { struct timeval start, end; pid_t children[64]; + data_t data = {}; int n; igt_skip_on_simulation(); + data.fd = drm_open_any(); + data.devid = intel_get_drm_devid(data.fd); + data.intel_gen = intel_gen(data.devid); + gettimeofday(&start, NULL); for (n = 0; n < ARRAY_SIZE(children); n++) { switch ((children[n] = fork())) { case -1: igt_assert(0); - case 0: run(n); break; + case 0: run(&data, n); break; default: break; } }
Update XY_COLOR_BLT command for Broadwell. v2: stash devid and remove ugly double allocation. (by Chris). v3: fix inverted blt command size and stash fd, devid and intel_gen. Cc: Chris Wilson chris@chris-wilson.co.uk> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> --- tests/gem_gtt_hog.c | 59 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 20 deletions(-)