diff mbox

[i-g-t,v3,3/6] tests/gem_scheduler: Add gem_scheduler test

Message ID 1457607814-18751-4-git-send-email-derek.j.morton@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Derek Morton March 10, 2016, 11:03 a.m. UTC
This is intended to test the scheduler behaviour is correct.
The subtests are
<ring>-basic
Tests that batch buffers of the same priority submitted to a ring
execute in the order they are submitted.
<ring>-read
Submits a batch buffer with a read dependency to a buffer object to
a ring which is held in the scheduler queue by a long running batch
buffer. Submit batch buffers to other rings that have a read dependency
to the same buffer object. Ensure they execute before the batch buffer
being held up behind the long running batch buffer.
<ring>-write
Submits a batch buffer with a write dependency to a buffer object to
a ring which is held in the scheduler queue by a long running batch
buffer. Submit batch buffers to other rings that have a write dependency
to the same buffer object. Submit batch buffers with no interdependencies
to all rings. Ensure the batch buffers that have write dependencies are
executed in submission order but the batch buffers without interdependencies
do not get held up.

v2: Addressed review comments from Daniele Ceraolo Spurio

v3: Added logic to use generic BSD ring if there is 1 and BSD1 / BSD2 if there
    are 2.

Signed-off-by: Derek Morton <derek.j.morton@intel.com>
---
 tests/Makefile.sources |   1 +
 tests/gem_scheduler.c  | 459 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 460 insertions(+)
 create mode 100644 tests/gem_scheduler.c

Comments

Daniele Ceraolo Spurio March 17, 2016, 8:49 a.m. UTC | #1
On 10/03/16 11:03, Derek Morton wrote:
> This is intended to test the scheduler behaviour is correct.
> The subtests are
> <ring>-basic
> Tests that batch buffers of the same priority submitted to a ring
> execute in the order they are submitted.
> <ring>-read
> Submits a batch buffer with a read dependency to a buffer object to
> a ring which is held in the scheduler queue by a long running batch
> buffer. Submit batch buffers to other rings that have a read dependency
> to the same buffer object. Ensure they execute before the batch buffer
> being held up behind the long running batch buffer.
> <ring>-write
> Submits a batch buffer with a write dependency to a buffer object to
> a ring which is held in the scheduler queue by a long running batch
> buffer. Submit batch buffers to other rings that have a write dependency
> to the same buffer object. Submit batch buffers with no interdependencies
> to all rings. Ensure the batch buffers that have write dependencies are
> executed in submission order but the batch buffers without interdependencies
> do not get held up.
>
> v2: Addressed review comments from Daniele Ceraolo Spurio
>
> v3: Added logic to use generic BSD ring if there is 1 and BSD1 / BSD2 if there
>      are 2.
>
> Signed-off-by: Derek Morton<derek.j.morton@intel.com>
> ---
>   tests/Makefile.sources |   1 +
>   tests/gem_scheduler.c  | 459 +++++++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 460 insertions(+)
>   create mode 100644 tests/gem_scheduler.c
>
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index f8b18b0..c88e045 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -66,6 +66,7 @@ TESTS_progs_M = \
>   	gem_request_retire \
>   	gem_reset_stats \
>   	gem_ringfill \
> +	gem_scheduler \
>   	gem_set_tiling_vs_blt \
>   	gem_softpin \
>   	gem_stolen \
> diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
> new file mode 100644
> index 0000000..436440a
> --- /dev/null
> +++ b/tests/gem_scheduler.c
> @@ -0,0 +1,459 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton<derek.j.morton@intel.com>
> + *
> + */
> +
> +#include "igt.h"
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <time.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <fcntl.h>
> +
> +IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant "
> +                     "batch buffers of the same priority are executed in "
> +                     "submission order. Read-read tests ensure "
> +                     "batch buffers with a read dependency to the same buffer "
> +                     "object do not block each other. Write-write dependency "
> +                     "tests ensure batch buffers with a write dependency to a "
> +                     "buffer object will be executed in submission order but "
> +                     "will not block execution of other independant batch "
> +                     "buffers.");
> +
> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
> +
> +static struct ring {
> +	const char *name;
> +	int id;
> +	bool exists;
> +} rings[] = {
> +	{ "render", I915_EXEC_RENDER, false },
> +	{ "bsd",    I915_EXEC_BSD , false },
> +	{ "bsd2",    I915_EXEC_BSD | 2<<13, false },
> +	{ "blt",    I915_EXEC_BLT, false },
> +	{ "vebox",  I915_EXEC_VEBOX, false },
> +};

If you can't use intel_execution_engines could you add a comment to 
explain why? also considering the renaming going on in the kernel you 
cold replace "ring" with "engine"

> +
> +#define NBR_RINGS (sizeof(rings)/sizeof(struct ring))
> +
> +static void check_rings(int fd) {
> +	int loop;
> +	for(loop=0; loop < NBR_RINGS; loop++) {

Style: space between for/if and "(" in this file

> +		if(gem_has_ring(fd, rings[loop].id)) {
> +			if(rings[loop].id == (I915_EXEC_BSD | 2<<13)) {
> +				rings[loop].exists = gem_has_bsd2(fd);
> +			}
> +			else {
> +				rings[loop].exists = true;

You're marking the VEBOX as existing so adding a gen requirement in this 
function would be nice. I know that the library you're using already 
requires gen8, but the requirement doesn't look to be explicit anywhere 
in this file so it would be nice to add it for clarity.

> +				if(rings[loop].id == I915_EXEC_BSD)
> +					/* If there is BSD2, need to make BSD1
> +					 * explicit.
> +					 */
> +					if(gem_has_bsd2(fd))
> +						rings[loop].id |= (1<<13);
> +			}
> +		}
> +	}
> +}
> +
> +static drm_intel_bo *create_and_check_bo(drm_intel_bufmgr *bufmgr, const char *desc)
> +{
> +	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, desc, BATCH_SZ, BATCH_SZ);
> +	igt_assert_f(bo, "Failed allocating %s\n", desc);
> +	return bo;
> +}
> +
> +static struct intel_batchbuffer *create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
> +                                                 int ringid, uint32_t loops,
> +                                                 drm_intel_bo *dest)
> +{
> +	struct intel_batchbuffer *buffer;
> +	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(buffer);
> +	igt_create_delay_bb(fd, buffer, ringid, loops, dest);
> +	return buffer;
> +}
> +
> +static struct intel_batchbuffer *create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
> +                                                     int ringid, drm_intel_bo *dest,
> +                                                     drm_intel_bo *load, bool write)
> +{
> +	struct intel_batchbuffer *buffer;
> +	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(buffer);
> +	igt_create_timestamp_bb(fd, buffer, ringid, dest, load, write);
> +	return buffer;
> +}
> +
> +static struct intel_batchbuffer *create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
> +                                                int noops)
> +{
> +	struct intel_batchbuffer *buffer;
> +	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(buffer);
> +	igt_create_noop_bb(buffer, noops);
> +	return buffer;
> +}
> +
> +static void init_context(int *fd, drm_intel_bufmgr **bufmgr, int ringid)

init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're 
not init_context might be a bit misleading as a name as you're not  a 
ctx directly, although initializing

> +{
> +	struct intel_batchbuffer *noop_bb;
> +	*fd = drm_open_driver(DRIVER_INTEL);
> +	igt_assert(*fd >= 0);
> +	*bufmgr = drm_intel_bufmgr_gem_init(*fd, BATCH_SZ);
> +	igt_assert(*bufmgr);
> +	drm_intel_bufmgr_gem_enable_reuse(*bufmgr);
> +	/* Send a noop batch buffer to force any deferred initialisation */
> +	noop_bb = create_noop_bb(*fd, *bufmgr, 5);
> +	intel_batchbuffer_flush_on_ring(noop_bb, ringid);
> +	intel_batchbuffer_free(noop_bb);
> +}
> +
> +/* Basic test. Check batch buffers of the same priority and with no dependencies
> + * are executed in the order they are submitted.
> + */
> +#define NBR_BASIC_FDs (3)
> +static void run_test_basic(int in_flight, int ringid)
> +{
> +	int fd[NBR_BASIC_FDs];
> +	int loop;
> +	drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs];
> +	uint32_t *delay_buf, *ts1_buf, *ts2_buf;
> +	struct intel_batchbuffer *ts1_bb, *ts2_bb;
> +	struct intel_batchbuffer **in_flight_bbs;
> +	uint32_t calibrated_1s;
> +	drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
> +
> +	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
> +	igt_assert(in_flight_bbs);
> +
> +	/* Need multiple i915 fd's. Scheduler will not change execution order of
> +	 * batch buffers from the same context.
> +	 */
> +	for(loop=0; loop < NBR_BASIC_FDs; loop++)
> +		init_context(&(fd[loop]), &(bufmgr[loop]), ringid);
> +
> +
> +	/* Create buffer objects */
> +	delay_bo = create_and_check_bo(bufmgr[0], "delay bo");
> +	ts1_bo = create_and_check_bo(bufmgr[1], "ts1 bo");
> +	ts2_bo = create_and_check_bo(bufmgr[2], "ts2 bo");
> +
> +	/* Put some non zero values in the delay bo */

You do this write for every delay batch, so you could move it to 
create_delay_bb (or to the library function)

> +	{

Why is this extra scope needed? "data" doesn't clash with any other 
variable name

> +		uint32_t data=0xffffffff;
> +		drm_intel_bo_subdata(delay_bo, 0, 4, &data);
> +	}
> +
> +	calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
> +
> +	/* Batch buffers to fill the in flight queue */
> +	in_flight_bbs[0] = create_delay_bb(fd[0], bufmgr[0], ringid, calibrated_1s, delay_bo);
> +	for(loop = 1; loop < in_flight; loop++)
> +		in_flight_bbs[loop] = create_noop_bb(fd[0], bufmgr[0], 5);
> +
> +	/* Extra batch buffers in the scheduler queue */
> +	ts1_bb = create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, NULL, false);
> +	ts2_bb = create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, NULL, false);
> +
> +	/* Flush batchbuffers */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
> +	intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
> +	intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
> +
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(delay_bo, 0);
> +	drm_intel_bo_map(ts1_bo, 0);
> +	drm_intel_bo_map(ts2_bo, 0);
> +
> +	delay_buf = delay_bo->virtual;
> +	ts1_buf = ts1_bo->virtual;
> +	ts2_buf = ts2_bo->virtual;
> +
> +	igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
> +	igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
> +	igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
> +
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(delay_buf[0] == 0,
> +	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", delay_buf[0]);
> +
> +	igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
> +	             "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
> +	             delay_buf[2], ts1_buf[0]);
> +	igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
> +	             "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n",
> +	             ts1_buf[0], ts2_buf[0]);
> +
> +	/* Cleanup */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_free(in_flight_bbs[loop]);
> +	intel_batchbuffer_free(ts1_bb);
> +	intel_batchbuffer_free(ts2_bb);
> +
> +	drm_intel_bo_unreference(delay_bo);
> +	drm_intel_bo_unreference(ts1_bo);
> +	drm_intel_bo_unreference(ts2_bo);
> +	for(loop = 0; loop < NBR_BASIC_FDs; loop++) {
> +		drm_intel_bufmgr_destroy(bufmgr[loop]);
> +		close(fd[loop]);
> +	}
> +	free(in_flight_bbs);
> +}
> +
> +/* Dependency test.
> + * write=0, Submit batch buffers with read dependencies to all rings. Delay one
> + * with a long executing batch buffer. Check the others are not held up.
> + * write=1, Submit batch buffers with write dependencies to all rings. Delay one
> + * with a long executing batch buffer. Also submit batch buffers with no
> + * dependencies to all rings. Batch buffers with write dependencies should be
> + * executed in submission order. The batch buffers with no dependencies should
> + * not be held up.
> + */
> +static void run_test_dependency(int in_flight, int ring, bool write)
> +{
> +	int fd[NBR_RINGS], fd2[NBR_RINGS];
> +	int loop;
> +	int prime_fd;
> +	uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], *shared_buf;
> +	uint32_t calibrated_1s;
> +	drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS];
> +	struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], **in_flight_bbs;
> +	drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], *shared_bo[NBR_RINGS];
> +
> +	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
> +	igt_assert(in_flight_bbs);
> +
> +	/* Need multiple i915 fd's. Scheduler will not change execution order of
> +	 * batch buffers from the same context.
> +	 */
> +	for(loop=0; loop < NBR_RINGS; loop++) {

You have several loops on all engines but with a bit of reordering you 
should be able to fit everything in less loops. Unless I've missed a 
dependency, you should be able to do something like:

for (loop=0; loop < NBR_RINGS; loop++) {
	init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
	ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");

	if (loop == 0)
		shared_bo[0] = create_and_check_bo(...);
		drm_intel_bo_gem_export_to_prime(...);
	else
		shared_bo[loop] = drm_intel_bo_gem_create_from_prime(...);

	ts_bb[loop] = create_timestamp_bb(..);

	if (write) {
		....
	}
}


Also the operations performed in the "if (write)" case are the same of the ones in the main loop with the exception of the shared bo so potentially you could move the whole block in a separate function if it doesn't get too messy.


> +		init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
> +		if(write)
> +			init_context(&(fd2[loop]), &(bufmgr2[loop]), rings[loop].id);
> +	}
> +
> +	/* Create buffer objects */
> +	delay_bo = create_and_check_bo(bufmgr[ring], "delay bo");
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");
> +		if(write)
> +			ts2_bo[loop] = create_and_check_bo(bufmgr2[loop], "ts2 bo");
> +	}
> +
> +	/* Create shared buffer object */
> +	shared_bo[0] = create_and_check_bo(bufmgr[0], "shared bo");
> +
> +	drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
> +	for(loop = 1; loop < NBR_RINGS; loop++) {
> +		shared_bo[loop] = drm_intel_bo_gem_create_from_prime(bufmgr[loop],
> +		                                                     prime_fd, BATCH_SZ);
> +		igt_assert(shared_bo[loop]);
> +	}
> +	close(prime_fd);
> +
> +	/* Put some non zero values in the delay and shared bo */
> +	drm_intel_bo_map(delay_bo, 1);
> +	delay_buf = delay_bo->virtual;
> +	delay_buf[0] = 0xff;
> +	drm_intel_bo_unmap(delay_bo);
> +	drm_intel_bo_map(shared_bo[0], 1);
> +	shared_buf = shared_bo[0]->virtual;
> +	shared_buf[0] = 0xff00ff00;
> +	drm_intel_bo_unmap(shared_bo[0]);

using subdata might be cleaner here as well

> +
> +	calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], rings[ring].id);
> +
> +	/* Batch buffers to fill the in flight queue */
> +	in_flight_bbs[0] = create_delay_bb(fd[ring], bufmgr[ring], rings[ring].id,
> +	                                   calibrated_1s, delay_bo);
> +	for(loop = 1; loop < in_flight; loop++)
> +		in_flight_bbs[loop] = create_noop_bb(fd[ring], bufmgr[ring], 5);
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		ts_bb[loop] = create_timestamp_bb(fd[loop], bufmgr[loop],
> +		                                  rings[loop].id, ts_bo[loop],
> +		                                  shared_bo[loop], write);
> +		if(write)
> +			ts2_bb[loop] = create_timestamp_bb(fd2[loop], bufmgr2[loop],
> +			                                   rings[loop].id, ts2_bo[loop],
> +			                                   NULL, false);
> +	}
> +
> +	/* Flush batchbuffers */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], rings[ring].id);
> +
> +	intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
> +	for(loop = 0; loop < NBR_RINGS; loop++)
> +		if((loop != ring) && rings[loop].exists)
> +			intel_batchbuffer_flush_on_ring(ts_bb[loop], rings[loop].id);
> +
> +	if(write) {
> +		intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
> +		for(loop = 0; loop < NBR_RINGS; loop++)
> +			if((loop != ring) && rings[loop].exists)
> +				intel_batchbuffer_flush_on_ring(ts2_bb[loop], rings[loop].id);
> +	}
> +
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(delay_bo, 0);
> +	delay_buf = delay_bo->virtual;
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		drm_intel_bo_map(ts_bo[loop], 0);
> +		ts_buf[loop] = ts_bo[loop]->virtual;
> +		if(write) {
> +			drm_intel_bo_map(ts2_bo[loop], 0);
> +			ts2_buf[loop] = ts2_bo[loop]->virtual;
> +		}
> +	}
> +
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(delay_buf[0] == 0,
> +	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
> +	             delay_buf[0]);
> +
> +	igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n",
> +	          rings[ring].name, delay_buf[2]);
> +	for(loop = 0; loop < NBR_RINGS; loop++)
> +		igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
> +		          rings[loop].name, ts_buf[loop][0]);
> +
> +	if(write) {
> +		igt_debug("Independant batch buffers\n");
> +		for(loop = 0; loop < NBR_RINGS; loop++)
> +			igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
> +			          rings[loop].name, ts2_buf[loop][0]);
> +	}
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		if((loop != ring) && rings[loop].exists) {
> +			if(write) {
> +				/* Write dependency, delayed ring should run first */
> +				igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
> +				             "%s ran before %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, rings[ring].name,
> +				             ts_buf[loop][0], ts_buf[ring][0]);
> +				/* Second bb without dependency should run first */
> +				igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]),
> +				             "(%s) independant bb was held up - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, ts_buf[loop][0], ts2_buf[loop][0]);
> +			}
> +			else
> +				/* Read dependency, delayed ring should run last */
> +				igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
> +				             "%s ran after %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, rings[ring].name,
> +				             ts_buf[loop][0], ts_buf[ring][0]);
> +		}
> +	}
> +
> +	/* Cleanup */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_free(in_flight_bbs[loop]);
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		intel_batchbuffer_free(ts_bb[loop]);
> +		drm_intel_bo_unreference(ts_bo[loop]);
> +		drm_intel_bo_unreference(shared_bo[loop]);
> +		if(write) {
> +			intel_batchbuffer_free(ts2_bb[loop]);
> +			drm_intel_bo_unreference(ts2_bo[loop]);
> +		}
> +	}
> +
> +	drm_intel_bo_unreference(delay_bo);

You can move this unreferencing to before the above loop and then squash 
the 2 loops onNBR_RINGSinto one.

Regards,
Daniele

> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		drm_intel_bufmgr_destroy(bufmgr[loop]);
> +		close(fd[loop]);
> +		if(write) {
> +			drm_intel_bufmgr_destroy(bufmgr2[loop]);
> +			close(fd2[loop]);
> +		}
> +	}
> +
> +	free(in_flight_bbs);
> +}
> +
> +igt_main
> +{
> +	int loop;
> +	int in_flight;
> +	int fd;
> +
> +	igt_fixture {
> +		int debug_fd;
> +		int l;
> +		char buf[6];
> +		/* Get nbr of batch buffers that the scheduler will queue in the
> +		 * HW. If this debugfs file does not exist there is no scheduler
> +		 * so skip the test.
> +		 */
> +		debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY);
> +		igt_skip_on(debug_fd == -1);
> +		l = read(debug_fd, buf, sizeof(buf)-1);
> +		igt_assert(l > 0);
> +		igt_assert(l < sizeof(buf));
> +		buf[l] = '\0';
> +		/* May be a decimal or hex number depending on scheduler version */
> +		if(sscanf(buf, "0x%2x", &in_flight) != 1)
> +			igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
> +			             "Error reading from i915_scheduler_min_flying\n");
> +		close(debug_fd);
> +		igt_debug("in flight = %d\n", in_flight);
> +		fd = drm_open_driver(DRIVER_INTEL);
> +		igt_assert(fd >= 0);
> +		check_rings(fd);
> +	}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-basic", rings[loop].name) {
> +			gem_require_ring(fd, rings[loop].id);
> +			run_test_basic(in_flight, rings[loop].id);
> +		}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-read", rings[loop].name) {
> +			gem_require_ring(fd, rings[loop].id);
> +			run_test_dependency(in_flight, loop, false);
> +		}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-write", rings[loop].name) {
> +			gem_require_ring(fd, rings[loop].id);
> +			run_test_dependency(in_flight, loop, true);
> +		}
> +
> +	igt_fixture {
> +		close(fd);
> +	}
> +}
Daniele Ceraolo Spurio March 17, 2016, 10:33 a.m. UTC | #2
On 17/03/16 08:49, Daniele Ceraolo Spurio wrote:
>
>
> On 10/03/16 11:03, Derek Morton wrote:
>> This is intended to test the scheduler behaviour is correct.
>> The subtests are
>> <ring>-basic
>> Tests that batch buffers of the same priority submitted to a ring
>> execute in the order they are submitted.
>> <ring>-read
>> Submits a batch buffer with a read dependency to a buffer object to
>> a ring which is held in the scheduler queue by a long running batch
>> buffer. Submit batch buffers to other rings that have a read dependency
>> to the same buffer object. Ensure they execute before the batch buffer
>> being held up behind the long running batch buffer.
>> <ring>-write
>> Submits a batch buffer with a write dependency to a buffer object to
>> a ring which is held in the scheduler queue by a long running batch
>> buffer. Submit batch buffers to other rings that have a write dependency
>> to the same buffer object. Submit batch buffers with no 
>> interdependencies
>> to all rings. Ensure the batch buffers that have write dependencies are
>> executed in submission order but the batch buffers without 
>> interdependencies
>> do not get held up.
>>
>> v2: Addressed review comments from Daniele Ceraolo Spurio
>>
>> v3: Added logic to use generic BSD ring if there is 1 and BSD1 / BSD2 
>> if there
>>      are 2.
>>
>> Signed-off-by: Derek Morton<derek.j.morton@intel.com>
>> ---
>>   tests/Makefile.sources |   1 +
>>   tests/gem_scheduler.c  | 459 
>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 460 insertions(+)
>>   create mode 100644 tests/gem_scheduler.c
>>
>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>> index f8b18b0..c88e045 100644
>> --- a/tests/Makefile.sources
>> +++ b/tests/Makefile.sources
>> @@ -66,6 +66,7 @@ TESTS_progs_M = \
>>       gem_request_retire \
>>       gem_reset_stats \
>>       gem_ringfill \
>> +    gem_scheduler \
>>       gem_set_tiling_vs_blt \
>>       gem_softpin \
>>       gem_stolen \
>> diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
>> new file mode 100644
>> index 0000000..436440a
>> --- /dev/null
>> +++ b/tests/gem_scheduler.c
>> @@ -0,0 +1,459 @@
>> +/*
>> + * Copyright © 2016 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person 
>> obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without 
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, 
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom 
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including 
>> the next
>> + * paragraph) shall be included in all copies or substantial 
>> portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 
>> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
>> OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Derek Morton<derek.j.morton@intel.com>
>> + *
>> + */
>> +
>> +#include "igt.h"
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <inttypes.h>
>> +#include <time.h>
>> +#include <sys/stat.h>
>> +#include <sys/ioctl.h>
>> +#include <fcntl.h>
>> +
>> +IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure 
>> independant "
>> +                     "batch buffers of the same priority are 
>> executed in "
>> +                     "submission order. Read-read tests ensure "
>> +                     "batch buffers with a read dependency to the 
>> same buffer "
>> +                     "object do not block each other. Write-write 
>> dependency "
>> +                     "tests ensure batch buffers with a write 
>> dependency to a "
>> +                     "buffer object will be executed in submission 
>> order but "
>> +                     "will not block execution of other independant 
>> batch "
>> +                     "buffers.");
>> +
>> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
>> +
>> +static struct ring {
>> +    const char *name;
>> +    int id;
>> +    bool exists;
>> +} rings[] = {
>> +    { "render", I915_EXEC_RENDER, false },
>> +    { "bsd",    I915_EXEC_BSD , false },
>> +    { "bsd2",    I915_EXEC_BSD | 2<<13, false },
>> +    { "blt",    I915_EXEC_BLT, false },
>> +    { "vebox",  I915_EXEC_VEBOX, false },
>> +};
>
> If you can't use intel_execution_engines could you add a comment to 
> explain why? also considering the renaming going on in the kernel you 
> cold replace "ring" with "engine"
>
>> +
>> +#define NBR_RINGS (sizeof(rings)/sizeof(struct ring))
>> +
>> +static void check_rings(int fd) {
>> +    int loop;
>> +    for(loop=0; loop < NBR_RINGS; loop++) {
>
> Style: space between for/if and "(" in this file
>
>> +        if(gem_has_ring(fd, rings[loop].id)) {
>> +            if(rings[loop].id == (I915_EXEC_BSD | 2<<13)) {
>> +                rings[loop].exists = gem_has_bsd2(fd);
>> +            }
>> +            else {
>> +                rings[loop].exists = true;
>
> You're marking the VEBOX as existing so adding a gen requirement in 
> this function would be nice. I know that the library you're using 
> already requires gen8, but the requirement doesn't look to be explicit 
> anywhere in this file so it would be nice to add it for clarity.
>
>> +                if(rings[loop].id == I915_EXEC_BSD)
>> +                    /* If there is BSD2, need to make BSD1
>> +                     * explicit.
>> +                     */
>> +                    if(gem_has_bsd2(fd))
>> +                        rings[loop].id |= (1<<13);
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> +static drm_intel_bo *create_and_check_bo(drm_intel_bufmgr *bufmgr, 
>> const char *desc)
>> +{
>> +    drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, desc, BATCH_SZ, 
>> BATCH_SZ);
>> +    igt_assert_f(bo, "Failed allocating %s\n", desc);
>> +    return bo;
>> +}
>> +
>> +static struct intel_batchbuffer *create_delay_bb(int fd, 
>> drm_intel_bufmgr *bufmgr,
>> +                                                 int ringid, 
>> uint32_t loops,
>> +                                                 drm_intel_bo *dest)
>> +{
>> +    struct intel_batchbuffer *buffer;
>> +    buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
>> +    igt_assert(buffer);
>> +    igt_create_delay_bb(fd, buffer, ringid, loops, dest);
>> +    return buffer;
>> +}
>> +
>> +static struct intel_batchbuffer *create_timestamp_bb(int fd, 
>> drm_intel_bufmgr *bufmgr,
>> +                                                     int ringid, 
>> drm_intel_bo *dest,
>> + drm_intel_bo *load, bool write)
>> +{
>> +    struct intel_batchbuffer *buffer;
>> +    buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
>> +    igt_assert(buffer);
>> +    igt_create_timestamp_bb(fd, buffer, ringid, dest, load, write);
>> +    return buffer;
>> +}
>> +
>> +static struct intel_batchbuffer *create_noop_bb(int fd, 
>> drm_intel_bufmgr *bufmgr,
>> +                                                int noops)
>> +{
>> +    struct intel_batchbuffer *buffer;
>> +    buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
>> +    igt_assert(buffer);
>> +    igt_create_noop_bb(buffer, noops);
>> +    return buffer;
>> +}
>> +
>> +static void init_context(int *fd, drm_intel_bufmgr **bufmgr, int 
>> ringid)
>
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not 
> init_context might be a bit misleading as a name as you're not  a ctx 
> directly, although initializing

Not really sure how this mess ended up in the email :-P

What I wanted to say is:

init_context might be a bit misleading as a name as you're not 
initializing a ctx directly, although initializing the fd and doing a 
submission you aim to initialize the default context so I'm not sure 
what to suggest as an alternative name.

Regards,
Daniele

>
>> +{
>> +    struct intel_batchbuffer *noop_bb;
>> +    *fd = drm_open_driver(DRIVER_INTEL);
>> +    igt_assert(*fd >= 0);
>> +    *bufmgr = drm_intel_bufmgr_gem_init(*fd, BATCH_SZ);
>> +    igt_assert(*bufmgr);
>> +    drm_intel_bufmgr_gem_enable_reuse(*bufmgr);
>> +    /* Send a noop batch buffer to force any deferred initialisation */
>> +    noop_bb = create_noop_bb(*fd, *bufmgr, 5);
>> +    intel_batchbuffer_flush_on_ring(noop_bb, ringid);
>> +    intel_batchbuffer_free(noop_bb);
>> +}
>> +
>> +/* Basic test. Check batch buffers of the same priority and with no 
>> dependencies
>> + * are executed in the order they are submitted.
>> + */
>> +#define NBR_BASIC_FDs (3)
>> +static void run_test_basic(int in_flight, int ringid)
>> +{
>> +    int fd[NBR_BASIC_FDs];
>> +    int loop;
>> +    drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs];
>> +    uint32_t *delay_buf, *ts1_buf, *ts2_buf;
>> +    struct intel_batchbuffer *ts1_bb, *ts2_bb;
>> +    struct intel_batchbuffer **in_flight_bbs;
>> +    uint32_t calibrated_1s;
>> +    drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
>> +
>> +    in_flight_bbs = malloc(in_flight * sizeof(struct 
>> intel_batchbuffer *));
>> +    igt_assert(in_flight_bbs);
>> +
>> +    /* Need multiple i915 fd's. Scheduler will not change execution 
>> order of
>> +     * batch buffers from the same context.
>> +     */
>> +    for(loop=0; loop < NBR_BASIC_FDs; loop++)
>> +        init_context(&(fd[loop]), &(bufmgr[loop]), ringid);
>> +
>> +
>> +    /* Create buffer objects */
>> +    delay_bo = create_and_check_bo(bufmgr[0], "delay bo");
>> +    ts1_bo = create_and_check_bo(bufmgr[1], "ts1 bo");
>> +    ts2_bo = create_and_check_bo(bufmgr[2], "ts2 bo");
>> +
>> +    /* Put some non zero values in the delay bo */
>
> You do this write for every delay batch, so you could move it to 
> create_delay_bb (or to the library function)
>
>> +    {
>
> Why is this extra scope needed? "data" doesn't clash with any other 
> variable name
>
>> +        uint32_t data=0xffffffff;
>> +        drm_intel_bo_subdata(delay_bo, 0, 4, &data);
>> +    }
>> +
>> +    calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
>> +
>> +    /* Batch buffers to fill the in flight queue */
>> +    in_flight_bbs[0] = create_delay_bb(fd[0], bufmgr[0], ringid, 
>> calibrated_1s, delay_bo);
>> +    for(loop = 1; loop < in_flight; loop++)
>> +        in_flight_bbs[loop] = create_noop_bb(fd[0], bufmgr[0], 5);
>> +
>> +    /* Extra batch buffers in the scheduler queue */
>> +    ts1_bb = create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, 
>> NULL, false);
>> +    ts2_bb = create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, 
>> NULL, false);
>> +
>> +    /* Flush batchbuffers */
>> +    for(loop = 0; loop < in_flight; loop++)
>> +        intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
>> +    intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
>> +    intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
>> +
>> +    /* This will not return until the bo has finished executing */
>> +    drm_intel_bo_map(delay_bo, 0);
>> +    drm_intel_bo_map(ts1_bo, 0);
>> +    drm_intel_bo_map(ts2_bo, 0);
>> +
>> +    delay_buf = delay_bo->virtual;
>> +    ts1_buf = ts1_bo->virtual;
>> +    ts2_buf = ts2_bo->virtual;
>> +
>> +    igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
>> +    igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
>> +    igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
>> +
>> +    /* buf[0] in the target buffer should be 0 if the batch buffer 
>> completed */
>> +    igt_assert_f(delay_buf[0] == 0,
>> +                 "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", 
>> delay_buf[0]);
>> +
>> +    igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
>> +                 "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 
>> ")\n",
>> +                 delay_buf[2], ts1_buf[0]);
>> +    igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
>> +                 "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 
>> ")\n",
>> +                 ts1_buf[0], ts2_buf[0]);
>> +
>> +    /* Cleanup */
>> +    for(loop = 0; loop < in_flight; loop++)
>> +        intel_batchbuffer_free(in_flight_bbs[loop]);
>> +    intel_batchbuffer_free(ts1_bb);
>> +    intel_batchbuffer_free(ts2_bb);
>> +
>> +    drm_intel_bo_unreference(delay_bo);
>> +    drm_intel_bo_unreference(ts1_bo);
>> +    drm_intel_bo_unreference(ts2_bo);
>> +    for(loop = 0; loop < NBR_BASIC_FDs; loop++) {
>> +        drm_intel_bufmgr_destroy(bufmgr[loop]);
>> +        close(fd[loop]);
>> +    }
>> +    free(in_flight_bbs);
>> +}
>> +
>> +/* Dependency test.
>> + * write=0, Submit batch buffers with read dependencies to all 
>> rings. Delay one
>> + * with a long executing batch buffer. Check the others are not held 
>> up.
>> + * write=1, Submit batch buffers with write dependencies to all 
>> rings. Delay one
>> + * with a long executing batch buffer. Also submit batch buffers 
>> with no
>> + * dependencies to all rings. Batch buffers with write dependencies 
>> should be
>> + * executed in submission order. The batch buffers with no 
>> dependencies should
>> + * not be held up.
>> + */
>> +static void run_test_dependency(int in_flight, int ring, bool write)
>> +{
>> +    int fd[NBR_RINGS], fd2[NBR_RINGS];
>> +    int loop;
>> +    int prime_fd;
>> +    uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], 
>> *shared_buf;
>> +    uint32_t calibrated_1s;
>> +    drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS];
>> +    struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], 
>> **in_flight_bbs;
>> +    drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], 
>> *shared_bo[NBR_RINGS];
>> +
>> +    in_flight_bbs = malloc(in_flight * sizeof(struct 
>> intel_batchbuffer *));
>> +    igt_assert(in_flight_bbs);
>> +
>> +    /* Need multiple i915 fd's. Scheduler will not change execution 
>> order of
>> +     * batch buffers from the same context.
>> +     */
>> +    for(loop=0; loop < NBR_RINGS; loop++) {
>
> You have several loops on all engines but with a bit of reordering you 
> should be able to fit everything in less loops. Unless I've missed a 
> dependency, you should be able to do something like:
>
> for (loop=0; loop < NBR_RINGS; loop++) {
>     init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
>     ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");
>
>     if (loop == 0)
>         shared_bo[0] = create_and_check_bo(...);
>         drm_intel_bo_gem_export_to_prime(...);
>     else
>         shared_bo[loop] = drm_intel_bo_gem_create_from_prime(...);
>
>     ts_bb[loop] = create_timestamp_bb(..);
>
>     if (write) {
>         ....
>     }
> }
>
>
> Also the operations performed in the "if (write)" case are the same of 
> the ones in the main loop with the exception of the shared bo so 
> potentially you could move the whole block in a separate function if 
> it doesn't get too messy.
>
>
>> +        init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
>> +        if(write)
>> +            init_context(&(fd2[loop]), &(bufmgr2[loop]), 
>> rings[loop].id);
>> +    }
>> +
>> +    /* Create buffer objects */
>> +    delay_bo = create_and_check_bo(bufmgr[ring], "delay bo");
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");
>> +        if(write)
>> +            ts2_bo[loop] = create_and_check_bo(bufmgr2[loop], "ts2 
>> bo");
>> +    }
>> +
>> +    /* Create shared buffer object */
>> +    shared_bo[0] = create_and_check_bo(bufmgr[0], "shared bo");
>> +
>> +    drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
>> +    for(loop = 1; loop < NBR_RINGS; loop++) {
>> +        shared_bo[loop] = 
>> drm_intel_bo_gem_create_from_prime(bufmgr[loop],
>> + prime_fd, BATCH_SZ);
>> +        igt_assert(shared_bo[loop]);
>> +    }
>> +    close(prime_fd);
>> +
>> +    /* Put some non zero values in the delay and shared bo */
>> +    drm_intel_bo_map(delay_bo, 1);
>> +    delay_buf = delay_bo->virtual;
>> +    delay_buf[0] = 0xff;
>> +    drm_intel_bo_unmap(delay_bo);
>> +    drm_intel_bo_map(shared_bo[0], 1);
>> +    shared_buf = shared_bo[0]->virtual;
>> +    shared_buf[0] = 0xff00ff00;
>> +    drm_intel_bo_unmap(shared_bo[0]);
>
> using subdata might be cleaner here as well
>
>> +
>> +    calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], 
>> rings[ring].id);
>> +
>> +    /* Batch buffers to fill the in flight queue */
>> +    in_flight_bbs[0] = create_delay_bb(fd[ring], bufmgr[ring], 
>> rings[ring].id,
>> +                                       calibrated_1s, delay_bo);
>> +    for(loop = 1; loop < in_flight; loop++)
>> +        in_flight_bbs[loop] = create_noop_bb(fd[ring], bufmgr[ring], 
>> 5);
>> +
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        ts_bb[loop] = create_timestamp_bb(fd[loop], bufmgr[loop],
>> +                                          rings[loop].id, ts_bo[loop],
>> +                                          shared_bo[loop], write);
>> +        if(write)
>> +            ts2_bb[loop] = create_timestamp_bb(fd2[loop], 
>> bufmgr2[loop],
>> +                                               rings[loop].id, 
>> ts2_bo[loop],
>> +                                               NULL, false);
>> +    }
>> +
>> +    /* Flush batchbuffers */
>> +    for(loop = 0; loop < in_flight; loop++)
>> +        intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], 
>> rings[ring].id);
>> +
>> +    intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
>> +    for(loop = 0; loop < NBR_RINGS; loop++)
>> +        if((loop != ring) && rings[loop].exists)
>> +            intel_batchbuffer_flush_on_ring(ts_bb[loop], 
>> rings[loop].id);
>> +
>> +    if(write) {
>> +        intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
>> +        for(loop = 0; loop < NBR_RINGS; loop++)
>> +            if((loop != ring) && rings[loop].exists)
>> +                intel_batchbuffer_flush_on_ring(ts2_bb[loop], 
>> rings[loop].id);
>> +    }
>> +
>> +    /* This will not return until the bo has finished executing */
>> +    drm_intel_bo_map(delay_bo, 0);
>> +    delay_buf = delay_bo->virtual;
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        drm_intel_bo_map(ts_bo[loop], 0);
>> +        ts_buf[loop] = ts_bo[loop]->virtual;
>> +        if(write) {
>> +            drm_intel_bo_map(ts2_bo[loop], 0);
>> +            ts2_buf[loop] = ts2_bo[loop]->virtual;
>> +        }
>> +    }
>> +
>> +    /* buf[0] in the target buffer should be 0 if the batch buffer 
>> completed */
>> +    igt_assert_f(delay_buf[0] == 0,
>> +                 "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
>> +                 delay_buf[0]);
>> +
>> +    igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n",
>> +              rings[ring].name, delay_buf[2]);
>> +    for(loop = 0; loop < NBR_RINGS; loop++)
>> +        igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
>> +                  rings[loop].name, ts_buf[loop][0]);
>> +
>> +    if(write) {
>> +        igt_debug("Independant batch buffers\n");
>> +        for(loop = 0; loop < NBR_RINGS; loop++)
>> +            igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
>> +                      rings[loop].name, ts2_buf[loop][0]);
>> +    }
>> +
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        if((loop != ring) && rings[loop].exists) {
>> +            if(write) {
>> +                /* Write dependency, delayed ring should run first */
>> + igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
>> +                             "%s ran before %s - 0x%08" PRIx32 " vs 
>> 0x%08" PRIx32 "\n",
>> +                             rings[loop].name, rings[ring].name,
>> +                             ts_buf[loop][0], ts_buf[ring][0]);
>> +                /* Second bb without dependency should run first */
>> + igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], 
>> ts_buf[loop][0]),
>> +                             "(%s) independant bb was held up - 
>> 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
>> +                             rings[loop].name, ts_buf[loop][0], 
>> ts2_buf[loop][0]);
>> +            }
>> +            else
>> +                /* Read dependency, delayed ring should run last */
>> + igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
>> +                             "%s ran after %s - 0x%08" PRIx32 " vs 
>> 0x%08" PRIx32 "\n",
>> +                             rings[loop].name, rings[ring].name,
>> +                             ts_buf[loop][0], ts_buf[ring][0]);
>> +        }
>> +    }
>> +
>> +    /* Cleanup */
>> +    for(loop = 0; loop < in_flight; loop++)
>> +        intel_batchbuffer_free(in_flight_bbs[loop]);
>> +
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        intel_batchbuffer_free(ts_bb[loop]);
>> +        drm_intel_bo_unreference(ts_bo[loop]);
>> +        drm_intel_bo_unreference(shared_bo[loop]);
>> +        if(write) {
>> +            intel_batchbuffer_free(ts2_bb[loop]);
>> +            drm_intel_bo_unreference(ts2_bo[loop]);
>> +        }
>> +    }
>> +
>> +    drm_intel_bo_unreference(delay_bo);
>
> You can move this unreferencing to before the above loop and then 
> squash the 2 loops onNBR_RINGSinto one.
>
> Regards,
> Daniele
>
>> +
>> +    for(loop = 0; loop < NBR_RINGS; loop++) {
>> +        drm_intel_bufmgr_destroy(bufmgr[loop]);
>> +        close(fd[loop]);
>> +        if(write) {
>> +            drm_intel_bufmgr_destroy(bufmgr2[loop]);
>> +            close(fd2[loop]);
>> +        }
>> +    }
>> +
>> +    free(in_flight_bbs);
>> +}
>> +
>> +igt_main
>> +{
>> +    int loop;
>> +    int in_flight;
>> +    int fd;
>> +
>> +    igt_fixture {
>> +        int debug_fd;
>> +        int l;
>> +        char buf[6];
>> +        /* Get nbr of batch buffers that the scheduler will queue in 
>> the
>> +         * HW. If this debugfs file does not exist there is no 
>> scheduler
>> +         * so skip the test.
>> +         */
>> +        debug_fd = igt_debugfs_open("i915_scheduler_min_flying", 
>> O_RDONLY);
>> +        igt_skip_on(debug_fd == -1);
>> +        l = read(debug_fd, buf, sizeof(buf)-1);
>> +        igt_assert(l > 0);
>> +        igt_assert(l < sizeof(buf));
>> +        buf[l] = '\0';
>> +        /* May be a decimal or hex number depending on scheduler 
>> version */
>> +        if(sscanf(buf, "0x%2x", &in_flight) != 1)
>> +            igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
>> +                         "Error reading from 
>> i915_scheduler_min_flying\n");
>> +        close(debug_fd);
>> +        igt_debug("in flight = %d\n", in_flight);
>> +        fd = drm_open_driver(DRIVER_INTEL);
>> +        igt_assert(fd >= 0);
>> +        check_rings(fd);
>> +    }
>> +
>> +    for (loop=0; loop < NBR_RINGS; loop++)
>> +        igt_subtest_f("%s-basic", rings[loop].name) {
>> +            gem_require_ring(fd, rings[loop].id);
>> +            run_test_basic(in_flight, rings[loop].id);
>> +        }
>> +
>> +    for (loop=0; loop < NBR_RINGS; loop++)
>> +        igt_subtest_f("%s-read", rings[loop].name) {
>> +            gem_require_ring(fd, rings[loop].id);
>> +            run_test_dependency(in_flight, loop, false);
>> +        }
>> +
>> +    for (loop=0; loop < NBR_RINGS; loop++)
>> +        igt_subtest_f("%s-write", rings[loop].name) {
>> +            gem_require_ring(fd, rings[loop].id);
>> +            run_test_dependency(in_flight, loop, true);
>> +        }
>> +
>> +    igt_fixture {
>> +        close(fd);
>> +    }
>> +}
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index f8b18b0..c88e045 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -66,6 +66,7 @@  TESTS_progs_M = \
 	gem_request_retire \
 	gem_reset_stats \
 	gem_ringfill \
+	gem_scheduler \
 	gem_set_tiling_vs_blt \
 	gem_softpin \
 	gem_stolen \
diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
new file mode 100644
index 0000000..436440a
--- /dev/null
+++ b/tests/gem_scheduler.c
@@ -0,0 +1,459 @@ 
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.morton@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+
+IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant "
+                     "batch buffers of the same priority are executed in "
+                     "submission order. Read-read tests ensure "
+                     "batch buffers with a read dependency to the same buffer "
+                     "object do not block each other. Write-write dependency "
+                     "tests ensure batch buffers with a write dependency to a "
+                     "buffer object will be executed in submission order but "
+                     "will not block execution of other independant batch "
+                     "buffers.");
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+
+static struct ring {
+	const char *name;
+	int id;
+	bool exists;
+} rings[] = {
+	{ "render", I915_EXEC_RENDER, false },
+	{ "bsd",    I915_EXEC_BSD , false },
+	{ "bsd2",    I915_EXEC_BSD | 2<<13, false },
+	{ "blt",    I915_EXEC_BLT, false },
+	{ "vebox",  I915_EXEC_VEBOX, false },
+};
+
+#define NBR_RINGS (sizeof(rings)/sizeof(struct ring))
+
+static void check_rings(int fd) {
+	int loop;
+	for(loop=0; loop < NBR_RINGS; loop++) {
+		if(gem_has_ring(fd, rings[loop].id)) {
+			if(rings[loop].id == (I915_EXEC_BSD | 2<<13)) {
+				rings[loop].exists = gem_has_bsd2(fd);
+			}
+			else {
+				rings[loop].exists = true;
+				if(rings[loop].id == I915_EXEC_BSD)
+					/* If there is BSD2, need to make BSD1
+					 * explicit.
+					 */
+					if(gem_has_bsd2(fd))
+						rings[loop].id |= (1<<13);
+			}
+		}
+	}
+}
+
+static drm_intel_bo *create_and_check_bo(drm_intel_bufmgr *bufmgr, const char *desc)
+{
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, desc, BATCH_SZ, BATCH_SZ);
+	igt_assert_f(bo, "Failed allocating %s\n", desc);
+	return bo;
+}
+
+static struct intel_batchbuffer *create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                 int ringid, uint32_t loops,
+                                                 drm_intel_bo *dest)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_delay_bb(fd, buffer, ringid, loops, dest);
+	return buffer;
+}
+
+static struct intel_batchbuffer *create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                     int ringid, drm_intel_bo *dest,
+                                                     drm_intel_bo *load, bool write)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_timestamp_bb(fd, buffer, ringid, dest, load, write);
+	return buffer;
+}
+
+static struct intel_batchbuffer *create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                int noops)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_noop_bb(buffer, noops);
+	return buffer;
+}
+
+static void init_context(int *fd, drm_intel_bufmgr **bufmgr, int ringid)
+{
+	struct intel_batchbuffer *noop_bb;
+	*fd = drm_open_driver(DRIVER_INTEL);
+	igt_assert(*fd >= 0);
+	*bufmgr = drm_intel_bufmgr_gem_init(*fd, BATCH_SZ);
+	igt_assert(*bufmgr);
+	drm_intel_bufmgr_gem_enable_reuse(*bufmgr);
+	/* Send a noop batch buffer to force any deferred initialisation */
+	noop_bb = create_noop_bb(*fd, *bufmgr, 5);
+	intel_batchbuffer_flush_on_ring(noop_bb, ringid);
+	intel_batchbuffer_free(noop_bb);
+}
+
+/* Basic test. Check batch buffers of the same priority and with no dependencies
+ * are executed in the order they are submitted.
+ */
+#define NBR_BASIC_FDs (3)
+static void run_test_basic(int in_flight, int ringid)
+{
+	int fd[NBR_BASIC_FDs];
+	int loop;
+	drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs];
+	uint32_t *delay_buf, *ts1_buf, *ts2_buf;
+	struct intel_batchbuffer *ts1_bb, *ts2_bb;
+	struct intel_batchbuffer **in_flight_bbs;
+	uint32_t calibrated_1s;
+	drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
+
+	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
+	igt_assert(in_flight_bbs);
+
+	/* Need multiple i915 fd's. Scheduler will not change execution order of
+	 * batch buffers from the same context.
+	 */
+	for(loop=0; loop < NBR_BASIC_FDs; loop++)
+		init_context(&(fd[loop]), &(bufmgr[loop]), ringid);
+
+
+	/* Create buffer objects */
+	delay_bo = create_and_check_bo(bufmgr[0], "delay bo");
+	ts1_bo = create_and_check_bo(bufmgr[1], "ts1 bo");
+	ts2_bo = create_and_check_bo(bufmgr[2], "ts2 bo");
+
+	/* Put some non zero values in the delay bo */
+	{
+		uint32_t data=0xffffffff;
+		drm_intel_bo_subdata(delay_bo, 0, 4, &data);
+	}
+
+	calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
+
+	/* Batch buffers to fill the in flight queue */
+	in_flight_bbs[0] = create_delay_bb(fd[0], bufmgr[0], ringid, calibrated_1s, delay_bo);
+	for(loop = 1; loop < in_flight; loop++)
+		in_flight_bbs[loop] = create_noop_bb(fd[0], bufmgr[0], 5);
+
+	/* Extra batch buffers in the scheduler queue */
+	ts1_bb = create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, NULL, false);
+	ts2_bb = create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, NULL, false);
+
+	/* Flush batchbuffers */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
+	intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
+	intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
+
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_map(delay_bo, 0);
+	drm_intel_bo_map(ts1_bo, 0);
+	drm_intel_bo_map(ts2_bo, 0);
+
+	delay_buf = delay_bo->virtual;
+	ts1_buf = ts1_bo->virtual;
+	ts2_buf = ts2_bo->virtual;
+
+	igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
+	igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
+	igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
+
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(delay_buf[0] == 0,
+	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", delay_buf[0]);
+
+	igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
+	             "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
+	             delay_buf[2], ts1_buf[0]);
+	igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
+	             "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n",
+	             ts1_buf[0], ts2_buf[0]);
+
+	/* Cleanup */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_free(in_flight_bbs[loop]);
+	intel_batchbuffer_free(ts1_bb);
+	intel_batchbuffer_free(ts2_bb);
+
+	drm_intel_bo_unreference(delay_bo);
+	drm_intel_bo_unreference(ts1_bo);
+	drm_intel_bo_unreference(ts2_bo);
+	for(loop = 0; loop < NBR_BASIC_FDs; loop++) {
+		drm_intel_bufmgr_destroy(bufmgr[loop]);
+		close(fd[loop]);
+	}
+	free(in_flight_bbs);
+}
+
+/* Dependency test.
+ * write=0, Submit batch buffers with read dependencies to all rings. Delay one
+ * with a long executing batch buffer. Check the others are not held up.
+ * write=1, Submit batch buffers with write dependencies to all rings. Delay one
+ * with a long executing batch buffer. Also submit batch buffers with no
+ * dependencies to all rings. Batch buffers with write dependencies should be
+ * executed in submission order. The batch buffers with no dependencies should
+ * not be held up.
+ */
+static void run_test_dependency(int in_flight, int ring, bool write)
+{
+	int fd[NBR_RINGS], fd2[NBR_RINGS];
+	int loop;
+	int prime_fd;
+	uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], *shared_buf;
+	uint32_t calibrated_1s;
+	drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS];
+	struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], **in_flight_bbs;
+	drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], *shared_bo[NBR_RINGS];
+
+	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
+	igt_assert(in_flight_bbs);
+
+	/* Need multiple i915 fd's. Scheduler will not change execution order of
+	 * batch buffers from the same context.
+	 */
+	for(loop=0; loop < NBR_RINGS; loop++) {
+		init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
+		if(write)
+			init_context(&(fd2[loop]), &(bufmgr2[loop]), rings[loop].id);
+	}
+
+	/* Create buffer objects */
+	delay_bo = create_and_check_bo(bufmgr[ring], "delay bo");
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");
+		if(write)
+			ts2_bo[loop] = create_and_check_bo(bufmgr2[loop], "ts2 bo");
+	}
+
+	/* Create shared buffer object */
+	shared_bo[0] = create_and_check_bo(bufmgr[0], "shared bo");
+
+	drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
+	for(loop = 1; loop < NBR_RINGS; loop++) {
+		shared_bo[loop] = drm_intel_bo_gem_create_from_prime(bufmgr[loop],
+		                                                     prime_fd, BATCH_SZ);
+		igt_assert(shared_bo[loop]);
+	}
+	close(prime_fd);
+
+	/* Put some non zero values in the delay and shared bo */
+	drm_intel_bo_map(delay_bo, 1);
+	delay_buf = delay_bo->virtual;
+	delay_buf[0] = 0xff;
+	drm_intel_bo_unmap(delay_bo);
+	drm_intel_bo_map(shared_bo[0], 1);
+	shared_buf = shared_bo[0]->virtual;
+	shared_buf[0] = 0xff00ff00;
+	drm_intel_bo_unmap(shared_bo[0]);
+
+	calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], rings[ring].id);
+
+	/* Batch buffers to fill the in flight queue */
+	in_flight_bbs[0] = create_delay_bb(fd[ring], bufmgr[ring], rings[ring].id,
+	                                   calibrated_1s, delay_bo);
+	for(loop = 1; loop < in_flight; loop++)
+		in_flight_bbs[loop] = create_noop_bb(fd[ring], bufmgr[ring], 5);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		ts_bb[loop] = create_timestamp_bb(fd[loop], bufmgr[loop],
+		                                  rings[loop].id, ts_bo[loop],
+		                                  shared_bo[loop], write);
+		if(write)
+			ts2_bb[loop] = create_timestamp_bb(fd2[loop], bufmgr2[loop],
+			                                   rings[loop].id, ts2_bo[loop],
+			                                   NULL, false);
+	}
+
+	/* Flush batchbuffers */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], rings[ring].id);
+
+	intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
+	for(loop = 0; loop < NBR_RINGS; loop++)
+		if((loop != ring) && rings[loop].exists)
+			intel_batchbuffer_flush_on_ring(ts_bb[loop], rings[loop].id);
+
+	if(write) {
+		intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
+		for(loop = 0; loop < NBR_RINGS; loop++)
+			if((loop != ring) && rings[loop].exists)
+				intel_batchbuffer_flush_on_ring(ts2_bb[loop], rings[loop].id);
+	}
+
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_map(delay_bo, 0);
+	delay_buf = delay_bo->virtual;
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		drm_intel_bo_map(ts_bo[loop], 0);
+		ts_buf[loop] = ts_bo[loop]->virtual;
+		if(write) {
+			drm_intel_bo_map(ts2_bo[loop], 0);
+			ts2_buf[loop] = ts2_bo[loop]->virtual;
+		}
+	}
+
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(delay_buf[0] == 0,
+	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
+	             delay_buf[0]);
+
+	igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n",
+	          rings[ring].name, delay_buf[2]);
+	for(loop = 0; loop < NBR_RINGS; loop++)
+		igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
+		          rings[loop].name, ts_buf[loop][0]);
+
+	if(write) {
+		igt_debug("Independant batch buffers\n");
+		for(loop = 0; loop < NBR_RINGS; loop++)
+			igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
+			          rings[loop].name, ts2_buf[loop][0]);
+	}
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		if((loop != ring) && rings[loop].exists) {
+			if(write) {
+				/* Write dependency, delayed ring should run first */
+				igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
+				             "%s ran before %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, rings[ring].name,
+				             ts_buf[loop][0], ts_buf[ring][0]);
+				/* Second bb without dependency should run first */
+				igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]),
+				             "(%s) independant bb was held up - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, ts_buf[loop][0], ts2_buf[loop][0]);
+			}
+			else
+				/* Read dependency, delayed ring should run last */
+				igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
+				             "%s ran after %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, rings[ring].name,
+				             ts_buf[loop][0], ts_buf[ring][0]);
+		}
+	}
+
+	/* Cleanup */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_free(in_flight_bbs[loop]);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		intel_batchbuffer_free(ts_bb[loop]);
+		drm_intel_bo_unreference(ts_bo[loop]);
+		drm_intel_bo_unreference(shared_bo[loop]);
+		if(write) {
+			intel_batchbuffer_free(ts2_bb[loop]);
+			drm_intel_bo_unreference(ts2_bo[loop]);
+		}
+	}
+
+	drm_intel_bo_unreference(delay_bo);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		drm_intel_bufmgr_destroy(bufmgr[loop]);
+		close(fd[loop]);
+		if(write) {
+			drm_intel_bufmgr_destroy(bufmgr2[loop]);
+			close(fd2[loop]);
+		}
+	}
+
+	free(in_flight_bbs);
+}
+
+igt_main
+{
+	int loop;
+	int in_flight;
+	int fd;
+
+	igt_fixture {
+		int debug_fd;
+		int l;
+		char buf[6];
+		/* Get nbr of batch buffers that the scheduler will queue in the
+		 * HW. If this debugfs file does not exist there is no scheduler
+		 * so skip the test.
+		 */
+		debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY);
+		igt_skip_on(debug_fd == -1);
+		l = read(debug_fd, buf, sizeof(buf)-1);
+		igt_assert(l > 0);
+		igt_assert(l < sizeof(buf));
+		buf[l] = '\0';
+		/* May be a decimal or hex number depending on scheduler version */
+		if(sscanf(buf, "0x%2x", &in_flight) != 1)
+			igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
+			             "Error reading from i915_scheduler_min_flying\n");
+		close(debug_fd);
+		igt_debug("in flight = %d\n", in_flight);
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_assert(fd >= 0);
+		check_rings(fd);
+	}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-basic", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_basic(in_flight, rings[loop].id);
+		}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-read", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_dependency(in_flight, loop, false);
+		}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-write", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_dependency(in_flight, loop, true);
+		}
+
+	igt_fixture {
+		close(fd);
+	}
+}