diff mbox

[i-g-t,1/4] lib/igt_bb_factory: Add igt_bb_factory library

Message ID 1455269934-7586-2-git-send-email-derek.j.morton@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Derek Morton Feb. 12, 2016, 9:38 a.m. UTC
Adds functions to create a number of different batch buffers to perform
several functions including:
Batch buffer which will run for a long duration to provide a delay on a
specified ring.
Function to calibrate the delay batch buffer to run for a specified period
of time.
Function to create a batch buffer which writes timestamps to a buffer object.
Function to compare timestamps allowing for wrapping of the values.

Intended for use by the gem_scheduler test initially but will be used by other
tests in development.

Signed-off-by: Derek Morton <derek.j.morton@intel.com>
---
 lib/Makefile.sources |   2 +
 lib/igt.h            |   1 +
 lib/igt_bb_factory.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_bb_factory.h |  47 ++++++
 4 files changed, 451 insertions(+)
 create mode 100644 lib/igt_bb_factory.c
 create mode 100644 lib/igt_bb_factory.h

Comments

Daniel Vetter Feb. 16, 2016, 3:54 p.m. UTC | #1
On Fri, Feb 12, 2016 at 09:38:51AM +0000, Derek Morton wrote:
> Adds functions to create a number of different batch buffers to perform
> several functions including:
> Batch buffer which will run for a long duration to provide a delay on a
> specified ring.
> Function to calibrate the delay batch buffer to run for a specified period
> of time.
> Function to create a batch buffer which writes timestamps to a buffer object.
> Function to compare timestamps allowing for wrapping of the values.
> 
> Intended for use by the gem_scheduler test initially but will be used by other
> tests in development.
> 
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>

We already have batchbuffer factories and stuff in intel_batchbuffer.c.
Integrating it there also relieves from adding a dummy and fairly useless
topic section header in the docs.
-Daniel

> ---
>  lib/Makefile.sources |   2 +
>  lib/igt.h            |   1 +
>  lib/igt_bb_factory.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/igt_bb_factory.h |  47 ++++++
>  4 files changed, 451 insertions(+)
>  create mode 100644 lib/igt_bb_factory.c
>  create mode 100644 lib/igt_bb_factory.h
> 
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 4999868..c560b3e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -7,6 +7,8 @@ libintel_tools_la_SOURCES = 	\
>  	i915_reg.h		\
>  	i915_pciids.h		\
>  	igt.h			\
> +	igt_bb_factory.c	\
> +	igt_bb_factory.h	\
>  	igt_debugfs.c		\
>  	igt_debugfs.h		\
>  	igt_aux.c		\
> diff --git a/lib/igt.h b/lib/igt.h
> index 3be2551..0f29420 100644
> --- a/lib/igt.h
> +++ b/lib/igt.h
> @@ -36,6 +36,7 @@
>  #include "igt_gt.h"
>  #include "igt_kms.h"
>  #include "igt_stats.h"
> +#include "igt_bb_factory.h"
>  #include "instdone.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_chipset.h"
> diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c
> new file mode 100644
> index 0000000..eea63c6
> --- /dev/null
> +++ b/lib/igt_bb_factory.c
> @@ -0,0 +1,401 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton <derek.j.morton@intel.com>
> + *
> + */
> +
> +#include "igt.h"
> +#include "intel_batchbuffer.h"
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <time.h>
> +
> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
> +#define DWORDS_TO_BYTES(x) ((x)*4)
> +
> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
> +
> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
> +#define ALU_SUB             ( 0x101 << 20)
> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
> +
> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
> +#define ALU_GPU_R0_LSB_offset (0x600)
> +#define ALU_GPU_R0_MSB_offset (0x604)
> +#define ALU_GPU_R1_LSB_offset (0x608)
> +#define ALU_GPU_R1_MSB_offset (0x60C)
> +#define ALU_GPU_R2_LSB_offset (0x610)
> +#define ALU_GPU_R2_MSB_offset (0x614)
> +
> +#define ALU_R0_ENCODING   (0x00)
> +#define ALU_R1_ENCODING   (0x01)
> +#define ALU_SRCA_ENCODING (0x20)
> +#define ALU_SRCB_ENCODING (0x21)
> +#define ALU_ACCU_ENCODING (0x31)
> +
> +/**
> + * SECTION:igt_bb_factory
> + * @short_description: Utility functions for creating batch buffers
> + * @title: Batch Buffer Factory
> + * @include: igt.h
> + *
> + * This library implements functions for creating batch buffers which may be
> + * useful to multiple tests.
> + */
> +
> +static void check_gen_8(int fd)
> +{
> +	static bool checked = false;
> +	if(!checked) {
> +		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
> +		checked = true;
> +	}
> +}
> +
> +static int bb_address_size_dw(int fd)
> +{
> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> +		return 2;
> +	else
> +		return 1;
> +}
> +
> +static uint32_t get_register_offset(int ringid)
> +{
> +	switch (ringid) {
> +	case I915_EXEC_RENDER:
> +		return 0x02000;
> +	case I915_EXEC_BSD:
> +		return 0x12000;
> +	case I915_EXEC_BLT:
> +		return 0x22000;
> +	case I915_EXEC_VEBOX:
> +		return 0x1A000;
> +	default:
> +		igt_assert_f(0, "Invalid ringid %d passed to get_register_offset()\n", ringid);
> +	}
> +}
> +
> +/**
> + * igt_create_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * loops: Number of times to loop
> + * dest: Buffer to use for saving the current loop count and timestamp.
> + *
> + * This creates a batch buffer which will iterate a loop a specified number
> + * of times. Intended for creating batch buffers which take an arbitarily
> + * long time to execute. This can be useful to keep a ring busy while
> + * constructing a test scenario.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 & DW1 - These are loade with the value of 'loops' and are decremented
> + *             as the batch buffer executes. They will be 0 after the batch
> + *             buffer completes if it finished succesfully.
> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a
> + *                 comparison between batch buffers to show execution order.
> + *                 May wrap so igt_compare_timestamps() should be used to
> + *                 compare timestamps.
> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, uint32_t loops, drm_intel_bo *dest)
> +{
> +	struct intel_batchbuffer *batch;
> +	int addr_size_dw;
> +	uint32_t regOffset;
> +
> +	check_gen_8(fd);
> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	regOffset = get_register_offset(ringid);
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(32, 5);
> +	/* store current timestamp in DW2 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +
> +	/* Load R0 with loops */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_BATCH(loops);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);
> +	OUT_BATCH(0x00000000);
> +	/* Load R1 with 1 */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);
> +	OUT_BATCH(0x00000001);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);
> +	OUT_BATCH(0x00000000);
> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
> +	/* e.g. R0 -= 1 */
> +	OUT_BATCH(MI_MATH(4));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
> +	OUT_BATCH(ALU_SUB);
> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
> +	/* Copy R0 to dest
> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
> +	 * together.
> +	 * On Gen9+, and the other rings on Gen8 Compare address points to
> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
> +	 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
> +	/* Repeat until R0 == 0 */
> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
> +	OUT_BATCH(0x00000000);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(15));
> +	/* Should never get here, but end if it happens */
> +
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/**
> + * igt_create_timestamp_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * dest: Buffer to use for saving the timestamps.
> + * load: Buffer to access. Set NULL if not required.
> + * write: If true and load is not NULL, will also write a timestamp to load
> + * buffer. If false and load is not NULL, will read from load buffer into dest.
> + * Intended for dependency checking.
> + *
> + * This creates a batch buffer which writes timestamps into a buffer object.
> + * If 'load' is non null, data is either written to 'load' or copied from 'load'
> + * depending on whether 'write' is set.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
> + *                          comparison between batch buffers to show execution order.
> + *                          May wrap so igt_compare_timestamps() should be used to
> + *                          compare timestamps.
> + * DW1 Context timestamp - Elapsed time since context was created.
> + * DW2 Value copied from DW0 of load if write == false
> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write)
> +{
> +	struct intel_batchbuffer *batch;
> +	int addr_size_dw;
> +	uint32_t regOffset;
> +
> +	check_gen_8(fd);
> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	regOffset = get_register_offset(ringid);
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(6, 2);
> +	/* store current reported timestamp in DW0 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +
> +	/* store current context timestamp in DW1 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
> +
> +	ADVANCE_BATCH();
> +
> +	if(load != NULL) {
> +		if(write) {
> +			BEGIN_BATCH(3, 1);
> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +			OUT_BATCH(regOffset + TIMESTAMP_offset);
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +		else {
> +			BEGIN_BATCH(3, 2);
> +			OUT_BATCH(MI_COPY_MEM_MEM);
> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +	}
> +
> +	BEGIN_BATCH(1, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/**
> + * igt_create_noop_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * noops: Number of MI_NOOP instructions to add to the batch buffer.
> + *
> + * This creates a batch buffer with a specified number of MI_NOOP instructions.
> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, int noops)
> +{
> +	struct intel_batchbuffer *batch;
> +	int loop;
> +
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(noops + 1, 0);
> +	for(loop = 0; loop < noops; loop++)
> +		OUT_BATCH(MI_NOOP);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/* Store calibrated values so they only need calculating once.
> + * I915_EXEC_RING_MASK allows 3 bits for ring ids so allow for storing 8 values */
> +static uint32_t calibrated_ring_value[8] = {0, 0, 0, 0, 0, 0, 0, 0};
> +
> +/**
> + * igt_calibrate_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
> + *
> + * This calculates the value of loops that would need to be passed to
> + * igt_create_delay_bb() to create a delay of about 1 second on the specified
> + * ring.
> + *
> + * Returns:
> + * uint32_t to be passed to igt_create_delay_bb().
> + */
> +#define CAL_SEED (0x100000)
> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
> +{
> +	uint32_t *buf;
> +	struct intel_batchbuffer *bb;
> +	struct timespec start, end;
> +	uint64_t duration;
> +	uint64_t calibrated;
> +	drm_intel_bo *target_bo;
> +
> +	igt_assert(ringid < 8);
> +	if(calibrated_ring_value[ringid] != 0)
> +		return calibrated_ring_value[ringid];
> +
> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(target_bo);
> +
> +	/* Put some non zero values in the target bo */
> +	drm_intel_bo_map(target_bo, 1);
> +	buf = target_bo->virtual;
> +	buf[0] = 0xff;
> +	drm_intel_bo_unmap(target_bo);
> +
> +	bb = igt_create_delay_bb(fd, bufmgr, ringid, CAL_SEED, target_bo);
> +
> +	gem_quiescent_gpu(fd);
> +	clock_gettime(CLOCK_MONOTONIC, &start);
> +	intel_batchbuffer_flush_on_ring(bb, ringid);
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(target_bo, 0);
> +	clock_gettime(CLOCK_MONOTONIC, &end);
> +
> +	buf = target_bo->virtual;
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
> +
> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
> +
> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
> +	          duration / SEC_TO_NSEC,
> +	          (duration % SEC_TO_NSEC) / 100000);
> +	drm_intel_bo_unreference(target_bo);
> +	intel_batchbuffer_free(bb);
> +
> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong
> +	 * */
> +	igt_assert(duration > (SEC_TO_NSEC  / 10));
> +
> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
> +
> +	calibrated_ring_value[ringid] = (uint32_t)calibrated;
> +	return (uint32_t)calibrated;
> +}
> +
> +/**
> + * igt_compare_timestamps:
> + * @ts1: timestamp 1
> + * @ts2: timestamp 2
> + *
> + * This compares two uint32_t timestamps. To handle wrapping it assumes the
> + * difference between the two timestamps is less than 1/4 the max elapsed time
> + * represented by the counters.
> + * It also assumes the timestamps are samples from the same counter.
> + *
> + * Returns:
> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
> + */
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
> +{
> +	if (ts2 > ts1)
> +		return true;
> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
> +		return true; /* Assuming timestamp counter wrapped */
> +	else
> +		return false;
> +}
> diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h
> new file mode 100644
> index 0000000..3ab7f13
> --- /dev/null
> +++ b/lib/igt_bb_factory.h
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton <derek.j.morton@intel.com>
> + *
> + */
> +
> +#ifndef IGT_BB_FACTORY_H
> +#define IGT_BB_FACTORY_H
> +
> +#include "intel_batchbuffer.h"
> +#include <stdint.h>
> +
> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, uint32_t loops, drm_intel_bo *dest);
> +
> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write);
> +
> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, int noops);
> +
> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
> +
> +#endif /* IGT_BB_FACTORY_H */
> -- 
> 1.9.1
>
Daniele Ceraolo Spurio Feb. 17, 2016, 9:48 a.m. UTC | #2
On 12/02/16 09:38, Derek Morton wrote:
> Adds functions to create a number of different batch buffers to perform
> several functions including:
> Batch buffer which will run for a long duration to provide a delay on a
> specified ring.
> Function to calibrate the delay batch buffer to run for a specified period
> of time.
> Function to create a batch buffer which writes timestamps to a buffer object.
> Function to compare timestamps allowing for wrapping of the values.
>
> Intended for use by the gem_scheduler test initially but will be used by other
> tests in development.
>
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> ---
>   lib/Makefile.sources |   2 +
>   lib/igt.h            |   1 +
>   lib/igt_bb_factory.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   lib/igt_bb_factory.h |  47 ++++++
>   4 files changed, 451 insertions(+)
>   create mode 100644 lib/igt_bb_factory.c
>   create mode 100644 lib/igt_bb_factory.h
>
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 4999868..c560b3e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -7,6 +7,8 @@ libintel_tools_la_SOURCES = 	\
>   	i915_reg.h		\
>   	i915_pciids.h		\
>   	igt.h			\
> +	igt_bb_factory.c	\
> +	igt_bb_factory.h	\
>   	igt_debugfs.c		\
>   	igt_debugfs.h		\
>   	igt_aux.c		\
> diff --git a/lib/igt.h b/lib/igt.h
> index 3be2551..0f29420 100644
> --- a/lib/igt.h
> +++ b/lib/igt.h
> @@ -36,6 +36,7 @@
>   #include "igt_gt.h"
>   #include "igt_kms.h"
>   #include "igt_stats.h"
> +#include "igt_bb_factory.h"
>   #include "instdone.h"
>   #include "intel_batchbuffer.h"
>   #include "intel_chipset.h"
> diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c
> new file mode 100644
> index 0000000..eea63c6
> --- /dev/null
> +++ b/lib/igt_bb_factory.c
> @@ -0,0 +1,401 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton <derek.j.morton@intel.com>
> + *
> + */
> +
> +#include "igt.h"
> +#include "intel_batchbuffer.h"
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <time.h>
> +
> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
> +#define DWORDS_TO_BYTES(x) ((x)*4)
> +
> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
> +
> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
> +#define ALU_SUB             ( 0x101 << 20)
> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
> +
> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
> +#define ALU_GPU_R0_LSB_offset (0x600)
> +#define ALU_GPU_R0_MSB_offset (0x604)
> +#define ALU_GPU_R1_LSB_offset (0x608)
> +#define ALU_GPU_R1_MSB_offset (0x60C)
> +#define ALU_GPU_R2_LSB_offset (0x610)
> +#define ALU_GPU_R2_MSB_offset (0x614)
> +
> +#define ALU_R0_ENCODING   (0x00)
> +#define ALU_R1_ENCODING   (0x01)
> +#define ALU_SRCA_ENCODING (0x20)
> +#define ALU_SRCB_ENCODING (0x21)
> +#define ALU_ACCU_ENCODING (0x31)
> +
> +/**
> + * SECTION:igt_bb_factory
> + * @short_description: Utility functions for creating batch buffers
> + * @title: Batch Buffer Factory
> + * @include: igt.h
> + *
> + * This library implements functions for creating batch buffers which may be
> + * useful to multiple tests.
> + */
> +
> +static void check_gen_8(int fd)
> +{
> +	static bool checked = false;
> +	if(!checked) {
> +		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
> +		checked = true;
> +	}
> +}
> +
> +static int bb_address_size_dw(int fd)
> +{
> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> +		return 2;
> +	else
> +		return 1;
> +}
> +
> +static uint32_t get_register_offset(int ringid)
register_offset feels a bit confusing as name, I suggest to use 
mmio_base instead

> +{
> +	switch (ringid) {
> +	case I915_EXEC_RENDER:
> +		return 0x02000;
> +	case I915_EXEC_BSD:
> +		return 0x12000;
What about the BSD2?

> +	case I915_EXEC_BLT:
> +		return 0x22000;
> +	case I915_EXEC_VEBOX:
> +		return 0x1A000;
> +	default:
> +		igt_assert_f(0, "Invalid ringid %d passed to get_register_offset()\n", ringid);
> +	}
> +}
> +
> +/**
> + * igt_create_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * loops: Number of times to loop
> + * dest: Buffer to use for saving the current loop count and timestamp.
> + *
> + * This creates a batch buffer which will iterate a loop a specified number
> + * of times. Intended for creating batch buffers which take an arbitarily
> + * long time to execute. This can be useful to keep a ring busy while
> + * constructing a test scenario.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 & DW1 - These are loade with the value of 'loops' and are decremented
> + *             as the batch buffer executes. They will be 0 after the batch
> + *             buffer completes if it finished succesfully.
> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a
> + *                 comparison between batch buffers to show execution order.
> + *                 May wrap so igt_compare_timestamps() should be used to
> + *                 compare timestamps.
If I recall correctly the lower dword of the timestamp wraps in minutes, 
which is safe enough for normal tests but might be an issue on very long 
stress tests. Maybe add a comment to clarify when this is safe to use, 
also considering that due to the wrapping the actual supported maximum 
time difference between 2 timestamps is less than the total wrapping time

> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, uint32_t loops, drm_intel_bo *dest)
Other batch-creating functions take the batch as a parameter instead of 
returning one (e.g. igt_blitter_fast_copy, rendercopy), so for 
consistency I would do the same here and in the other functions that 
you've added

> +{
> +	struct intel_batchbuffer *batch;
> +	int addr_size_dw;
> +	uint32_t regOffset;
> +
> +	check_gen_8(fd);
HSW RCS does support this, so maybe we could have something like:

static void has_delay_bb(int fd, int ringid){
     static bool checked = false;
     if (!checked) {
         int devid = intel_get_drm_devid(fd);
         igt_require(intel_gen(devid) >= 8 ||
                         (IS_HASWELL(devid) && ringid == I915_EXEC_RENDER));
         checked = true;
     }
}

The batch manipulation macros should adjust the number of dwords for 
addresses automatically
> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	regOffset = get_register_offset(ringid);
Similarly to above, I would rename regOffset to mmio_base
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(32, 5);
> +	/* store current timestamp in DW2 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +
> +	/* Load R0 with loops */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_BATCH(loops);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);
> +	OUT_BATCH(0x00000000);
> +	/* Load R1 with 1 */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);
> +	OUT_BATCH(0x00000001);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);
> +	OUT_BATCH(0x00000000);
instead of emitting 4 MI_LOAD_REGISTER_IMM you can use 1 and specify 
extra dword lenght

> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
> +	/* e.g. R0 -= 1 */
> +	OUT_BATCH(MI_MATH(4));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
> +	OUT_BATCH(ALU_SUB);
> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
> +	/* Copy R0 to dest
> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
> +	 * together.
> +	 * On Gen9+, and the other rings on Gen8 Compare address points to
> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
> +	 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
> +	/* Repeat until R0 == 0 */
> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
> +	OUT_BATCH(0x00000000);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(15));
Instead of using the hardcoded DWORDS_TO_BYTES(15) you could capture 
batch->ptr before emitting the dword you want to jump to and pass it here

> +	/* Should never get here, but end if it happens */
> +
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/**
> + * igt_create_timestamp_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * dest: Buffer to use for saving the timestamps.
> + * load: Buffer to access. Set NULL if not required.
> + * write: If true and load is not NULL, will also write a timestamp to load
> + * buffer. If false and load is not NULL, will read from load buffer into dest.
> + * Intended for dependency checking.
The "load" buffer handling feels a bit too scheduler-specific to be in a 
common library. I would move it to the scheduler tests and maybe leave 
here an emit_timestamp_capture() or something like that.

> + *
> + * This creates a batch buffer which writes timestamps into a buffer object.
> + * If 'load' is non null, data is either written to 'load' or copied from 'load'
> + * depending on whether 'write' is set.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
> + *                          comparison between batch buffers to show execution order.
> + *                          May wrap so igt_compare_timestamps() should be used to
> + *                          compare timestamps.
> + * DW1 Context timestamp - Elapsed time since context was created.
> + * DW2 Value copied from DW0 of load if write == false
> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write)
> +{
> +	struct intel_batchbuffer *batch;
> +	int addr_size_dw;
> +	uint32_t regOffset;
> +
> +	check_gen_8(fd);
The TIMESTAMP reg exists also on pre GEN8, so instead of skipping on 
those platform we could capture only that register and not the 
CTX_TIMESTAMP one.

> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	regOffset = get_register_offset(ringid);
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(6, 2);
> +	/* store current reported timestamp in DW0 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +
> +	/* store current context timestamp in DW1 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
> +
> +	ADVANCE_BATCH();
> +
> +	if(load != NULL) {
> +		if(write) {
> +			BEGIN_BATCH(3, 1);
> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +			OUT_BATCH(regOffset + TIMESTAMP_offset);
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +		else {
> +			BEGIN_BATCH(3, 2);
> +			OUT_BATCH(MI_COPY_MEM_MEM);
> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +	}
> +
> +	BEGIN_BATCH(1, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/**
> + * igt_create_noop_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * noops: Number of MI_NOOP instructions to add to the batch buffer.
> + *
> + * This creates a batch buffer with a specified number of MI_NOOP instructions.
> + *
> + * Returns:
> + * The struct intel_batchbuffer created.
> + */
> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, int noops)
> +{
> +	struct intel_batchbuffer *batch;
> +	int loop;
> +
> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(noops + 1, 0);
> +	for(loop = 0; loop < noops; loop++)
> +		OUT_BATCH(MI_NOOP);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +	return batch;
> +}
> +
> +/* Store calibrated values so they only need calculating once.
> + * I915_EXEC_RING_MASK allows 3 bits for ring ids so allow for storing 8 values */
> +static uint32_t calibrated_ring_value[8] = {0, 0, 0, 0, 0, 0, 0, 0};
> +
> +/**
> + * igt_calibrate_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
> + *
> + * This calculates the value of loops that would need to be passed to
> + * igt_create_delay_bb() to create a delay of about 1 second on the specified
> + * ring.
> + *
> + * Returns:
> + * uint32_t to be passed to igt_create_delay_bb().
> + */
> +#define CAL_SEED (0x100000)
Can you add a comment regarding how this seed was picked? something like 
the expected duration of the execution with this seed on a specific 
platform would be nice.

> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
> +{
> +	uint32_t *buf;
> +	struct intel_batchbuffer *bb;
> +	struct timespec start, end;
> +	uint64_t duration;
> +	uint64_t calibrated;
> +	drm_intel_bo *target_bo;
> +
> +	igt_assert(ringid < 8);
> +	if(calibrated_ring_value[ringid] != 0)
> +		return calibrated_ring_value[ringid];
> +
> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(target_bo);
> +
> +	/* Put some non zero values in the target bo */
> +	drm_intel_bo_map(target_bo, 1);
Missing return code check here. Also, instead of mapping and unmapping a 
single subdata call would in my opinion be cleaner to write a single dword.

> +	buf = target_bo->virtual;
> +	buf[0] = 0xff;
> +	drm_intel_bo_unmap(target_bo);
> +
> +	bb = igt_create_delay_bb(fd, bufmgr, ringid, CAL_SEED, target_bo);
> +
> +	gem_quiescent_gpu(fd);
> +	clock_gettime(CLOCK_MONOTONIC, &start);
> +	intel_batchbuffer_flush_on_ring(bb, ringid);
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(target_bo, 0);
Missing return code check and unmap. Maybe using 
drm_intel_bo_wait_for_rendering here and get_subdata below could be 
slightly neater

> +	clock_gettime(CLOCK_MONOTONIC, &end);
> +
> +	buf = target_bo->virtual;
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
> +
> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
> +
> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
> +	          duration / SEC_TO_NSEC,
> +	          (duration % SEC_TO_NSEC) / 100000);
> +	drm_intel_bo_unreference(target_bo);
> +	intel_batchbuffer_free(bb);
> +
> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong
> +	 * */
Without any details on the expected duration when using CAL_SEED this 
comment is not very clear

> +	igt_assert(duration > (SEC_TO_NSEC  / 10));
> +
> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
> +
> +	calibrated_ring_value[ringid] = (uint32_t)calibrated;
> +	return (uint32_t)calibrated;
> +}
> +
> +/**
> + * igt_compare_timestamps:
> + * @ts1: timestamp 1
> + * @ts2: timestamp 2
> + *
> + * This compares two uint32_t timestamps. To handle wrapping it assumes the
> + * difference between the two timestamps is less than 1/4 the max elapsed time
> + * represented by the counters.
> + * It also assumes the timestamps are samples from the same counter.
> + *
> + * Returns:
> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
> + */
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
> +{
> +	if (ts2 > ts1)
> +		return true;
> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
> +		return true; /* Assuming timestamp counter wrapped */
> +	else
> +		return false;
> +}
> diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h
> new file mode 100644
> index 0000000..3ab7f13
> --- /dev/null
> +++ b/lib/igt_bb_factory.h
> @@ -0,0 +1,47 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton <derek.j.morton@intel.com>
> + *
> + */
> +
> +#ifndef IGT_BB_FACTORY_H
> +#define IGT_BB_FACTORY_H
> +
> +#include "intel_batchbuffer.h"
> +#include <stdint.h>
> +
> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, uint32_t loops, drm_intel_bo *dest);
> +
> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write);
> +
> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
> +		int ringid, int noops);
> +
> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
> +
> +#endif /* IGT_BB_FACTORY_H */
Derek Morton March 1, 2016, 10:30 a.m. UTC | #3
>

>

>-----Original Message-----

>From: Ceraolo Spurio, Daniele 

>Sent: Wednesday, February 17, 2016 9:48 AM

>To: Morton, Derek J <derek.j.morton@intel.com>; intel-gfx@lists.freedesktop.org

>Subject: Re: [Intel-gfx] [PATCH i-g-t 1/4] lib/igt_bb_factory: Add igt_bb_factory library

>

>

>

>On 12/02/16 09:38, Derek Morton wrote:

>> Adds functions to create a number of different batch buffers to 

>> perform several functions including:

>> Batch buffer which will run for a long duration to provide a delay on 

>> a specified ring.

>> Function to calibrate the delay batch buffer to run for a specified 

>> period of time.

>> Function to create a batch buffer which writes timestamps to a buffer object.

>> Function to compare timestamps allowing for wrapping of the values.

>>

>> Intended for use by the gem_scheduler test initially but will be used 

>> by other tests in development.

>>

>> Signed-off-by: Derek Morton <derek.j.morton@intel.com>

>> ---

>>   lib/Makefile.sources |   2 +

>>   lib/igt.h            |   1 +

>>   lib/igt_bb_factory.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++

>>   lib/igt_bb_factory.h |  47 ++++++

>>   4 files changed, 451 insertions(+)

>>   create mode 100644 lib/igt_bb_factory.c

>>   create mode 100644 lib/igt_bb_factory.h

>>

>> diff --git a/lib/Makefile.sources b/lib/Makefile.sources index 

>> 4999868..c560b3e 100644

>> --- a/lib/Makefile.sources

>> +++ b/lib/Makefile.sources

>> @@ -7,6 +7,8 @@ libintel_tools_la_SOURCES = 	\

>>   	i915_reg.h		\

>>   	i915_pciids.h		\

>>   	igt.h			\

>> +	igt_bb_factory.c	\

>> +	igt_bb_factory.h	\

>>   	igt_debugfs.c		\

>>   	igt_debugfs.h		\

>>   	igt_aux.c		\

>> diff --git a/lib/igt.h b/lib/igt.h

>> index 3be2551..0f29420 100644

>> --- a/lib/igt.h

>> +++ b/lib/igt.h

>> @@ -36,6 +36,7 @@

>>   #include "igt_gt.h"

>>   #include "igt_kms.h"

>>   #include "igt_stats.h"

>> +#include "igt_bb_factory.h"

>>   #include "instdone.h"

>>   #include "intel_batchbuffer.h"

>>   #include "intel_chipset.h"

>> diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c new file mode 

>> 100644 index 0000000..eea63c6

>> --- /dev/null

>> +++ b/lib/igt_bb_factory.c

>> @@ -0,0 +1,401 @@

>> +/*

>> + * Copyright © 2016 Intel Corporation

>> + *

>> + * Permission is hereby granted, free of charge, to any person 

>> +obtaining a

>> + * copy of this software and associated documentation files (the 

>> +"Software"),

>> + * to deal in the Software without restriction, including without 

>> +limitation

>> + * the rights to use, copy, modify, merge, publish, distribute, 

>> +sublicense,

>> + * and/or sell copies of the Software, and to permit persons to whom 

>> +the

>> + * Software is furnished to do so, subject to the following conditions:

>> + *

>> + * The above copyright notice and this permission notice (including 

>> +the next

>> + * paragraph) shall be included in all copies or substantial portions 

>> +of the

>> + * Software.

>> + *

>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

>> +EXPRESS OR

>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

>> +MERCHANTABILITY,

>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 

>> +SHALL

>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 

>> +OR OTHER

>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 

>> +ARISING

>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 

>> +OTHER DEALINGS

>> + * IN THE SOFTWARE.

>> + *

>> + * Authors:

>> + *    Derek Morton <derek.j.morton@intel.com>

>> + *

>> + */

>> +

>> +#include "igt.h"

>> +#include "intel_batchbuffer.h"

>> +#include <stdint.h>

>> +#include <inttypes.h>

>> +#include <time.h>

>> +

>> +#define SEC_TO_NSEC (1000 * 1000 * 1000) #define DWORDS_TO_BYTES(x) 

>> +((x)*4)

>> +

>> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))

>> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))

>> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)

>> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))

>> +

>> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))

>> +#define ALU_SUB             ( 0x101 << 20)

>> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))

>> +

>> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */

>> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context 

>> +creation */ #define ALU_GPU_R0_LSB_offset (0x600) #define 

>> +ALU_GPU_R0_MSB_offset (0x604) #define ALU_GPU_R1_LSB_offset (0x608) 

>> +#define ALU_GPU_R1_MSB_offset (0x60C) #define ALU_GPU_R2_LSB_offset 

>> +(0x610) #define ALU_GPU_R2_MSB_offset (0x614)

>> +

>> +#define ALU_R0_ENCODING   (0x00)

>> +#define ALU_R1_ENCODING   (0x01)

>> +#define ALU_SRCA_ENCODING (0x20)

>> +#define ALU_SRCB_ENCODING (0x21)

>> +#define ALU_ACCU_ENCODING (0x31)

>> +

>> +/**

>> + * SECTION:igt_bb_factory

>> + * @short_description: Utility functions for creating batch buffers

>> + * @title: Batch Buffer Factory

>> + * @include: igt.h

>> + *

>> + * This library implements functions for creating batch buffers which 

>> +may be

>> + * useful to multiple tests.

>> + */

>> +

>> +static void check_gen_8(int fd)

>> +{

>> +	static bool checked = false;

>> +	if(!checked) {

>> +		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);

>> +		checked = true;

>> +	}

>> +}

>> +

>> +static int bb_address_size_dw(int fd) {

>> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)

>> +		return 2;

>> +	else

>> +		return 1;

>> +}

>> +

>> +static uint32_t get_register_offset(int ringid)

>register_offset feels a bit confusing as name, I suggest to use mmio_base instead


Ok

>

>> +{

>> +	switch (ringid) {

>> +	case I915_EXEC_RENDER:

>> +		return 0x02000;

>> +	case I915_EXEC_BSD:

>> +		return 0x12000;

>What about the BSD2?


I can add that though my tests do not use BSD2

>

>> +	case I915_EXEC_BLT:

>> +		return 0x22000;

>> +	case I915_EXEC_VEBOX:

>> +		return 0x1A000;

>> +	default:

>> +		igt_assert_f(0, "Invalid ringid %d passed to get_register_offset()\n", ringid);

>> +	}

>> +}

>> +

>> +/**

>> + * igt_create_delay_bb:

>> + * @fd: file descriptor for i915 driver instance

>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>> + * loops: Number of times to loop

>> + * dest: Buffer to use for saving the current loop count and timestamp.

>> + *

>> + * This creates a batch buffer which will iterate a loop a specified 

>> +number

>> + * of times. Intended for creating batch buffers which take an 

>> +arbitarily

>> + * long time to execute. This can be useful to keep a ring busy while

>> + * constructing a test scenario.

>> + *

>> + * The dest buffer will have a number of Dwords written by the batch 

>> +buffer

>> + * when it runs. They are:

>> + * DW0 & DW1 - These are loade with the value of 'loops' and are decremented

>> + *             as the batch buffer executes. They will be 0 after the batch

>> + *             buffer completes if it finished succesfully.

>> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a

>> + *                 comparison between batch buffers to show execution order.

>> + *                 May wrap so igt_compare_timestamps() should be used to

>> + *                 compare timestamps.

>If I recall correctly the lower dword of the timestamp wraps in minutes, which is safe enough for normal tests but might be an issue on very long stress tests. Maybe add a comment to clarify when this is safe to use, also considering that due to the wrapping the actual supported maximum time difference between 2 timestamps is less than the total wrapping time


Will add a comment

>

>> + *

>> + * Returns:

>> + * The struct intel_batchbuffer created.

>> + */

>> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, uint32_t loops, drm_intel_bo *dest)

>Other batch-creating functions take the batch as a parameter instead of returning one (e.g. igt_blitter_fast_copy, rendercopy), so for consistency I would do the same here and in the other functions that you've added


Ok, though that adds more code to the tests than gets removed from these library functions.

>

>> +{

>> +	struct intel_batchbuffer *batch;

>> +	int addr_size_dw;

>> +	uint32_t regOffset;

>> +

>> +	check_gen_8(fd);

>HSW RCS does support this, so maybe we could have something like:

>

>static void has_delay_bb(int fd, int ringid){

>     static bool checked = false;

>     if (!checked) {

>         int devid = intel_get_drm_devid(fd);

>         igt_require(intel_gen(devid) >= 8 ||

>                         (IS_HASWELL(devid) && ringid == I915_EXEC_RENDER));

>         checked = true;

>     }

>}


Ok I can update the check_gen() function

>

>The batch manipulation macros should adjust the number of dwords for addresses automatically


That would require the macro to know the gen version which they don't.

>> +

>> +	addr_size_dw = bb_address_size_dw(fd);

>> +	regOffset = get_register_offset(ringid);

>Similarly to above, I would rename regOffset to mmio_base


Ok

>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>> +	igt_assert(batch);

>> +

>> +	BEGIN_BATCH(32, 5);

>> +	/* store current timestamp in DW2 */

>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +	OUT_BATCH(regOffset + TIMESTAMP_offset);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));

>> +

>> +	/* Load R0 with loops */

>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>> +	OUT_BATCH(loops);

>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>> +	OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);

>> +	OUT_BATCH(0x00000000);

>> +	/* Load R1 with 1 */

>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>> +	OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);

>> +	OUT_BATCH(0x00000001);

>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>> +	OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);

>> +	OUT_BATCH(0x00000000);

>instead of emitting 4 MI_LOAD_REGISTER_IMM you can use 1 and specify extra dword length


The MI_LOAD_REGISTER_IMM macro only supports 1 register/value pair. I created a new macro and tried this but I don't think it improved the code so removed it again.

>

>> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */

>> +	/* e.g. R0 -= 1 */

>> +	OUT_BATCH(MI_MATH(4));

>> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));

>> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));

>> +	OUT_BATCH(ALU_SUB);

>> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));

>> +	/* Copy R0 to dest

>> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address

>> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed

>> +	 * together.

>> +	 * On Gen9+, and the other rings on Gen8 Compare address points to

>> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.

>> +	 */

>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));

>> +	/* Repeat until R0 == 0 */

>> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);

>> +	OUT_BATCH(0x00000000);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

>> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));

>> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 

>> +DWORDS_TO_BYTES(15));

>Instead of using the hardcoded DWORDS_TO_BYTES(15) you could capture 

>batch->ptr before emitting the dword you want to jump to and pass it 

>batch->here


I will add an igt_batch_used() function to do this. There is a static version of this in several tests which could be removed once this patch set has landed.

>

>> +	/* Should never get here, but end if it happens */

>> +

>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>> +	ADVANCE_BATCH();

>> +

>> +	return batch;

>> +}

>> +

>> +/**

>> + * igt_create_timestamp_bb:

>> + * @fd: file descriptor for i915 driver instance

>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>> + * dest: Buffer to use for saving the timestamps.

>> + * load: Buffer to access. Set NULL if not required.

>> + * write: If true and load is not NULL, will also write a timestamp 

>> +to load

>> + * buffer. If false and load is not NULL, will read from load buffer into dest.

>> + * Intended for dependency checking.

>The "load" buffer handling feels a bit too scheduler-specific to be in a common library. I would move it to the scheduler tests and maybe leave here an emit_timestamp_capture() or something like that.


I would prefer to leave it here as it may be useful to other tests in the future. If the load handling is not needed just set the buffer to NULL.

>

>> + *

>> + * This creates a batch buffer which writes timestamps into a buffer object.

>> + * If 'load' is non null, data is either written to 'load' or copied from 'load'

>> + * depending on whether 'write' is set.

>> + *

>> + * The dest buffer will have a number of Dwords written by the batch 

>> +buffer

>> + * when it runs. They are:

>> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a

>> + *                          comparison between batch buffers to show execution order.

>> + *                          May wrap so igt_compare_timestamps() should be used to

>> + *                          compare timestamps.

>> + * DW1 Context timestamp - Elapsed time since context was created.

>> + * DW2 Value copied from DW0 of load if write == false

>> + *

>> + * Returns:

>> + * The struct intel_batchbuffer created.

>> + */

>> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write) {

>> +	struct intel_batchbuffer *batch;

>> +	int addr_size_dw;

>> +	uint32_t regOffset;

>> +

>> +	check_gen_8(fd);

>The TIMESTAMP reg exists also on pre GEN8, so instead of skipping on those platform we could capture only that register and not the CTX_TIMESTAMP one.


I don't need the CTX timestamp so can remove it.

>

>> +

>> +	addr_size_dw = bb_address_size_dw(fd);

>> +	regOffset = get_register_offset(ringid);

>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>> +	igt_assert(batch);

>> +

>> +	BEGIN_BATCH(6, 2);

>> +	/* store current reported timestamp in DW0 */

>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +	OUT_BATCH(regOffset + TIMESTAMP_offset);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));

>> +

>> +	/* store current context timestamp in DW1 */

>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +	OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);

>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));

>> +

>> +	ADVANCE_BATCH();

>> +

>> +	if(load != NULL) {

>> +		if(write) {

>> +			BEGIN_BATCH(3, 1);

>> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>> +			OUT_BATCH(regOffset + TIMESTAMP_offset);

>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));

>> +			ADVANCE_BATCH();

>> +		}

>> +		else {

>> +			BEGIN_BATCH(3, 2);

>> +			OUT_BATCH(MI_COPY_MEM_MEM);

>> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));

>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));

>> +			ADVANCE_BATCH();

>> +		}

>> +	}

>> +

>> +	BEGIN_BATCH(1, 0);

>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>> +	ADVANCE_BATCH();

>> +

>> +	return batch;

>> +}

>> +

>> +/**

>> + * igt_create_noop_bb:

>> + * @fd: file descriptor for i915 driver instance

>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>> + * noops: Number of MI_NOOP instructions to add to the batch buffer.

>> + *

>> + * This creates a batch buffer with a specified number of MI_NOOP instructions.

>> + *

>> + * Returns:

>> + * The struct intel_batchbuffer created.

>> + */

>> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, int noops)

>> +{

>> +	struct intel_batchbuffer *batch;

>> +	int loop;

>> +

>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>> +	igt_assert(batch);

>> +

>> +	BEGIN_BATCH(noops + 1, 0);

>> +	for(loop = 0; loop < noops; loop++)

>> +		OUT_BATCH(MI_NOOP);

>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>> +	ADVANCE_BATCH();

>> +

>> +	return batch;

>> +}

>> +

>> +/* Store calibrated values so they only need calculating once.

>> + * I915_EXEC_RING_MASK allows 3 bits for ring ids so allow for 

>> +storing 8 values */ static uint32_t calibrated_ring_value[8] = {0, 0, 

>> +0, 0, 0, 0, 0, 0};

>> +

>> +/**

>> + * igt_calibrate_delay_bb:

>> + * @fd: file descriptor for i915 driver instance

>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER

>> + *

>> + * This calculates the value of loops that would need to be passed to

>> + * igt_create_delay_bb() to create a delay of about 1 second on the 

>> +specified

>> + * ring.

>> + *

>> + * Returns:

>> + * uint32_t to be passed to igt_create_delay_bb().

>> + */

>> +#define CAL_SEED (0x100000)

>Can you add a comment regarding how this seed was picked? something like the expected duration of the execution with this seed on a specific platform would be nice.


Will state some example timings

>

>> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int 

>> +ringid) {

>> +	uint32_t *buf;

>> +	struct intel_batchbuffer *bb;

>> +	struct timespec start, end;

>> +	uint64_t duration;

>> +	uint64_t calibrated;

>> +	drm_intel_bo *target_bo;

>> +

>> +	igt_assert(ringid < 8);

>> +	if(calibrated_ring_value[ringid] != 0)

>> +		return calibrated_ring_value[ringid];

>> +

>> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);

>> +	igt_assert(target_bo);

>> +

>> +	/* Put some non zero values in the target bo */

>> +	drm_intel_bo_map(target_bo, 1);

>Missing return code check here. Also, instead of mapping and unmapping a single subdata call would in my opinion be cleaner to write a single dword.


Ok

>

>> +	buf = target_bo->virtual;

>> +	buf[0] = 0xff;

>> +	drm_intel_bo_unmap(target_bo);

>> +

>> +	bb = igt_create_delay_bb(fd, bufmgr, ringid, CAL_SEED, target_bo);

>> +

>> +	gem_quiescent_gpu(fd);

>> +	clock_gettime(CLOCK_MONOTONIC, &start);

>> +	intel_batchbuffer_flush_on_ring(bb, ringid);

>> +	/* This will not return until the bo has finished executing */

>> +	drm_intel_bo_map(target_bo, 0);

>Missing return code check and unmap. Maybe using drm_intel_bo_wait_for_rendering here and get_subdata below could be slightly neater


Ok

>

>> +	clock_gettime(CLOCK_MONOTONIC, &end);

>> +

>> +	buf = target_bo->virtual;

>> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */

>> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", 

>> +buf[0]);

>> +

>> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)

>> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;

>> +

>> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);

>> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",

>> +	          duration / SEC_TO_NSEC,

>> +	          (duration % SEC_TO_NSEC) / 100000);

>> +	drm_intel_bo_unreference(target_bo);

>> +	intel_batchbuffer_free(bb);

>> +

>> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong

>> +	 * */

>Without any details on the expected duration when using CAL_SEED this comment is not very clear

>

>> +	igt_assert(duration > (SEC_TO_NSEC  / 10));

>> +

>> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > 

>> +UINT32_MAX\n");

>> +

>> +	calibrated_ring_value[ringid] = (uint32_t)calibrated;

>> +	return (uint32_t)calibrated;

>> +}

>> +

>> +/**

>> + * igt_compare_timestamps:

>> + * @ts1: timestamp 1

>> + * @ts2: timestamp 2

>> + *

>> + * This compares two uint32_t timestamps. To handle wrapping it 

>> +assumes the

>> + * difference between the two timestamps is less than 1/4 the max 

>> +elapsed time

>> + * represented by the counters.

>> + * It also assumes the timestamps are samples from the same counter.

>> + *

>> + * Returns:

>> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.

>> + */

>> +

>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2) {

>> +	if (ts2 > ts1)

>> +		return true;

>> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))

>> +		return true; /* Assuming timestamp counter wrapped */

>> +	else

>> +		return false;

>> +}

>> diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h new file mode 

>> 100644 index 0000000..3ab7f13

>> --- /dev/null

>> +++ b/lib/igt_bb_factory.h

>> @@ -0,0 +1,47 @@

>> +/*

>> + * Copyright © 2016 Intel Corporation

>> + *

>> + * Permission is hereby granted, free of charge, to any person 

>> +obtaining a

>> + * copy of this software and associated documentation files (the 

>> +"Software"),

>> + * to deal in the Software without restriction, including without 

>> +limitation

>> + * the rights to use, copy, modify, merge, publish, distribute, 

>> +sublicense,

>> + * and/or sell copies of the Software, and to permit persons to whom 

>> +the

>> + * Software is furnished to do so, subject to the following conditions:

>> + *

>> + * The above copyright notice and this permission notice (including 

>> +the next

>> + * paragraph) shall be included in all copies or substantial portions 

>> +of the

>> + * Software.

>> + *

>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

>> +EXPRESS OR

>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

>> +MERCHANTABILITY,

>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 

>> +SHALL

>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 

>> +OR OTHER

>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 

>> +ARISING

>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 

>> +OTHER DEALINGS

>> + * IN THE SOFTWARE.

>> + *

>> + * Authors:

>> + *    Derek Morton <derek.j.morton@intel.com>

>> + *

>> + */

>> +

>> +#ifndef IGT_BB_FACTORY_H

>> +#define IGT_BB_FACTORY_H

>> +

>> +#include "intel_batchbuffer.h"

>> +#include <stdint.h>

>> +

>> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, uint32_t loops, drm_intel_bo *dest);

>> +

>> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write);

>> +

>> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,

>> +		int ringid, int noops);

>> +

>> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int 

>> +ringid);

>> +

>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);

>> +

>> +#endif /* IGT_BB_FACTORY_H */

>

>
Derek Morton March 2, 2016, 6:08 p.m. UTC | #4
>

>

>-----Original Message-----

>From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Morton, Derek J

>Sent: Tuesday, March 1, 2016 10:31 AM

>To: Ceraolo Spurio, Daniele <daniele.ceraolospurio@intel.com>; intel-gfx@lists.freedesktop.org

>Subject: Re: [Intel-gfx] [PATCH i-g-t 1/4] lib/igt_bb_factory: Add igt_bb_factory library

>

>>

>>

>>-----Original Message-----

>>From: Ceraolo Spurio, Daniele

>>Sent: Wednesday, February 17, 2016 9:48 AM

>>To: Morton, Derek J <derek.j.morton@intel.com>; 

>>intel-gfx@lists.freedesktop.org

>>Subject: Re: [Intel-gfx] [PATCH i-g-t 1/4] lib/igt_bb_factory: Add 

>>igt_bb_factory library

>>

>>

>>

>>On 12/02/16 09:38, Derek Morton wrote:

>>> Adds functions to create a number of different batch buffers to 

>>> perform several functions including:

>>> Batch buffer which will run for a long duration to provide a delay on 

>>> a specified ring.

>>> Function to calibrate the delay batch buffer to run for a specified 

>>> period of time.

>>> Function to create a batch buffer which writes timestamps to a buffer object.

>>> Function to compare timestamps allowing for wrapping of the values.

>>>

>>> Intended for use by the gem_scheduler test initially but will be used 

>>> by other tests in development.

>>>

>>> Signed-off-by: Derek Morton <derek.j.morton@intel.com>

>>> ---

>>>   lib/Makefile.sources |   2 +

>>>   lib/igt.h            |   1 +

>>>   lib/igt_bb_factory.c | 401 +++++++++++++++++++++++++++++++++++++++++++++++++++

>>>   lib/igt_bb_factory.h |  47 ++++++

>>>   4 files changed, 451 insertions(+)

>>>   create mode 100644 lib/igt_bb_factory.c

>>>   create mode 100644 lib/igt_bb_factory.h

>>>

>>> diff --git a/lib/Makefile.sources b/lib/Makefile.sources index 

>>> 4999868..c560b3e 100644

>>> --- a/lib/Makefile.sources

>>> +++ b/lib/Makefile.sources

>>> @@ -7,6 +7,8 @@ libintel_tools_la_SOURCES = 	\

>>>   	i915_reg.h		\

>>>   	i915_pciids.h		\

>>>   	igt.h			\

>>> +	igt_bb_factory.c	\

>>> +	igt_bb_factory.h	\

>>>   	igt_debugfs.c		\

>>>   	igt_debugfs.h		\

>>>   	igt_aux.c		\

>>> diff --git a/lib/igt.h b/lib/igt.h

>>> index 3be2551..0f29420 100644

>>> --- a/lib/igt.h

>>> +++ b/lib/igt.h

>>> @@ -36,6 +36,7 @@

>>>   #include "igt_gt.h"

>>>   #include "igt_kms.h"

>>>   #include "igt_stats.h"

>>> +#include "igt_bb_factory.h"

>>>   #include "instdone.h"

>>>   #include "intel_batchbuffer.h"

>>>   #include "intel_chipset.h"

>>> diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c new file 

>>> mode

>>> 100644 index 0000000..eea63c6

>>> --- /dev/null

>>> +++ b/lib/igt_bb_factory.c

>>> @@ -0,0 +1,401 @@

>>> +/*

>>> + * Copyright © 2016 Intel Corporation

>>> + *

>>> + * Permission is hereby granted, free of charge, to any person 

>>> +obtaining a

>>> + * copy of this software and associated documentation files (the 

>>> +"Software"),

>>> + * to deal in the Software without restriction, including without 

>>> +limitation

>>> + * the rights to use, copy, modify, merge, publish, distribute, 

>>> +sublicense,

>>> + * and/or sell copies of the Software, and to permit persons to whom 

>>> +the

>>> + * Software is furnished to do so, subject to the following conditions:

>>> + *

>>> + * The above copyright notice and this permission notice (including 

>>> +the next

>>> + * paragraph) shall be included in all copies or substantial 

>>> +portions of the

>>> + * Software.

>>> + *

>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

>>> +EXPRESS OR

>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

>>> +MERCHANTABILITY,

>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 

>>> +EVENT SHALL

>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 

>>> +OR OTHER

>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 

>>> +ARISING

>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 

>>> +OTHER DEALINGS

>>> + * IN THE SOFTWARE.

>>> + *

>>> + * Authors:

>>> + *    Derek Morton <derek.j.morton@intel.com>

>>> + *

>>> + */

>>> +

>>> +#include "igt.h"

>>> +#include "intel_batchbuffer.h"

>>> +#include <stdint.h>

>>> +#include <inttypes.h>

>>> +#include <time.h>

>>> +

>>> +#define SEC_TO_NSEC (1000 * 1000 * 1000) #define DWORDS_TO_BYTES(x)

>>> +((x)*4)

>>> +

>>> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))

>>> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))

>>> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)

>>> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))

>>> +

>>> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))

>>> +#define ALU_SUB             ( 0x101 << 20)

>>> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))

>>> +

>>> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */

>>> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context 

>>> +creation */ #define ALU_GPU_R0_LSB_offset (0x600) #define 

>>> +ALU_GPU_R0_MSB_offset (0x604) #define ALU_GPU_R1_LSB_offset (0x608) 

>>> +#define ALU_GPU_R1_MSB_offset (0x60C) #define ALU_GPU_R2_LSB_offset

>>> +(0x610) #define ALU_GPU_R2_MSB_offset (0x614)

>>> +

>>> +#define ALU_R0_ENCODING   (0x00)

>>> +#define ALU_R1_ENCODING   (0x01)

>>> +#define ALU_SRCA_ENCODING (0x20)

>>> +#define ALU_SRCB_ENCODING (0x21)

>>> +#define ALU_ACCU_ENCODING (0x31)

>>> +

>>> +/**

>>> + * SECTION:igt_bb_factory

>>> + * @short_description: Utility functions for creating batch buffers

>>> + * @title: Batch Buffer Factory

>>> + * @include: igt.h

>>> + *

>>> + * This library implements functions for creating batch buffers 

>>> +which may be

>>> + * useful to multiple tests.

>>> + */

>>> +

>>> +static void check_gen_8(int fd)

>>> +{

>>> +	static bool checked = false;

>>> +	if(!checked) {

>>> +		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);

>>> +		checked = true;

>>> +	}

>>> +}

>>> +

>>> +static int bb_address_size_dw(int fd) {

>>> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)

>>> +		return 2;

>>> +	else

>>> +		return 1;

>>> +}

>>> +

>>> +static uint32_t get_register_offset(int ringid)

>>register_offset feels a bit confusing as name, I suggest to use 

>>mmio_base instead

>

>Ok

>

>>

>>> +{

>>> +	switch (ringid) {

>>> +	case I915_EXEC_RENDER:

>>> +		return 0x02000;

>>> +	case I915_EXEC_BSD:

>>> +		return 0x12000;

>>What about the BSD2?

>

>I can add that though my tests do not use BSD2

>

>>

>>> +	case I915_EXEC_BLT:

>>> +		return 0x22000;

>>> +	case I915_EXEC_VEBOX:

>>> +		return 0x1A000;

>>> +	default:

>>> +		igt_assert_f(0, "Invalid ringid %d passed to get_register_offset()\n", ringid);

>>> +	}

>>> +}

>>> +

>>> +/**

>>> + * igt_create_delay_bb:

>>> + * @fd: file descriptor for i915 driver instance

>>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>>> + * loops: Number of times to loop

>>> + * dest: Buffer to use for saving the current loop count and timestamp.

>>> + *

>>> + * This creates a batch buffer which will iterate a loop a specified 

>>> +number

>>> + * of times. Intended for creating batch buffers which take an 

>>> +arbitarily

>>> + * long time to execute. This can be useful to keep a ring busy 

>>> +while

>>> + * constructing a test scenario.

>>> + *

>>> + * The dest buffer will have a number of Dwords written by the batch 

>>> +buffer

>>> + * when it runs. They are:

>>> + * DW0 & DW1 - These are loade with the value of 'loops' and are decremented

>>> + *             as the batch buffer executes. They will be 0 after the batch

>>> + *             buffer completes if it finished succesfully.

>>> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a

>>> + *                 comparison between batch buffers to show execution order.

>>> + *                 May wrap so igt_compare_timestamps() should be used to

>>> + *                 compare timestamps.

>>If I recall correctly the lower dword of the timestamp wraps in 

>>minutes, which is safe enough for normal tests but might be an issue on 

>>very long stress tests. Maybe add a comment to clarify when this is 

>>safe to use, also considering that due to the wrapping the actual 

>>supported maximum time difference between 2 timestamps is less than the 

>>total wrapping time

>

>Will add a comment

>

>>

>>> + *

>>> + * Returns:

>>> + * The struct intel_batchbuffer created.

>>> + */

>>> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, uint32_t loops, drm_intel_bo *dest)

>>Other batch-creating functions take the batch as a parameter instead of 

>>returning one (e.g. igt_blitter_fast_copy, rendercopy), so for 

>>consistency I would do the same here and in the other functions that 

>>you've added

>

>Ok, though that adds more code to the tests than gets removed from these library functions.

>

>>

>>> +{

>>> +	struct intel_batchbuffer *batch;

>>> +	int addr_size_dw;

>>> +	uint32_t regOffset;

>>> +

>>> +	check_gen_8(fd);

>>HSW RCS does support this, so maybe we could have something like:

>>

>>static void has_delay_bb(int fd, int ringid){

>>     static bool checked = false;

>>     if (!checked) {

>>         int devid = intel_get_drm_devid(fd);

>>         igt_require(intel_gen(devid) >= 8 ||

>>                         (IS_HASWELL(devid) && ringid == I915_EXEC_RENDER));

>>         checked = true;

>>     }

>>}

>

>Ok I can update the check_gen() function


The CMD parser blacklists the TIMESTAMP register on gen 7.5 so I will leave this as requiring gen8+

//Derek

>

>>

>>The batch manipulation macros should adjust the number of dwords for 

>>addresses automatically

>

>That would require the macro to know the gen version which they don't.

>

>>> +

>>> +	addr_size_dw = bb_address_size_dw(fd);

>>> +	regOffset = get_register_offset(ringid);

>>Similarly to above, I would rename regOffset to mmio_base

>

>Ok

>

>>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>>> +	igt_assert(batch);

>>> +

>>> +	BEGIN_BATCH(32, 5);

>>> +	/* store current timestamp in DW2 */

>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +	OUT_BATCH(regOffset + TIMESTAMP_offset);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));

>>> +

>>> +	/* Load R0 with loops */

>>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>>> +	OUT_BATCH(loops);

>>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>>> +	OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);

>>> +	OUT_BATCH(0x00000000);

>>> +	/* Load R1 with 1 */

>>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>>> +	OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);

>>> +	OUT_BATCH(0x00000001);

>>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);

>>> +	OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);

>>> +	OUT_BATCH(0x00000000);

>>instead of emitting 4 MI_LOAD_REGISTER_IMM you can use 1 and specify 

>>extra dword length

>

>The MI_LOAD_REGISTER_IMM macro only supports 1 register/value pair. I created a new macro and tried this but I don't think it improved the code so removed it again.

>

>>

>>> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */

>>> +	/* e.g. R0 -= 1 */

>>> +	OUT_BATCH(MI_MATH(4));

>>> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));

>>> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));

>>> +	OUT_BATCH(ALU_SUB);

>>> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));

>>> +	/* Copy R0 to dest

>>> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address

>>> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed

>>> +	 * together.

>>> +	 * On Gen9+, and the other rings on Gen8 Compare address points to

>>> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.

>>> +	 */

>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));

>>> +	/* Repeat until R0 == 0 */

>>> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);

>>> +	OUT_BATCH(0x00000000);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

>>> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));

>>> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 

>>> +DWORDS_TO_BYTES(15));

>>Instead of using the hardcoded DWORDS_TO_BYTES(15) you could capture

>>batch->ptr before emitting the dword you want to jump to and pass it 

>>batch->here

>

>I will add an igt_batch_used() function to do this. There is a static version of this in several tests which could be removed once this patch set has landed.

>

>>

>>> +	/* Should never get here, but end if it happens */

>>> +

>>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>>> +	ADVANCE_BATCH();

>>> +

>>> +	return batch;

>>> +}

>>> +

>>> +/**

>>> + * igt_create_timestamp_bb:

>>> + * @fd: file descriptor for i915 driver instance

>>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>>> + * dest: Buffer to use for saving the timestamps.

>>> + * load: Buffer to access. Set NULL if not required.

>>> + * write: If true and load is not NULL, will also write a timestamp 

>>> +to load

>>> + * buffer. If false and load is not NULL, will read from load buffer into dest.

>>> + * Intended for dependency checking.

>>The "load" buffer handling feels a bit too scheduler-specific to be in a common library. I would move it to the scheduler tests and maybe leave here an emit_timestamp_capture() or something like that.

>

>I would prefer to leave it here as it may be useful to other tests in the future. If the load handling is not needed just set the buffer to NULL.

>

>>

>>> + *

>>> + * This creates a batch buffer which writes timestamps into a buffer object.

>>> + * If 'load' is non null, data is either written to 'load' or copied from 'load'

>>> + * depending on whether 'write' is set.

>>> + *

>>> + * The dest buffer will have a number of Dwords written by the batch 

>>> +buffer

>>> + * when it runs. They are:

>>> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a

>>> + *                          comparison between batch buffers to show execution order.

>>> + *                          May wrap so igt_compare_timestamps() should be used to

>>> + *                          compare timestamps.

>>> + * DW1 Context timestamp - Elapsed time since context was created.

>>> + * DW2 Value copied from DW0 of load if write == false

>>> + *

>>> + * Returns:

>>> + * The struct intel_batchbuffer created.

>>> + */

>>> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write) {

>>> +	struct intel_batchbuffer *batch;

>>> +	int addr_size_dw;

>>> +	uint32_t regOffset;

>>> +

>>> +	check_gen_8(fd);

>>The TIMESTAMP reg exists also on pre GEN8, so instead of skipping on those platform we could capture only that register and not the CTX_TIMESTAMP one.

>

>I don't need the CTX timestamp so can remove it.

>

>>

>>> +

>>> +	addr_size_dw = bb_address_size_dw(fd);

>>> +	regOffset = get_register_offset(ringid);

>>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>>> +	igt_assert(batch);

>>> +

>>> +	BEGIN_BATCH(6, 2);

>>> +	/* store current reported timestamp in DW0 */

>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +	OUT_BATCH(regOffset + TIMESTAMP_offset);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));

>>> +

>>> +	/* store current context timestamp in DW1 */

>>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +	OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);

>>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 

>>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));

>>> +

>>> +	ADVANCE_BATCH();

>>> +

>>> +	if(load != NULL) {

>>> +		if(write) {

>>> +			BEGIN_BATCH(3, 1);

>>> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));

>>> +			OUT_BATCH(regOffset + TIMESTAMP_offset);

>>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));

>>> +			ADVANCE_BATCH();

>>> +		}

>>> +		else {

>>> +			BEGIN_BATCH(3, 2);

>>> +			OUT_BATCH(MI_COPY_MEM_MEM);

>>> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));

>>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));

>>> +			ADVANCE_BATCH();

>>> +		}

>>> +	}

>>> +

>>> +	BEGIN_BATCH(1, 0);

>>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>>> +	ADVANCE_BATCH();

>>> +

>>> +	return batch;

>>> +}

>>> +

>>> +/**

>>> + * igt_create_noop_bb:

>>> + * @fd: file descriptor for i915 driver instance

>>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

>>> + * noops: Number of MI_NOOP instructions to add to the batch buffer.

>>> + *

>>> + * This creates a batch buffer with a specified number of MI_NOOP instructions.

>>> + *

>>> + * Returns:

>>> + * The struct intel_batchbuffer created.

>>> + */

>>> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, int noops)

>>> +{

>>> +	struct intel_batchbuffer *batch;

>>> +	int loop;

>>> +

>>> +	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));

>>> +	igt_assert(batch);

>>> +

>>> +	BEGIN_BATCH(noops + 1, 0);

>>> +	for(loop = 0; loop < noops; loop++)

>>> +		OUT_BATCH(MI_NOOP);

>>> +	OUT_BATCH(MI_BATCH_BUFFER_END);

>>> +	ADVANCE_BATCH();

>>> +

>>> +	return batch;

>>> +}

>>> +

>>> +/* Store calibrated values so they only need calculating once.

>>> + * I915_EXEC_RING_MASK allows 3 bits for ring ids so allow for 

>>> +storing 8 values */ static uint32_t calibrated_ring_value[8] = {0, 

>>> +0, 0, 0, 0, 0, 0, 0};

>>> +

>>> +/**

>>> + * igt_calibrate_delay_bb:

>>> + * @fd: file descriptor for i915 driver instance

>>> + * @bufmgr: Buffer manager to be used for creation of batch buffers

>>> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER

>>> + *

>>> + * This calculates the value of loops that would need to be passed 

>>> +to

>>> + * igt_create_delay_bb() to create a delay of about 1 second on the 

>>> +specified

>>> + * ring.

>>> + *

>>> + * Returns:

>>> + * uint32_t to be passed to igt_create_delay_bb().

>>> + */

>>> +#define CAL_SEED (0x100000)

>>Can you add a comment regarding how this seed was picked? something like the expected duration of the execution with this seed on a specific platform would be nice.

>

>Will state some example timings

>

>>

>>> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, 

>>> +int

>>> +ringid) {

>>> +	uint32_t *buf;

>>> +	struct intel_batchbuffer *bb;

>>> +	struct timespec start, end;

>>> +	uint64_t duration;

>>> +	uint64_t calibrated;

>>> +	drm_intel_bo *target_bo;

>>> +

>>> +	igt_assert(ringid < 8);

>>> +	if(calibrated_ring_value[ringid] != 0)

>>> +		return calibrated_ring_value[ringid];

>>> +

>>> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);

>>> +	igt_assert(target_bo);

>>> +

>>> +	/* Put some non zero values in the target bo */

>>> +	drm_intel_bo_map(target_bo, 1);

>>Missing return code check here. Also, instead of mapping and unmapping a single subdata call would in my opinion be cleaner to write a single dword.

>

>Ok

>

>>

>>> +	buf = target_bo->virtual;

>>> +	buf[0] = 0xff;

>>> +	drm_intel_bo_unmap(target_bo);

>>> +

>>> +	bb = igt_create_delay_bb(fd, bufmgr, ringid, CAL_SEED, target_bo);

>>> +

>>> +	gem_quiescent_gpu(fd);

>>> +	clock_gettime(CLOCK_MONOTONIC, &start);

>>> +	intel_batchbuffer_flush_on_ring(bb, ringid);

>>> +	/* This will not return until the bo has finished executing */

>>> +	drm_intel_bo_map(target_bo, 0);

>>Missing return code check and unmap. Maybe using 

>>drm_intel_bo_wait_for_rendering here and get_subdata below could be 

>>slightly neater

>

>Ok

>

>>

>>> +	clock_gettime(CLOCK_MONOTONIC, &end);

>>> +

>>> +	buf = target_bo->virtual;

>>> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */

>>> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", 

>>> +buf[0]);

>>> +

>>> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)

>>> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;

>>> +

>>> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);

>>> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",

>>> +	          duration / SEC_TO_NSEC,

>>> +	          (duration % SEC_TO_NSEC) / 100000);

>>> +	drm_intel_bo_unreference(target_bo);

>>> +	intel_batchbuffer_free(bb);

>>> +

>>> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong

>>> +	 * */

>>Without any details on the expected duration when using CAL_SEED this 

>>comment is not very clear

>>

>>> +	igt_assert(duration > (SEC_TO_NSEC  / 10));

>>> +

>>> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > 

>>> +UINT32_MAX\n");

>>> +

>>> +	calibrated_ring_value[ringid] = (uint32_t)calibrated;

>>> +	return (uint32_t)calibrated;

>>> +}

>>> +

>>> +/**

>>> + * igt_compare_timestamps:

>>> + * @ts1: timestamp 1

>>> + * @ts2: timestamp 2

>>> + *

>>> + * This compares two uint32_t timestamps. To handle wrapping it 

>>> +assumes the

>>> + * difference between the two timestamps is less than 1/4 the max 

>>> +elapsed time

>>> + * represented by the counters.

>>> + * It also assumes the timestamps are samples from the same counter.

>>> + *

>>> + * Returns:

>>> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.

>>> + */

>>> +

>>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2) {

>>> +	if (ts2 > ts1)

>>> +		return true;

>>> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))

>>> +		return true; /* Assuming timestamp counter wrapped */

>>> +	else

>>> +		return false;

>>> +}

>>> diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h new file 

>>> mode

>>> 100644 index 0000000..3ab7f13

>>> --- /dev/null

>>> +++ b/lib/igt_bb_factory.h

>>> @@ -0,0 +1,47 @@

>>> +/*

>>> + * Copyright © 2016 Intel Corporation

>>> + *

>>> + * Permission is hereby granted, free of charge, to any person 

>>> +obtaining a

>>> + * copy of this software and associated documentation files (the 

>>> +"Software"),

>>> + * to deal in the Software without restriction, including without 

>>> +limitation

>>> + * the rights to use, copy, modify, merge, publish, distribute, 

>>> +sublicense,

>>> + * and/or sell copies of the Software, and to permit persons to whom 

>>> +the

>>> + * Software is furnished to do so, subject to the following conditions:

>>> + *

>>> + * The above copyright notice and this permission notice (including 

>>> +the next

>>> + * paragraph) shall be included in all copies or substantial 

>>> +portions of the

>>> + * Software.

>>> + *

>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

>>> +EXPRESS OR

>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

>>> +MERCHANTABILITY,

>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO 

>>> +EVENT SHALL

>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 

>>> +OR OTHER

>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 

>>> +ARISING

>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 

>>> +OTHER DEALINGS

>>> + * IN THE SOFTWARE.

>>> + *

>>> + * Authors:

>>> + *    Derek Morton <derek.j.morton@intel.com>

>>> + *

>>> + */

>>> +

>>> +#ifndef IGT_BB_FACTORY_H

>>> +#define IGT_BB_FACTORY_H

>>> +

>>> +#include "intel_batchbuffer.h"

>>> +#include <stdint.h>

>>> +

>>> +struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, uint32_t loops, drm_intel_bo *dest);

>>> +

>>> +struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write);

>>> +

>>> +struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,

>>> +		int ringid, int noops);

>>> +

>>> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, 

>>> +int ringid);

>>> +

>>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);

>>> +

>>> +#endif /* IGT_BB_FACTORY_H */

>>

>>

>_______________________________________________

>Intel-gfx mailing list

>Intel-gfx@lists.freedesktop.org

>https://lists.freedesktop.org/mailman/listinfo/intel-gfx

>
Dave Gordon March 3, 2016, 6:54 p.m. UTC | #5
On 02/03/16 18:08, Morton, Derek J wrote:
>
> The CMD parser blacklists the TIMESTAMP register on gen 7.5 so I will leave this as requiring gen8+
>
> //Derek

You might still be able to store its value (to memory or the PPHWSP?) 
using a PIPE_CONTROL or MI_FLUSH_DW instruction rather than accessing 
the register.

.Dave.
diff mbox

Patch

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 4999868..c560b3e 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -7,6 +7,8 @@  libintel_tools_la_SOURCES = 	\
 	i915_reg.h		\
 	i915_pciids.h		\
 	igt.h			\
+	igt_bb_factory.c	\
+	igt_bb_factory.h	\
 	igt_debugfs.c		\
 	igt_debugfs.h		\
 	igt_aux.c		\
diff --git a/lib/igt.h b/lib/igt.h
index 3be2551..0f29420 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -36,6 +36,7 @@ 
 #include "igt_gt.h"
 #include "igt_kms.h"
 #include "igt_stats.h"
+#include "igt_bb_factory.h"
 #include "instdone.h"
 #include "intel_batchbuffer.h"
 #include "intel_chipset.h"
diff --git a/lib/igt_bb_factory.c b/lib/igt_bb_factory.c
new file mode 100644
index 0000000..eea63c6
--- /dev/null
+++ b/lib/igt_bb_factory.c
@@ -0,0 +1,401 @@ 
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.morton@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include "intel_batchbuffer.h"
+#include <stdint.h>
+#include <inttypes.h>
+#include <time.h>
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+#define DWORDS_TO_BYTES(x) ((x)*4)
+
+#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
+#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
+#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
+#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
+
+#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
+#define ALU_SUB             ( 0x101 << 20)
+#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
+
+#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
+#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
+#define ALU_GPU_R0_LSB_offset (0x600)
+#define ALU_GPU_R0_MSB_offset (0x604)
+#define ALU_GPU_R1_LSB_offset (0x608)
+#define ALU_GPU_R1_MSB_offset (0x60C)
+#define ALU_GPU_R2_LSB_offset (0x610)
+#define ALU_GPU_R2_MSB_offset (0x614)
+
+#define ALU_R0_ENCODING   (0x00)
+#define ALU_R1_ENCODING   (0x01)
+#define ALU_SRCA_ENCODING (0x20)
+#define ALU_SRCB_ENCODING (0x21)
+#define ALU_ACCU_ENCODING (0x31)
+
+/**
+ * SECTION:igt_bb_factory
+ * @short_description: Utility functions for creating batch buffers
+ * @title: Batch Buffer Factory
+ * @include: igt.h
+ *
+ * This library implements functions for creating batch buffers which may be
+ * useful to multiple tests.
+ */
+
+static void check_gen_8(int fd)
+{
+	static bool checked = false;
+	if(!checked) {
+		igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+		checked = true;
+	}
+}
+
+static int bb_address_size_dw(int fd)
+{
+	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+		return 2;
+	else
+		return 1;
+}
+
+static uint32_t get_register_offset(int ringid)
+{
+	switch (ringid) {
+	case I915_EXEC_RENDER:
+		return 0x02000;
+	case I915_EXEC_BSD:
+		return 0x12000;
+	case I915_EXEC_BLT:
+		return 0x22000;
+	case I915_EXEC_VEBOX:
+		return 0x1A000;
+	default:
+		igt_assert_f(0, "Invalid ringid %d passed to get_register_offset()\n", ringid);
+	}
+}
+
+/**
+ * igt_create_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * loops: Number of times to loop
+ * dest: Buffer to use for saving the current loop count and timestamp.
+ *
+ * This creates a batch buffer which will iterate a loop a specified number
+ * of times. Intended for creating batch buffers which take an arbitarily
+ * long time to execute. This can be useful to keep a ring busy while
+ * constructing a test scenario.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 & DW1 - These are loade with the value of 'loops' and are decremented
+ *             as the batch buffer executes. They will be 0 after the batch
+ *             buffer completes if it finished succesfully.
+ * DW2 Timestamp - An indication of when the batch buffer ran allowing a
+ *                 comparison between batch buffers to show execution order.
+ *                 May wrap so igt_compare_timestamps() should be used to
+ *                 compare timestamps.
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, uint32_t loops, drm_intel_bo *dest)
+{
+	struct intel_batchbuffer *batch;
+	int addr_size_dw;
+	uint32_t regOffset;
+
+	check_gen_8(fd);
+
+	addr_size_dw = bb_address_size_dw(fd);
+	regOffset = get_register_offset(ringid);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(batch);
+
+	BEGIN_BATCH(32, 5);
+	/* store current timestamp in DW2 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(regOffset + TIMESTAMP_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+
+	/* Load R0 with loops */
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+	OUT_BATCH(loops);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(regOffset + ALU_GPU_R0_MSB_offset);
+	OUT_BATCH(0x00000000);
+	/* Load R1 with 1 */
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(regOffset + ALU_GPU_R1_LSB_offset);
+	OUT_BATCH(0x00000001);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(regOffset + ALU_GPU_R1_MSB_offset);
+	OUT_BATCH(0x00000000);
+	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
+	/* e.g. R0 -= 1 */
+	OUT_BATCH(MI_MATH(4));
+	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
+	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
+	OUT_BATCH(ALU_SUB);
+	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
+	/* Copy R0 to dest
+	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
+	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
+	 * together.
+	 * On Gen9+, and the other rings on Gen8 Compare address points to
+	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
+	 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(regOffset + ALU_GPU_R0_LSB_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+	/* Repeat until R0 == 0 */
+	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
+	OUT_BATCH(0x00000000);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(15));
+	/* Should never get here, but end if it happens */
+
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+
+	return batch;
+}
+
+/**
+ * igt_create_timestamp_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * dest: Buffer to use for saving the timestamps.
+ * load: Buffer to access. Set NULL if not required.
+ * write: If true and load is not NULL, will also write a timestamp to load
+ * buffer. If false and load is not NULL, will read from load buffer into dest.
+ * Intended for dependency checking.
+ *
+ * This creates a batch buffer which writes timestamps into a buffer object.
+ * If 'load' is non null, data is either written to 'load' or copied from 'load'
+ * depending on whether 'write' is set.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
+ *                          comparison between batch buffers to show execution order.
+ *                          May wrap so igt_compare_timestamps() should be used to
+ *                          compare timestamps.
+ * DW1 Context timestamp - Elapsed time since context was created.
+ * DW2 Value copied from DW0 of load if write == false
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write)
+{
+	struct intel_batchbuffer *batch;
+	int addr_size_dw;
+	uint32_t regOffset;
+
+	check_gen_8(fd);
+
+	addr_size_dw = bb_address_size_dw(fd);
+	regOffset = get_register_offset(ringid);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(batch);
+
+	BEGIN_BATCH(6, 2);
+	/* store current reported timestamp in DW0 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(regOffset + TIMESTAMP_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+
+	/* store current context timestamp in DW1 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(regOffset + CTX_TIMESTAMP_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+
+	ADVANCE_BATCH();
+
+	if(load != NULL) {
+		if(write) {
+			BEGIN_BATCH(3, 1);
+			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+			OUT_BATCH(regOffset + TIMESTAMP_offset);
+			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+			ADVANCE_BATCH();
+		}
+		else {
+			BEGIN_BATCH(3, 2);
+			OUT_BATCH(MI_COPY_MEM_MEM);
+			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
+			ADVANCE_BATCH();
+		}
+	}
+
+	BEGIN_BATCH(1, 0);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+
+	return batch;
+}
+
+/**
+ * igt_create_noop_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * noops: Number of MI_NOOP instructions to add to the batch buffer.
+ *
+ * This creates a batch buffer with a specified number of MI_NOOP instructions.
+ *
+ * Returns:
+ * The struct intel_batchbuffer created.
+ */
+struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, int noops)
+{
+	struct intel_batchbuffer *batch;
+	int loop;
+
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(batch);
+
+	BEGIN_BATCH(noops + 1, 0);
+	for(loop = 0; loop < noops; loop++)
+		OUT_BATCH(MI_NOOP);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+
+	return batch;
+}
+
+/* Store calibrated values so they only need calculating once.
+ * I915_EXEC_RING_MASK allows 3 bits for ring ids so allow for storing 8 values */
+static uint32_t calibrated_ring_value[8] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+/**
+ * igt_calibrate_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
+ *
+ * This calculates the value of loops that would need to be passed to
+ * igt_create_delay_bb() to create a delay of about 1 second on the specified
+ * ring.
+ *
+ * Returns:
+ * uint32_t to be passed to igt_create_delay_bb().
+ */
+#define CAL_SEED (0x100000)
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
+{
+	uint32_t *buf;
+	struct intel_batchbuffer *bb;
+	struct timespec start, end;
+	uint64_t duration;
+	uint64_t calibrated;
+	drm_intel_bo *target_bo;
+
+	igt_assert(ringid < 8);
+	if(calibrated_ring_value[ringid] != 0)
+		return calibrated_ring_value[ringid];
+
+	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
+	igt_assert(target_bo);
+
+	/* Put some non zero values in the target bo */
+	drm_intel_bo_map(target_bo, 1);
+	buf = target_bo->virtual;
+	buf[0] = 0xff;
+	drm_intel_bo_unmap(target_bo);
+
+	bb = igt_create_delay_bb(fd, bufmgr, ringid, CAL_SEED, target_bo);
+
+	gem_quiescent_gpu(fd);
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	intel_batchbuffer_flush_on_ring(bb, ringid);
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_map(target_bo, 0);
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	buf = target_bo->virtual;
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
+
+	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
+	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
+
+	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
+	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
+	          duration / SEC_TO_NSEC,
+	          (duration % SEC_TO_NSEC) / 100000);
+	drm_intel_bo_unreference(target_bo);
+	intel_batchbuffer_free(bb);
+
+	/* Sanity check. If duration < 100ms, something has clearly gone wrong
+	 * */
+	igt_assert(duration > (SEC_TO_NSEC  / 10));
+
+	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
+
+	calibrated_ring_value[ringid] = (uint32_t)calibrated;
+	return (uint32_t)calibrated;
+}
+
+/**
+ * igt_compare_timestamps:
+ * @ts1: timestamp 1
+ * @ts2: timestamp 2
+ *
+ * This compares two uint32_t timestamps. To handle wrapping it assumes the
+ * difference between the two timestamps is less than 1/4 the max elapsed time
+ * represented by the counters.
+ * It also assumes the timestamps are samples from the same counter.
+ *
+ * Returns:
+ * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
+ */
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
+{
+	if (ts2 > ts1)
+		return true;
+	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
+		return true; /* Assuming timestamp counter wrapped */
+	else
+		return false;
+}
diff --git a/lib/igt_bb_factory.h b/lib/igt_bb_factory.h
new file mode 100644
index 0000000..3ab7f13
--- /dev/null
+++ b/lib/igt_bb_factory.h
@@ -0,0 +1,47 @@ 
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.morton@intel.com>
+ *
+ */
+
+#ifndef IGT_BB_FACTORY_H
+#define IGT_BB_FACTORY_H
+
+#include "intel_batchbuffer.h"
+#include <stdint.h>
+
+struct intel_batchbuffer *igt_create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, uint32_t loops, drm_intel_bo *dest);
+
+struct intel_batchbuffer *igt_create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, drm_intel_bo *dest, drm_intel_bo *load, bool write);
+
+struct intel_batchbuffer *igt_create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+		int ringid, int noops);
+
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
+
+#endif /* IGT_BB_FACTORY_H */