diff mbox

[i-g-t,v3,2/6] lib/intel_batchbuffer: Add functions to be used in the scheduler test

Message ID 1457607814-18751-3-git-send-email-derek.j.morton@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Derek Morton March 10, 2016, 11:03 a.m. UTC
Adds functions to create a number of different batch buffers to perform
several functions including:
Batch buffer which will run for a long duration to provide a delay on a
specified ring.
Function to calibrate the delay batch buffer to run for a specified period
of time.
Function to create a batch buffer which writes timestamps to a buffer object.
Function to compare timestamps allowing for wrapping of the values.

v2: Moved code to intel_batchbuffer (Daniel Vetter)
    Addressed review comments from Daniele Ceraolo Spurio

Signed-off-by: Derek Morton <derek.j.morton@intel.com>
---
 lib/intel_batchbuffer.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++-
 lib/intel_batchbuffer.h |  14 ++
 2 files changed, 393 insertions(+), 5 deletions(-)

Comments

Daniele Ceraolo Spurio March 14, 2016, 12:16 p.m. UTC | #1
On 10/03/16 11:03, Derek Morton wrote:
> Adds functions to create a number of different batch buffers to perform
> several functions including:
> Batch buffer which will run for a long duration to provide a delay on a
> specified ring.
> Function to calibrate the delay batch buffer to run for a specified period
> of time.
> Function to create a batch buffer which writes timestamps to a buffer object.
> Function to compare timestamps allowing for wrapping of the values.
>
> v2: Moved code to intel_batchbuffer (Daniel Vetter)
>      Addressed review comments from Daniele Ceraolo Spurio
>
> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
> ---
>   lib/intel_batchbuffer.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++-
>   lib/intel_batchbuffer.h |  14 ++
>   2 files changed, 393 insertions(+), 5 deletions(-)
>
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index 692521f..30e78c5 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -1,8 +1,8 @@
>   /**************************************************************************
> - *
> + *
>    * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
>    * All Rights Reserved.
> - *
> + *
>    * Permission is hereby granted, free of charge, to any person obtaining a
>    * copy of this software and associated documentation files (the
>    * "Software"), to deal in the Software without restriction, including
> @@ -10,11 +10,11 @@
>    * distribute, sub license, and/or sell copies of the Software, and to
>    * permit persons to whom the Software is furnished to do so, subject to
>    * the following conditions:
> - *
> + *
>    * The above copyright notice and this permission notice (including the
>    * next paragraph) shall be included in all copies or substantial portions
>    * of the Software.
> - *
> + *
>    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>    * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>    * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> @@ -22,7 +22,7 @@
>    * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>    * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>    * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> - *
> + *
>    **************************************************************************/
>   
>   #include <inttypes.h>
> @@ -30,6 +30,8 @@
>   #include <stdio.h>
>   #include <string.h>
>   #include <assert.h>
> +#include <stdint.h>
> +#include <time.h>
>   
>   #include "drm.h"
>   #include "drmtest.h"
> @@ -42,6 +44,7 @@
>   #include "ioctl_wrappers.h"
>   #include "media_spin.h"
>   #include "gpgpu_fill.h"
> +#include "igt_gt.h"
>   
>   #include <i915_drm.h>
>   
> @@ -817,3 +820,374 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
>   
>   	return spin;
>   }
> +
> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
> +#define DWORDS_TO_BYTES(x) ((x)*4)
> +
> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
> +
> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
> +#define ALU_SUB             ( 0x101 << 20)
> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
> +
> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
> +#define ALU_GPU_R0_LSB_offset (0x600)
> +#define ALU_GPU_R0_MSB_offset (0x604)
> +#define ALU_GPU_R1_LSB_offset (0x608)
> +#define ALU_GPU_R1_MSB_offset (0x60C)
> +#define ALU_GPU_R2_LSB_offset (0x610)
> +#define ALU_GPU_R2_MSB_offset (0x614)
> +
> +#define ALU_R0_ENCODING   (0x00)
> +#define ALU_R1_ENCODING   (0x01)
> +#define ALU_SRCA_ENCODING (0x20)
> +#define ALU_SRCB_ENCODING (0x21)
> +#define ALU_ACCU_ENCODING (0x31)
> +
> +static int bb_address_size_dw(int fd)
> +{
> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> +		return 2;
> +	else
> +		return 1;
> +}
> +
> +static uint32_t get_mmio_base(int ringid)
> +{
> +	switch (ringid) {
> +	case I915_EXEC_RENDER:
> +		return 0x02000;
> +	case I915_EXEC_BSD:
> +	case I915_EXEC_BSD | 1<<13: /* BSD1 */
> +		return 0x12000;
> +	case I915_EXEC_BSD | 2<<13: /* BSD2 */
> +		return 0x1c000;
> +	case I915_EXEC_BLT:
> +		return 0x22000;
> +	case I915_EXEC_VEBOX:
> +		return 0x1A000;
> +	default:
> +		igt_assert_f(0, "Invalid ringid %d passed to get_mmio_base()\n", ringid);
> +	}
> +}
> +
> +/**
> + * igt_batch_used
> + * @batch batchbuffer to get offset from
> + *
> + * This returns the number of bytes of the batchbuffer that have been used.
> + * e.g. The offset into the batchbuffer that the next OUT_BATCH would write to.
> + *
> + * Returns:
> + * The number of bytes of the batchbuffer that have been used.
> + */
> +uint32_t igt_batch_used(struct intel_batchbuffer *batch)
> +{
> +	return batch->ptr - batch->buffer;
> +}
> +
> +/**
> + * igt_create_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @batch: Batch buffer to write to
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER

ringid is the execution flag of the ring

> + * loops: Number of times to loop
> + * dest: Buffer to use for saving the current loop count and timestamp.
> + *
> + * This creates a batch buffer which will iterate a loop a specified number
> + * of times. Intended for creating batch buffers which take an arbitarily
> + * long time to execute. This can be useful to keep a ring busy while
> + * constructing a test scenario.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 & DW1 - These are loaded with the value of 'loops' and are decremented
> + *             as the batch buffer executes. They will be 0 after the batch
> + *             buffer completes if it finished succesfully.
> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a
> + *                 comparison between batch buffers to show execution order.
> + *                 May wrap so igt_compare_timestamps() should be used to
> + *                 compare timestamps.
> + *                 The timestamp will wrap every few minutes.
> + *
> + */
> +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,

Now that you're passing the batch from the outside you're only using the 
fd to get the gen. However, the gen is already saved inside the batch so 
you could get it from there and drop the fd parameter. the 
bb_address_size_dw would need to be modified to get the batch or the gen 
as a parameter, but it looks like an ok change to me. Same reasoning 
applies to other functions you've added.

> +                         int ringid, uint32_t loops, drm_intel_bo *dest)
> +{
> +	int addr_size_dw;
> +	uint32_t mmio_base, jump_offset;
> +
> +	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
> +	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	mmio_base = get_mmio_base(ringid);
> +	igt_assert(batch);
> +	BEGIN_BATCH(32, 5);
> +
> +	/* store current timestamp in DW2 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(mmio_base + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +
> +	/* Load R0 with loops */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
> +	OUT_BATCH(loops);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(mmio_base + ALU_GPU_R0_MSB_offset);
> +	OUT_BATCH(0x00000000);
> +	/* Load R1 with 1 */
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(mmio_base + ALU_GPU_R1_LSB_offset);
> +	OUT_BATCH(0x00000001);
> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
> +	OUT_BATCH(mmio_base + ALU_GPU_R1_MSB_offset);
> +	OUT_BATCH(0x00000000);
> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
> +	/* e.g. R0 -= 1 */
> +	jump_offset=igt_batch_used(batch);

for clarity the sampling of jump offset could go above the ALU comment 
(and have a comment of its own)

> +	OUT_BATCH(MI_MATH(4));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
> +	OUT_BATCH(ALU_SUB);
> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
> +	/* Copy R0 to dest
> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
> +	 * together.
> +	 * On Gen9+, and the other rings on Gen8 Compare address points to
> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
> +	 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
> +	/* Repeat until R0 == 0 */
> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
> +	OUT_BATCH(0x00000000);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, jump_offset);
> +
> +	/* Should never get here, but end if it happens */
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +}
> +
> +/**
> + * igt_create_timestamp_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @batch: Batch buffer to write to
> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
> + * dest: Buffer to use for saving the timestamps.
> + * load: Buffer to access. Set NULL if not required.
> + * write: If true and load is not NULL, will also write a timestamp to load
> + * buffer. If false and load is not NULL, will read from load buffer into dest.
> + * Intended for dependency checking.
> + *
> + * This creates a batch buffer which writes timestamps into a buffer object.
> + * If 'load' is non null, data is either written to 'load' or copied from 'load'
> + * depending on whether 'write' is set.
> + *
> + * The dest buffer will have a number of Dwords written by the batch buffer
> + * when it runs. They are:
> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
> + *                          comparison between batch buffers to show execution order.
> + *                          May wrap so igt_compare_timestamps() should be used to
> + *                          compare timestamps.
> + *                          The timestamp will wrap every few minutes.
> + * DW2 Value copied from DW0 of load if write == false
> + *
> + */
> +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
> +                             drm_intel_bo *dest, drm_intel_bo *load, bool write)

I still think that the timestamp capture should be its own function, 
because it could be useful to add it multiple time in the same batch. 
You could split that out in a separate function and then call it from 
igt_create_timestamp_bb, i.e. something like:

igt_emit_timestamp_sample(struct intel_batchbuffer *batch, int ringid, 
drm_intel_bo *dest, unsigned dest_offset);

Personally I also still don't really like the "load" handling, which is 
not related to timestamps in any way. won't oppose it if other people 
are ok with it though.

> +{
> +	int addr_size_dw;
> +	uint32_t mmio_base;
> +
> +	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
> +	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
> +
> +	addr_size_dw = bb_address_size_dw(fd);
> +	mmio_base = get_mmio_base(ringid);
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(3, 1);
> +	/* store current reported timestamp in DW0 */
> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +	OUT_BATCH(mmio_base + TIMESTAMP_offset);
> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +
> +	ADVANCE_BATCH();
> +
> +	if(load != NULL) {

style: we tend to put a space between if/while/for and the parenthesis, 
but you didn't anywhere in this patch

> +		if(write) {
> +			BEGIN_BATCH(3, 1);
> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
> +			OUT_BATCH(mmio_base + TIMESTAMP_offset);
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +		else {

weird indent

> +			BEGIN_BATCH(3, 2);
> +			OUT_BATCH(MI_COPY_MEM_MEM);
> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
> +			ADVANCE_BATCH();
> +		}
> +	}
> +
> +	BEGIN_BATCH(1, 0);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +}
> +
> +/**
> + * igt_create_noop_bb:
> + * @batch: Batch buffer to write to
> + * noops: Number of MI_NOOP instructions to add to the batch buffer.
> + *
> + * This creates a batch buffer with a specified number of MI_NOOP instructions.
> + */
> +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops)
> +{
> +	int loop;
> +
> +	igt_assert(batch);
> +
> +	BEGIN_BATCH(noops + 1, 0);
> +	for(loop = 0; loop < noops; loop++)
> +		OUT_BATCH(MI_NOOP);
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +	ADVANCE_BATCH();
> +
> +}
> +
> +/* Store calibrated values so they only need calculating once.
> + * Use intel_execution_engines array as list of supported rings
> + */
> +static uint32_t *calibrated_ring_value = NULL;
> +
> +/**
> + * igt_calibrate_delay_bb:
> + * @fd: file descriptor for i915 driver instance
> + * @bufmgr: Buffer manager to be used for creation of batch buffers
> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
> + *
> + * This calculates the value of loops that would need to be passed to
> + * igt_create_delay_bb() to create a delay of about 1 second on the specified
> + * ring.
> + *
> + * Returns:
> + * uint32_t to be passed to igt_create_delay_bb().
> + */
> +/* 0x100000 will run for about 0.6 - 0.8 seconds (dependant on ring) on BXT HW */
> +#define CAL_SEED (0x100000)
> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
> +{
> +	uint32_t buf[2];
> +	struct intel_batchbuffer *bb;
> +	struct timespec start, end;
> +	uint64_t duration;
> +	uint64_t calibrated;
> +	drm_intel_bo *target_bo;
> +	int ring_index=0;
> +
> +/*	igt_assert(ringid < 8);
> +	if(calibrated_ring_value[ringid] != 0)
> +		return calibrated_ring_value[ringid];*/

dead code

> +
> +	if(calibrated_ring_value == NULL) {
> +		int count;
> +		for(count = 0; intel_execution_engines[count].name != NULL; count++) {}

Can you add a comment to explain why this loop is required to count the 
engines? it could also be possible to add a function in igt_gt to return 
the exec_engines count, but I'm not sure that's worth the effort. Also, 
BSD1 and BSD2 will have the same calibration value so you can calibrate 
only once for I915_EXEC_BSD

> +		calibrated_ring_value = calloc(count, sizeof(uint32_t));
> +		igt_assert(calibrated_ring_value);
> +	}
> +
> +	/* Check if there is already a calibration value for this ring */
> +	while(intel_execution_engines[ring_index].name != NULL) {
> +		if((intel_execution_engines[ring_index].exec_id |
> +		    intel_execution_engines[ring_index].flags) == ringid) {
> +			if(calibrated_ring_value[ring_index] != 0) {
> +				return calibrated_ring_value[ring_index];
> +			}
> +			break;
> +		}
> +		ring_index++;

Could use a for loop instead of incrementing on each iteration.

> +	}
> +
> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(target_bo);
> +
> +	/* Put some non zero values in the target bo */
> +	{

This bracket used to give scope to the data variable doesn't look 
essential as there are no other uses of a variable named data in this 
function

> +		uint32_t data=0xffffffff;

style: spaces around "="

> +		drm_intel_bo_subdata(target_bo, 0, 4, &data);

the subdata call can fail. In the tree we don't check the return code on 
every usage, but I believe it is safer to do it. do_or_die() should do 
the trick.

> +	}
> +
> +	bb = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
> +	igt_assert(bb);
> +	igt_create_delay_bb(fd, bb, ringid, CAL_SEED, target_bo);
> +
> +	gem_quiescent_gpu(fd);
> +	clock_gettime(CLOCK_MONOTONIC, &start);
> +	intel_batchbuffer_flush_on_ring(bb, ringid);
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_wait_rendering(target_bo);
> +	clock_gettime(CLOCK_MONOTONIC, &end);
> +
> +	drm_intel_bo_get_subdata(target_bo, 0, 4, (void*)buf);

Why are you using a 2 words array to read only 1 word? what's the 
purpose of buf[1]? if you don't need it you could reuse the data 
variable from above

> +
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
> +
> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
> +
> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
> +	          duration / SEC_TO_NSEC,
> +	          (duration % SEC_TO_NSEC) / 100000);
> +	drm_intel_bo_unreference(target_bo);
> +	intel_batchbuffer_free(bb);
> +
> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong */
> +	igt_assert(duration > (SEC_TO_NSEC  / 10));
> +
> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
> +
> +	if(intel_execution_engines[ring_index].name != NULL)
> +		calibrated_ring_value[ring_index] = (uint32_t)calibrated;
> +	return (uint32_t)calibrated;
> +}
> +
> +/**
> + * igt_compare_timestamps:
> + * @ts1: timestamp 1
> + * @ts2: timestamp 2
> + *
> + * This compares two uint32_t timestamps. To handle wrapping it assumes the
> + * difference between the two timestamps is less than 1/4 the max elapsed time
> + * represented by the counters.
> + * It also assumes the timestamps are samples from the same counter.
> + *
> + * Returns:
> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
> + */
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
> +{
> +	if (ts2 > ts1)

This will return true if ts2 = 0xFFFFFFFF and ts1 = 0x00000001 even if 
ts1 came after ts2 due to wrapping.

> +		return true;
> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
> +		return true; /* Assuming timestamp counter wrapped */
> +	else
> +		return false;
> +}

You could use something like:

     return (int32_t)(ts2 - ts1) > 0;

This should give you the right result as long as the 2 samples are not 
more than ~50% of the timestamp period apart

Regards,
Daniele

> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index 869747d..5b66fa3 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -323,4 +323,18 @@ typedef void (*igt_media_spinfunc_t)(struct intel_batchbuffer *batch,
>   
>   igt_media_spinfunc_t igt_get_media_spinfunc(int devid);
>   
> +uint32_t igt_batch_used(struct intel_batchbuffer *batch);
> +
> +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
> +		int ringid, uint32_t loops, drm_intel_bo *dest);
> +
> +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
> +			drm_intel_bo *dest, drm_intel_bo *load, bool write);
> +
> +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops);
> +
> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
> +
> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
> +
>   #endif
Derek Morton March 29, 2016, 3:04 p.m. UTC | #2
>
>
>-----Original Message-----
>From: Ceraolo Spurio, Daniele 
>Sent: Monday, March 14, 2016 12:16 PM
>To: Morton, Derek J <derek.j.morton@intel.com>; intel-gfx@lists.freedesktop.org
>Subject: Re: [PATCH i-g-t v3 2/6] lib/intel_batchbuffer: Add functions to be used in the scheduler test
>
>
>
>On 10/03/16 11:03, Derek Morton wrote:
>> Adds functions to create a number of different batch buffers to 
>> perform several functions including:
>> Batch buffer which will run for a long duration to provide a delay on 
>> a specified ring.
>> Function to calibrate the delay batch buffer to run for a specified 
>> period of time.
>> Function to create a batch buffer which writes timestamps to a buffer object.
>> Function to compare timestamps allowing for wrapping of the values.
>>
>> v2: Moved code to intel_batchbuffer (Daniel Vetter)
>>      Addressed review comments from Daniele Ceraolo Spurio
>>
>> Signed-off-by: Derek Morton <derek.j.morton@intel.com>
>> ---
>>   lib/intel_batchbuffer.c | 384 +++++++++++++++++++++++++++++++++++++++++++++++-
>>   lib/intel_batchbuffer.h |  14 ++
>>   2 files changed, 393 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index 
>> 692521f..30e78c5 100644
>> --- a/lib/intel_batchbuffer.c
>> +++ b/lib/intel_batchbuffer.c
>> @@ -1,8 +1,8 @@
>>   
>> /*********************************************************************
>> *****
>> - *
>> + *
>>    * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
>>    * All Rights Reserved.
>> - *
>> + *
>>    * Permission is hereby granted, free of charge, to any person obtaining a
>>    * copy of this software and associated documentation files (the
>>    * "Software"), to deal in the Software without restriction, 
>> including @@ -10,11 +10,11 @@
>>    * distribute, sub license, and/or sell copies of the Software, and to
>>    * permit persons to whom the Software is furnished to do so, subject to
>>    * the following conditions:
>> - *
>> + *
>>    * The above copyright notice and this permission notice (including the
>>    * next paragraph) shall be included in all copies or substantial portions
>>    * of the Software.
>> - *
>> + *
>>    * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>>    * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>    * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
>> @@ -22,7 +22,7 @@
>>    * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>    * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>    * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> - *
>> + *
>>    
>> **********************************************************************
>> ****/
>>   
>>   #include <inttypes.h>
>> @@ -30,6 +30,8 @@
>>   #include <stdio.h>
>>   #include <string.h>
>>   #include <assert.h>
>> +#include <stdint.h>
>> +#include <time.h>
>>   
>>   #include "drm.h"
>>   #include "drmtest.h"
>> @@ -42,6 +44,7 @@
>>   #include "ioctl_wrappers.h"
>>   #include "media_spin.h"
>>   #include "gpgpu_fill.h"
>> +#include "igt_gt.h"
>>   
>>   #include <i915_drm.h>
>>   
>> @@ -817,3 +820,374 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int 
>> devid)
>>   
>>   	return spin;
>>   }
>> +
>> +#define SEC_TO_NSEC (1000 * 1000 * 1000) #define DWORDS_TO_BYTES(x) 
>> +((x)*4)
>> +
>> +#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
>> +#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
>> +#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
>> +#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
>> +
>> +#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
>> +#define ALU_SUB             ( 0x101 << 20)
>> +#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
>> +
>> +#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
>> +#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context 
>> +creation */ #define ALU_GPU_R0_LSB_offset (0x600) #define 
>> +ALU_GPU_R0_MSB_offset (0x604) #define ALU_GPU_R1_LSB_offset (0x608) 
>> +#define ALU_GPU_R1_MSB_offset (0x60C) #define ALU_GPU_R2_LSB_offset 
>> +(0x610) #define ALU_GPU_R2_MSB_offset (0x614)
>> +
>> +#define ALU_R0_ENCODING   (0x00)
>> +#define ALU_R1_ENCODING   (0x01)
>> +#define ALU_SRCA_ENCODING (0x20)
>> +#define ALU_SRCB_ENCODING (0x21)
>> +#define ALU_ACCU_ENCODING (0x31)
>> +
>> +static int bb_address_size_dw(int fd) {
>> +	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
>> +		return 2;
>> +	else
>> +		return 1;
>> +}
>> +
>> +static uint32_t get_mmio_base(int ringid) {
>> +	switch (ringid) {
>> +	case I915_EXEC_RENDER:
>> +		return 0x02000;
>> +	case I915_EXEC_BSD:
>> +	case I915_EXEC_BSD | 1<<13: /* BSD1 */
>> +		return 0x12000;
>> +	case I915_EXEC_BSD | 2<<13: /* BSD2 */
>> +		return 0x1c000;
>> +	case I915_EXEC_BLT:
>> +		return 0x22000;
>> +	case I915_EXEC_VEBOX:
>> +		return 0x1A000;
>> +	default:
>> +		igt_assert_f(0, "Invalid ringid %d passed to get_mmio_base()\n", ringid);
>> +	}
>> +}
>> +
>> +/**
>> + * igt_batch_used
>> + * @batch batchbuffer to get offset from
>> + *
>> + * This returns the number of bytes of the batchbuffer that have been used.
>> + * e.g. The offset into the batchbuffer that the next OUT_BATCH would write to.
>> + *
>> + * Returns:
>> + * The number of bytes of the batchbuffer that have been used.
>> + */
>> +uint32_t igt_batch_used(struct intel_batchbuffer *batch) {
>> +	return batch->ptr - batch->buffer;
>> +}
>> +
>> +/**
>> + * igt_create_delay_bb:
>> + * @fd: file descriptor for i915 driver instance
>> + * @batch: Batch buffer to write to
>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
>
>ringid is the execution flag of the ring

Ok

>
>> + * loops: Number of times to loop
>> + * dest: Buffer to use for saving the current loop count and timestamp.
>> + *
>> + * This creates a batch buffer which will iterate a loop a specified 
>> +number
>> + * of times. Intended for creating batch buffers which take an 
>> +arbitarily
>> + * long time to execute. This can be useful to keep a ring busy while
>> + * constructing a test scenario.
>> + *
>> + * The dest buffer will have a number of Dwords written by the batch 
>> +buffer
>> + * when it runs. They are:
>> + * DW0 & DW1 - These are loaded with the value of 'loops' and are decremented
>> + *             as the batch buffer executes. They will be 0 after the batch
>> + *             buffer completes if it finished succesfully.
>> + * DW2 Timestamp - An indication of when the batch buffer ran allowing a
>> + *                 comparison between batch buffers to show execution order.
>> + *                 May wrap so igt_compare_timestamps() should be used to
>> + *                 compare timestamps.
>> + *                 The timestamp will wrap every few minutes.
>> + *
>> + */
>> +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
>
>Now that you're passing the batch from the outside you're only using the fd to get the gen. However, the gen is already saved inside the batch so you could get it from there and drop the fd parameter. the bb_address_size_dw would need to be modified to get the batch or the gen as a parameter, but it looks like an ok change to me. Same reasoning applies to other functions you've added.

Ok

>
>> +                         int ringid, uint32_t loops, drm_intel_bo 
>> +*dest) {
>> +	int addr_size_dw;
>> +	uint32_t mmio_base, jump_offset;
>> +
>> +	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
>> +	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
>> +
>> +	addr_size_dw = bb_address_size_dw(fd);
>> +	mmio_base = get_mmio_base(ringid);
>> +	igt_assert(batch);
>> +	BEGIN_BATCH(32, 5);
>> +
>> +	/* store current timestamp in DW2 */
>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
>> +	OUT_BATCH(mmio_base + TIMESTAMP_offset);
>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 
>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
>> +
>> +	/* Load R0 with loops */
>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
>> +	OUT_BATCH(loops);
>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>> +	OUT_BATCH(mmio_base + ALU_GPU_R0_MSB_offset);
>> +	OUT_BATCH(0x00000000);
>> +	/* Load R1 with 1 */
>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>> +	OUT_BATCH(mmio_base + ALU_GPU_R1_LSB_offset);
>> +	OUT_BATCH(0x00000001);
>> +	OUT_BATCH(MI_LOAD_REGISTER_IMM);
>> +	OUT_BATCH(mmio_base + ALU_GPU_R1_MSB_offset);
>> +	OUT_BATCH(0x00000000);
>> +	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
>> +	/* e.g. R0 -= 1 */
>> +	jump_offset=igt_batch_used(batch);
>
>for clarity the sampling of jump offset could go above the ALU comment (and have a comment of its own)

Ok

>
>> +	OUT_BATCH(MI_MATH(4));
>> +	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
>> +	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
>> +	OUT_BATCH(ALU_SUB);
>> +	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
>> +	/* Copy R0 to dest
>> +	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
>> +	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
>> +	 * together.
>> +	 * On Gen9+, and the other rings on Gen8 Compare address points to
>> +	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
>> +	 */
>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
>> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
>> +	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
>> +	/* Repeat until R0 == 0 */
>> +	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
>> +	OUT_BATCH(0x00000000);
>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
>> +	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
>> +	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, jump_offset);
>> +
>> +	/* Should never get here, but end if it happens */
>> +	OUT_BATCH(MI_BATCH_BUFFER_END);
>> +	ADVANCE_BATCH();
>> +}
>> +
>> +/**
>> + * igt_create_timestamp_bb:
>> + * @fd: file descriptor for i915 driver instance
>> + * @batch: Batch buffer to write to
>> + * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
>> + * dest: Buffer to use for saving the timestamps.
>> + * load: Buffer to access. Set NULL if not required.
>> + * write: If true and load is not NULL, will also write a timestamp 
>> +to load
>> + * buffer. If false and load is not NULL, will read from load buffer into dest.
>> + * Intended for dependency checking.
>> + *
>> + * This creates a batch buffer which writes timestamps into a buffer object.
>> + * If 'load' is non null, data is either written to 'load' or copied from 'load'
>> + * depending on whether 'write' is set.
>> + *
>> + * The dest buffer will have a number of Dwords written by the batch 
>> +buffer
>> + * when it runs. They are:
>> + * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
>> + *                          comparison between batch buffers to show execution order.
>> + *                          May wrap so igt_compare_timestamps() should be used to
>> + *                          compare timestamps.
>> + *                          The timestamp will wrap every few minutes.
>> + * DW2 Value copied from DW0 of load if write == false
>> + *
>> + */
>> +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
>> +                             drm_intel_bo *dest, drm_intel_bo *load, 
>> +bool write)
>
>I still think that the timestamp capture should be its own function, because it could be useful to add it multiple time in the same batch. 
>You could split that out in a separate function and then call it from igt_create_timestamp_bb, i.e. something like:
>
>igt_emit_timestamp_sample(struct intel_batchbuffer *batch, int ringid, drm_intel_bo *dest, unsigned dest_offset);

I can create a separate function. Will make it static for now as it is not needed outside this file.
>
>Personally I also still don't really like the "load" handling, which is not related to timestamps in any way. won't oppose it if other people are ok with it though.
>
>> +{
>> +	int addr_size_dw;
>> +	uint32_t mmio_base;
>> +
>> +	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
>> +	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
>> +
>> +	addr_size_dw = bb_address_size_dw(fd);
>> +	mmio_base = get_mmio_base(ringid);
>> +	igt_assert(batch);
>> +
>> +	BEGIN_BATCH(3, 1);
>> +	/* store current reported timestamp in DW0 */
>> +	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
>> +	OUT_BATCH(mmio_base + TIMESTAMP_offset);
>> +	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, 
>> +I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
>> +
>> +	ADVANCE_BATCH();
>> +
>> +	if(load != NULL) {
>
>style: we tend to put a space between if/while/for and the parenthesis, but you didn't anywhere in this patch

Ok

>
>> +		if(write) {
>> +			BEGIN_BATCH(3, 1);
>> +			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
>> +			OUT_BATCH(mmio_base + TIMESTAMP_offset);
>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
>> +			ADVANCE_BATCH();
>> +		}
>> +		else {
>
>weird indent

What is weird? The if and else line up and it is all tabs.

>
>> +			BEGIN_BATCH(3, 2);
>> +			OUT_BATCH(MI_COPY_MEM_MEM);
>> +			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
>> +			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
>> +			ADVANCE_BATCH();
>> +		}
>> +	}
>> +
>> +	BEGIN_BATCH(1, 0);
>> +	OUT_BATCH(MI_BATCH_BUFFER_END);
>> +	ADVANCE_BATCH();
>> +}
>> +
>> +/**
>> + * igt_create_noop_bb:
>> + * @batch: Batch buffer to write to
>> + * noops: Number of MI_NOOP instructions to add to the batch buffer.
>> + *
>> + * This creates a batch buffer with a specified number of MI_NOOP instructions.
>> + */
>> +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops) {
>> +	int loop;
>> +
>> +	igt_assert(batch);
>> +
>> +	BEGIN_BATCH(noops + 1, 0);
>> +	for(loop = 0; loop < noops; loop++)
>> +		OUT_BATCH(MI_NOOP);
>> +	OUT_BATCH(MI_BATCH_BUFFER_END);
>> +	ADVANCE_BATCH();
>> +
>> +}
>> +
>> +/* Store calibrated values so they only need calculating once.
>> + * Use intel_execution_engines array as list of supported rings  */ 
>> +static uint32_t *calibrated_ring_value = NULL;
>> +
>> +/**
>> + * igt_calibrate_delay_bb:
>> + * @fd: file descriptor for i915 driver instance
>> + * @bufmgr: Buffer manager to be used for creation of batch buffers
>> + * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
>> + *
>> + * This calculates the value of loops that would need to be passed to
>> + * igt_create_delay_bb() to create a delay of about 1 second on the 
>> +specified
>> + * ring.
>> + *
>> + * Returns:
>> + * uint32_t to be passed to igt_create_delay_bb().
>> + */
>> +/* 0x100000 will run for about 0.6 - 0.8 seconds (dependant on ring) 
>> +on BXT HW */ #define CAL_SEED (0x100000) uint32_t 
>> +igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid) 
>> +{
>> +	uint32_t buf[2];
>> +	struct intel_batchbuffer *bb;
>> +	struct timespec start, end;
>> +	uint64_t duration;
>> +	uint64_t calibrated;
>> +	drm_intel_bo *target_bo;
>> +	int ring_index=0;
>> +
>> +/*	igt_assert(ringid < 8);
>> +	if(calibrated_ring_value[ringid] != 0)
>> +		return calibrated_ring_value[ringid];*/
>
>dead code

Removed

>
>> +
>> +	if(calibrated_ring_value == NULL) {
>> +		int count;
>> +		for(count = 0; intel_execution_engines[count].name != NULL; 
>> +count++) {}
>
>Can you add a comment to explain why this loop is required to count the engines? it could also be possible to add a function in igt_gt to return the exec_engines count, but I'm not sure that's worth the effort. Also,
>BSD1 and BSD2 will have the same calibration value so you can calibrate only once for I915_EXEC_BSD

Can add a comment. It will require adding a load of logic to use a single calibration value for BSD1 and BSD2, and that would then make the assumption that they would have the same calibration value. I would prefer not to make such assumptions.

>
>> +		calibrated_ring_value = calloc(count, sizeof(uint32_t));
>> +		igt_assert(calibrated_ring_value);
>> +	}
>> +
>> +	/* Check if there is already a calibration value for this ring */
>> +	while(intel_execution_engines[ring_index].name != NULL) {
>> +		if((intel_execution_engines[ring_index].exec_id |
>> +		    intel_execution_engines[ring_index].flags) == ringid) {
>> +			if(calibrated_ring_value[ring_index] != 0) {
>> +				return calibrated_ring_value[ring_index];
>> +			}
>> +			break;
>> +		}
>> +		ring_index++;
>
>Could use a for loop instead of incrementing on each iteration.

Ok

>
>> +	}
>> +
>> +	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
>> +	igt_assert(target_bo);
>> +
>> +	/* Put some non zero values in the target bo */
>> +	{
>
>This bracket used to give scope to the data variable doesn't look essential as there are no other uses of a variable named data in this function
>
>> +		uint32_t data=0xffffffff;
>
>style: spaces around "="
>
>> +		drm_intel_bo_subdata(target_bo, 0, 4, &data);
>
>the subdata call can fail. In the tree we don't check the return code on every usage, but I believe it is safer to do it. do_or_die() should do the trick.
>
>> +	}
>> +
>> +	bb = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
>> +	igt_assert(bb);
>> +	igt_create_delay_bb(fd, bb, ringid, CAL_SEED, target_bo);
>> +
>> +	gem_quiescent_gpu(fd);
>> +	clock_gettime(CLOCK_MONOTONIC, &start);
>> +	intel_batchbuffer_flush_on_ring(bb, ringid);
>> +	/* This will not return until the bo has finished executing */
>> +	drm_intel_bo_wait_rendering(target_bo);
>> +	clock_gettime(CLOCK_MONOTONIC, &end);
>> +
>> +	drm_intel_bo_get_subdata(target_bo, 0, 4, (void*)buf);
>
>Why are you using a 2 words array to read only 1 word? what's the purpose of buf[1]? if you don't need it you could reuse the data variable from above
>
>> +
>> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
>> +	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", 
>> +buf[0]);
>> +
>> +	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
>> +	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
>> +
>> +	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
>> +	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
>> +	          duration / SEC_TO_NSEC,
>> +	          (duration % SEC_TO_NSEC) / 100000);
>> +	drm_intel_bo_unreference(target_bo);
>> +	intel_batchbuffer_free(bb);
>> +
>> +	/* Sanity check. If duration < 100ms, something has clearly gone wrong */
>> +	igt_assert(duration > (SEC_TO_NSEC  / 10));
>> +
>> +	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > 
>> +UINT32_MAX\n");
>> +
>> +	if(intel_execution_engines[ring_index].name != NULL)
>> +		calibrated_ring_value[ring_index] = (uint32_t)calibrated;
>> +	return (uint32_t)calibrated;
>> +}
>> +
>> +/**
>> + * igt_compare_timestamps:
>> + * @ts1: timestamp 1
>> + * @ts2: timestamp 2
>> + *
>> + * This compares two uint32_t timestamps. To handle wrapping it 
>> +assumes the
>> + * difference between the two timestamps is less than 1/4 the max 
>> +elapsed time
>> + * represented by the counters.
>> + * It also assumes the timestamps are samples from the same counter.
>> + *
>> + * Returns:
>> + * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
>> + */
>> +
>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2) {
>> +	if (ts2 > ts1)
>
>This will return true if ts2 = 0xFFFFFFFF and ts1 = 0x00000001 even if
>ts1 came after ts2 due to wrapping.
>
>> +		return true;
>> +	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
>> +		return true; /* Assuming timestamp counter wrapped */
>> +	else
>> +		return false;
>> +}
>
>You could use something like:
>
>     return (int32_t)(ts2 - ts1) > 0;

Ok

>
>This should give you the right result as long as the 2 samples are not more than ~50% of the timestamp period apart
>
>Regards,
>Daniele
>
>> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h index 
>> 869747d..5b66fa3 100644
>> --- a/lib/intel_batchbuffer.h
>> +++ b/lib/intel_batchbuffer.h
>> @@ -323,4 +323,18 @@ typedef void (*igt_media_spinfunc_t)(struct 
>> intel_batchbuffer *batch,
>>   
>>   igt_media_spinfunc_t igt_get_media_spinfunc(int devid);
>>   
>> +uint32_t igt_batch_used(struct intel_batchbuffer *batch);
>> +
>> +void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
>> +		int ringid, uint32_t loops, drm_intel_bo *dest);
>> +
>> +void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
>> +			drm_intel_bo *dest, drm_intel_bo *load, bool write);
>> +
>> +void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops);
>> +
>> +uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int 
>> +ringid);
>> +
>> +bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
>> +
>>   #endif
>
>
diff mbox

Patch

diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 692521f..30e78c5 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -1,8 +1,8 @@ 
 /**************************************************************************
- * 
+ *
  * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@ 
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@ 
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <inttypes.h>
@@ -30,6 +30,8 @@ 
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <stdint.h>
+#include <time.h>
 
 #include "drm.h"
 #include "drmtest.h"
@@ -42,6 +44,7 @@ 
 #include "ioctl_wrappers.h"
 #include "media_spin.h"
 #include "gpgpu_fill.h"
+#include "igt_gt.h"
 
 #include <i915_drm.h>
 
@@ -817,3 +820,374 @@  igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
 
 	return spin;
 }
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+#define DWORDS_TO_BYTES(x) ((x)*4)
+
+#define MI_STORE_REGISTER_MEM(LENGTH)   ((0x024 << 23) | ((LENGTH - 2) & 0xff))
+#define MI_MATH(NrInst)                 ((0x01A << 23) | ((NrInst - 1) & 0x3f))
+#define MI_CONDITIONAL_BATCH_BUFFER_END ((0x036 << 23) | (1 << 21) | 2)
+#define MI_COPY_MEM_MEM                 ((0x02E << 23) | (3))
+
+#define ALU_LOAD(TO, FROM)  ((0x080 << 20) | ((TO) << 10) | (FROM))
+#define ALU_SUB             ( 0x101 << 20)
+#define ALU_STORE(TO, FROM) ((0x180 << 20) | ((TO) << 10) | (FROM))
+
+#define TIMESTAMP_offset      (0x358) /* Elapsed time from system start */
+#define CTX_TIMESTAMP_offset  (0x3A8) /* Elapsed Time from context creation */
+#define ALU_GPU_R0_LSB_offset (0x600)
+#define ALU_GPU_R0_MSB_offset (0x604)
+#define ALU_GPU_R1_LSB_offset (0x608)
+#define ALU_GPU_R1_MSB_offset (0x60C)
+#define ALU_GPU_R2_LSB_offset (0x610)
+#define ALU_GPU_R2_MSB_offset (0x614)
+
+#define ALU_R0_ENCODING   (0x00)
+#define ALU_R1_ENCODING   (0x01)
+#define ALU_SRCA_ENCODING (0x20)
+#define ALU_SRCB_ENCODING (0x21)
+#define ALU_ACCU_ENCODING (0x31)
+
+static int bb_address_size_dw(int fd)
+{
+	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
+		return 2;
+	else
+		return 1;
+}
+
+static uint32_t get_mmio_base(int ringid)
+{
+	switch (ringid) {
+	case I915_EXEC_RENDER:
+		return 0x02000;
+	case I915_EXEC_BSD:
+	case I915_EXEC_BSD | 1<<13: /* BSD1 */
+		return 0x12000;
+	case I915_EXEC_BSD | 2<<13: /* BSD2 */
+		return 0x1c000;
+	case I915_EXEC_BLT:
+		return 0x22000;
+	case I915_EXEC_VEBOX:
+		return 0x1A000;
+	default:
+		igt_assert_f(0, "Invalid ringid %d passed to get_mmio_base()\n", ringid);
+	}
+}
+
+/**
+ * igt_batch_used
+ * @batch batchbuffer to get offset from
+ *
+ * This returns the number of bytes of the batchbuffer that have been used.
+ * e.g. The offset into the batchbuffer that the next OUT_BATCH would write to.
+ *
+ * Returns:
+ * The number of bytes of the batchbuffer that have been used.
+ */
+uint32_t igt_batch_used(struct intel_batchbuffer *batch)
+{
+	return batch->ptr - batch->buffer;
+}
+
+/**
+ * igt_create_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @batch: Batch buffer to write to
+ * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * loops: Number of times to loop
+ * dest: Buffer to use for saving the current loop count and timestamp.
+ *
+ * This creates a batch buffer which will iterate a loop a specified number
+ * of times. Intended for creating batch buffers which take an arbitarily
+ * long time to execute. This can be useful to keep a ring busy while
+ * constructing a test scenario.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 & DW1 - These are loaded with the value of 'loops' and are decremented
+ *             as the batch buffer executes. They will be 0 after the batch
+ *             buffer completes if it finished succesfully.
+ * DW2 Timestamp - An indication of when the batch buffer ran allowing a
+ *                 comparison between batch buffers to show execution order.
+ *                 May wrap so igt_compare_timestamps() should be used to
+ *                 compare timestamps.
+ *                 The timestamp will wrap every few minutes.
+ *
+ */
+void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
+                         int ringid, uint32_t loops, drm_intel_bo *dest)
+{
+	int addr_size_dw;
+	uint32_t mmio_base, jump_offset;
+
+	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
+	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+
+	addr_size_dw = bb_address_size_dw(fd);
+	mmio_base = get_mmio_base(ringid);
+	igt_assert(batch);
+	BEGIN_BATCH(32, 5);
+
+	/* store current timestamp in DW2 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(mmio_base + TIMESTAMP_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+
+	/* Load R0 with loops */
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+	OUT_BATCH(loops);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(mmio_base + ALU_GPU_R0_MSB_offset);
+	OUT_BATCH(0x00000000);
+	/* Load R1 with 1 */
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(mmio_base + ALU_GPU_R1_LSB_offset);
+	OUT_BATCH(0x00000001);
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(mmio_base + ALU_GPU_R1_MSB_offset);
+	OUT_BATCH(0x00000000);
+	/* Copy R0 / R1 into SRCA / SRCB, Perform R0 - R1, Store result in R0 */
+	/* e.g. R0 -= 1 */
+	jump_offset=igt_batch_used(batch);
+	OUT_BATCH(MI_MATH(4));
+	OUT_BATCH(ALU_LOAD(ALU_SRCA_ENCODING, ALU_R0_ENCODING));
+	OUT_BATCH(ALU_LOAD(ALU_SRCB_ENCODING, ALU_R1_ENCODING));
+	OUT_BATCH(ALU_SUB);
+	OUT_BATCH(ALU_STORE(ALU_R0_ENCODING, ALU_ACCU_ENCODING));
+	/* Copy R0 to dest
+	 * On Gen8 MI_CONDITIONAL_BATCH_BUFFER_END BSD ring Compare address
+	 * points to 2 Dwords, a mask (DW0) and data (DW1) which are ANDed
+	 * together.
+	 * On Gen9+, and the other rings on Gen8 Compare address points to
+	 * just Data (DW0). For simplicity always copy R0 LSB to DW0 and DW1.
+	 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(mmio_base + ALU_GPU_R0_LSB_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(1));
+	/* Repeat until R0 == 0 */
+	OUT_BATCH(MI_CONDITIONAL_BATCH_BUFFER_END);
+	OUT_BATCH(0x00000000);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+	OUT_BATCH(MI_BATCH_BUFFER_START | (addr_size_dw - 1));
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, jump_offset);
+
+	/* Should never get here, but end if it happens */
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+}
+
+/**
+ * igt_create_timestamp_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @batch: Batch buffer to write to
+ * ringid: Ring to create batch buffer for. e.g. I915_EXEC_RENDER
+ * dest: Buffer to use for saving the timestamps.
+ * load: Buffer to access. Set NULL if not required.
+ * write: If true and load is not NULL, will also write a timestamp to load
+ * buffer. If false and load is not NULL, will read from load buffer into dest.
+ * Intended for dependency checking.
+ *
+ * This creates a batch buffer which writes timestamps into a buffer object.
+ * If 'load' is non null, data is either written to 'load' or copied from 'load'
+ * depending on whether 'write' is set.
+ *
+ * The dest buffer will have a number of Dwords written by the batch buffer
+ * when it runs. They are:
+ * DW0 Reported timestamp - An indication of when the batch buffer ran allowing a
+ *                          comparison between batch buffers to show execution order.
+ *                          May wrap so igt_compare_timestamps() should be used to
+ *                          compare timestamps.
+ *                          The timestamp will wrap every few minutes.
+ * DW2 Value copied from DW0 of load if write == false
+ *
+ */
+void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
+                             drm_intel_bo *dest, drm_intel_bo *load, bool write)
+{
+	int addr_size_dw;
+	uint32_t mmio_base;
+
+	/* CMD parser blocks reading TIMESTAMP register on gen 7.5 */
+	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 8);
+
+	addr_size_dw = bb_address_size_dw(fd);
+	mmio_base = get_mmio_base(ringid);
+	igt_assert(batch);
+
+	BEGIN_BATCH(3, 1);
+	/* store current reported timestamp in DW0 */
+	OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+	OUT_BATCH(mmio_base + TIMESTAMP_offset);
+	OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+
+	ADVANCE_BATCH();
+
+	if(load != NULL) {
+		if(write) {
+			BEGIN_BATCH(3, 1);
+			OUT_BATCH(MI_STORE_REGISTER_MEM(addr_size_dw + 2));
+			OUT_BATCH(mmio_base + TIMESTAMP_offset);
+			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(0));
+			ADVANCE_BATCH();
+		}
+		else {
+			BEGIN_BATCH(3, 2);
+			OUT_BATCH(MI_COPY_MEM_MEM);
+			OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, DWORDS_TO_BYTES(2));
+			OUT_RELOC(load, I915_GEM_DOMAIN_INSTRUCTION, 0, DWORDS_TO_BYTES(0));
+			ADVANCE_BATCH();
+		}
+	}
+
+	BEGIN_BATCH(1, 0);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+}
+
+/**
+ * igt_create_noop_bb:
+ * @batch: Batch buffer to write to
+ * noops: Number of MI_NOOP instructions to add to the batch buffer.
+ *
+ * This creates a batch buffer with a specified number of MI_NOOP instructions.
+ */
+void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops)
+{
+	int loop;
+
+	igt_assert(batch);
+
+	BEGIN_BATCH(noops + 1, 0);
+	for(loop = 0; loop < noops; loop++)
+		OUT_BATCH(MI_NOOP);
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	ADVANCE_BATCH();
+
+}
+
+/* Store calibrated values so they only need calculating once.
+ * Use intel_execution_engines array as list of supported rings
+ */
+static uint32_t *calibrated_ring_value = NULL;
+
+/**
+ * igt_calibrate_delay_bb:
+ * @fd: file descriptor for i915 driver instance
+ * @bufmgr: Buffer manager to be used for creation of batch buffers
+ * ringid: Ring to calibrate. e.g. I915_EXEC_RENDER
+ *
+ * This calculates the value of loops that would need to be passed to
+ * igt_create_delay_bb() to create a delay of about 1 second on the specified
+ * ring.
+ *
+ * Returns:
+ * uint32_t to be passed to igt_create_delay_bb().
+ */
+/* 0x100000 will run for about 0.6 - 0.8 seconds (dependant on ring) on BXT HW */
+#define CAL_SEED (0x100000)
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid)
+{
+	uint32_t buf[2];
+	struct intel_batchbuffer *bb;
+	struct timespec start, end;
+	uint64_t duration;
+	uint64_t calibrated;
+	drm_intel_bo *target_bo;
+	int ring_index=0;
+
+/*	igt_assert(ringid < 8);
+	if(calibrated_ring_value[ringid] != 0)
+		return calibrated_ring_value[ringid];*/
+
+	if(calibrated_ring_value == NULL) {
+		int count;
+		for(count = 0; intel_execution_engines[count].name != NULL; count++) {}
+		calibrated_ring_value = calloc(count, sizeof(uint32_t));
+		igt_assert(calibrated_ring_value);
+	}
+
+	/* Check if there is already a calibration value for this ring */
+	while(intel_execution_engines[ring_index].name != NULL) {
+		if((intel_execution_engines[ring_index].exec_id |
+		    intel_execution_engines[ring_index].flags) == ringid) {
+			if(calibrated_ring_value[ring_index] != 0) {
+				return calibrated_ring_value[ring_index];
+			}
+			break;
+		}
+		ring_index++;
+	}
+
+	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", BATCH_SZ, BATCH_SZ);
+	igt_assert(target_bo);
+
+	/* Put some non zero values in the target bo */
+	{
+		uint32_t data=0xffffffff;
+		drm_intel_bo_subdata(target_bo, 0, 4, &data);
+	}
+
+	bb = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(bb);
+	igt_create_delay_bb(fd, bb, ringid, CAL_SEED, target_bo);
+
+	gem_quiescent_gpu(fd);
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	intel_batchbuffer_flush_on_ring(bb, ringid);
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_wait_rendering(target_bo);
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	drm_intel_bo_get_subdata(target_bo, 0, 4, (void*)buf);
+
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(buf[0] == 0, "buf[0] expected 0x0, got 0x%x\n", buf[0]);
+
+	duration = ((((uint64_t)end.tv_sec - (uint64_t)start.tv_sec) * SEC_TO_NSEC)
+	           + (uint64_t)end.tv_nsec) - (uint64_t)start.tv_nsec;
+
+	calibrated = (((uint64_t)(CAL_SEED) * SEC_TO_NSEC) / duration);
+	igt_debug("Uncalibrated run took %" PRIu64 ".%04" PRIu64 "s\n",
+	          duration / SEC_TO_NSEC,
+	          (duration % SEC_TO_NSEC) / 100000);
+	drm_intel_bo_unreference(target_bo);
+	intel_batchbuffer_free(bb);
+
+	/* Sanity check. If duration < 100ms, something has clearly gone wrong */
+	igt_assert(duration > (SEC_TO_NSEC  / 10));
+
+	igt_assert_f(calibrated <= UINT32_MAX, "Calibrated value > UINT32_MAX\n");
+
+	if(intel_execution_engines[ring_index].name != NULL)
+		calibrated_ring_value[ring_index] = (uint32_t)calibrated;
+	return (uint32_t)calibrated;
+}
+
+/**
+ * igt_compare_timestamps:
+ * @ts1: timestamp 1
+ * @ts2: timestamp 2
+ *
+ * This compares two uint32_t timestamps. To handle wrapping it assumes the
+ * difference between the two timestamps is less than 1/4 the max elapsed time
+ * represented by the counters.
+ * It also assumes the timestamps are samples from the same counter.
+ *
+ * Returns:
+ * True if ts2 > ts1, allowing for counter wrapping, false otherwise.
+ */
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2)
+{
+	if (ts2 > ts1)
+		return true;
+	else if ((ts1 > 0x80000000) && (ts2 < 0x40000000))
+		return true; /* Assuming timestamp counter wrapped */
+	else
+		return false;
+}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 869747d..5b66fa3 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -323,4 +323,18 @@  typedef void (*igt_media_spinfunc_t)(struct intel_batchbuffer *batch,
 
 igt_media_spinfunc_t igt_get_media_spinfunc(int devid);
 
+uint32_t igt_batch_used(struct intel_batchbuffer *batch);
+
+void igt_create_delay_bb(int fd, struct intel_batchbuffer *batch,
+		int ringid, uint32_t loops, drm_intel_bo *dest);
+
+void igt_create_timestamp_bb(int fd, struct intel_batchbuffer *batch, int ringid,
+			drm_intel_bo *dest, drm_intel_bo *load, bool write);
+
+void igt_create_noop_bb(struct intel_batchbuffer *batch, int noops);
+
+uint32_t igt_calibrate_delay_bb(int fd, drm_intel_bufmgr *bufmgr, int ringid);
+
+bool igt_compare_timestamps(uint32_t ts1, uint32_t ts2);
+
 #endif