@@ -444,6 +444,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_MMAP_GTT_COHERENT:
value = INTEL_INFO(dev_priv)->has_coherent_ggtt;
break;
+ case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+ /* Obviously requires perf support. */
+ value = dev_priv->perf.initialized;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
@@ -286,6 +286,8 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+
+ struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. */
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1121,6 +1123,32 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
*addr = value;
}
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+ int perf_fd, u32 oa_config_id,
+ struct i915_vma **out_oa_vma)
+{
+ struct file *perf_file;
+ int ret;
+
+ if (!dev_priv->perf.oa.exclusive_stream)
+ return -EINVAL;
+
+ perf_file = fget(perf_fd);
+ if (!perf_file)
+ return -EINVAL;
+
+ ret = -EINVAL;
+
+ if (perf_file->private_data == dev_priv->perf.oa.exclusive_stream)
+ ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+ NULL, out_oa_vma);
+
+ fput(perf_file);
+
+ return ret;
+}
+
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
@@ -1173,6 +1201,9 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_unpin;
}
+ rq->oa_config = eb->oa_config;
+ eb->oa_config = NULL;
+
err = i915_request_await_object(rq, vma->obj, true);
if (err)
goto err_request;
@@ -1875,12 +1906,15 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return false;
}
- if (exec->DR4 == 0xffffffff) {
- DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
- exec->DR4 = 0;
+ /* We reuse the DR1 & DR4 fields to pass the perf config details. */
+ if (!(exec->flags & I915_EXEC_PERF_CONFIG)) {
+ if (exec->DR4 == 0xffffffff) {
+ DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+ exec->DR4 = 0;
+ }
+ if (exec->DR1 || exec->DR4)
+ return false;
}
- if (exec->DR1 || exec->DR4)
- return false;
if ((exec->batch_start_offset | exec->batch_len) & 0x7)
return false;
@@ -2224,6 +2258,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.buffer_count = args->buffer_count;
eb.batch_start_offset = args->batch_start_offset;
eb.batch_len = args->batch_len;
+ eb.oa_config = NULL;
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
@@ -2253,9 +2288,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
+ if (args->flags & I915_EXEC_PERF_CONFIG) {
+ err = get_execbuf_oa_config(eb.i915, args->DR1, args->DR4,
+ &eb.oa_config);
+ if (err)
+ goto err_out_fence;
+ }
+
err = eb_create(&eb);
if (err)
- goto err_out_fence;
+ goto err_perf;
GEM_BUG_ON(!eb.lut_size);
@@ -2365,6 +2407,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_batch_unpin;
}
+ eb.request->oa_config = eb.oa_config;
+ eb.oa_config = NULL;
+
if (in_fence) {
err = i915_request_await_dma_fence(eb.request, in_fence);
if (err < 0)
@@ -2426,6 +2471,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
i915_gem_context_put(eb.ctx);
err_destroy:
eb_destroy(&eb);
+err_perf:
+ if (eb.oa_config)
+ i915_vma_put(eb.oa_config);
err_out_fence:
if (out_fence_fd != -1)
put_unused_fd(out_fence_fd);
@@ -379,6 +379,9 @@ static void i915_request_retire(struct i915_request *request)
unreserve_gt(request->i915);
+ if (request->oa_config)
+ i915_vma_put(request->oa_config);
+
i915_sched_node_fini(request->i915, &request->sched);
i915_request_put(request);
}
@@ -704,6 +707,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
rq->batch = NULL;
rq->capture_list = NULL;
rq->waitboost = false;
+ rq->oa_config = NULL;
/*
* Reserve space in the ring buffer for all the commands required to
@@ -188,6 +188,8 @@ struct i915_request {
struct drm_i915_file_private *file_priv;
/** file_priv list entry for this request */
struct list_head client_link;
+
+ struct i915_vma *oa_config; /** HW configuration for OA, NULL if not needed. */
};
#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -1858,6 +1858,8 @@ static int gen8_emit_bb_start(struct i915_request *rq,
{
u32 *cs;
int ret;
+ bool use_oa_config =
+ rq->i915->perf.oa.exclusive_stream && rq->oa_config;
/* Don't rely in hw updating PDPs, specially in lite-restore.
* Ideally, we should set Force PD Restore in ctx descriptor,
@@ -1875,10 +1877,19 @@ static int gen8_emit_bb_start(struct i915_request *rq,
rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
}
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, use_oa_config ? 10 : 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
+ if (use_oa_config) {
+ u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+ *cs++ = MI_BATCH_BUFFER_START_GEN8;
+ *cs++ = oa_config_offset;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ }
+
/*
* WaDisableCtxRestoreArbitration:bdw,chv
*
@@ -2037,11 +2037,20 @@ hsw_emit_bb_start(struct i915_request *rq,
unsigned int dispatch_flags)
{
u32 *cs;
+ bool use_oa_config =
+ rq->i915->perf.oa.exclusive_stream && rq->oa_config;
- cs = intel_ring_begin(rq, 2);
+ cs = intel_ring_begin(rq, use_oa_config ? 4 : 2);
if (IS_ERR(cs))
return PTR_ERR(cs);
+ if (use_oa_config) {
+ u32 oa_config_offset = i915_ggtt_offset(rq->oa_config);
+
+ *cs++ = MI_BATCH_BUFFER_START;
+ *cs++ = oa_config_offset;
+ }
+
*cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW);
/* bit0-7 is the length on GEN6+ */
@@ -559,6 +559,8 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_MMAP_GTT_COHERENT 52
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 53
+
typedef struct drm_i915_getparam {
__s32 param;
/*
@@ -1078,7 +1080,15 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_ARRAY (1<<19)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/* Request that perf monitoring hardware be reprogrammed before executing the
+ * commands from the batch in the execbuf. The DR1 & DR4 fields of the execbuf
+ * must respectively contain the file descriptor of the perf monitoring device
+ * and the configuration to program.
+ */
+#define I915_EXEC_PERF_CONFIG (1<<20)
+
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_PERF_CONFIG<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?)
to indicate which OA configuration should be used for a batch buffer.
This requires the process issuing the execbuf with this flag to also
own the perf fd at the time of execbuf.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  4 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 60 +++++++++++++++++++---
 drivers/gpu/drm/i915/i915_request.c        |  4 ++
 drivers/gpu/drm/i915/i915_request.h        |  2 +
 drivers/gpu/drm/i915/intel_lrc.c           | 13 ++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 11 +++-
 include/uapi/drm/i915_drm.h                | 12 ++++-
 7 files changed, 97 insertions(+), 9 deletions(-)
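
For context, a minimal userspace sketch (illustrative only, not part of this
patch) of probing for the new parameter before relying on the flag. It assumes
an already open DRM fd and libdrm's drmIoctl(); the helper name is
hypothetical:

#include <xf86drm.h>
#include <i915_drm.h>

/* Illustrative helper: returns non-zero when the kernel advertises
 * I915_PARAM_HAS_EXEC_PERF_CONFIG, i.e. i915 perf is initialized. */
static int has_exec_perf_config(int drm_fd)
{
	struct drm_i915_getparam gp = { };
	int value = 0;

	gp.param = I915_PARAM_HAS_EXEC_PERF_CONFIG;
	gp.value = &value;

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}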
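
And a hedged sketch of an execbuf carrying an OA configuration, along the
lines of what the new uAPI expects: DR1 holds the fd of the perf stream
(which must be the currently open exclusive stream owned by this process) and
DR4 the OA configuration to program. The perf_fd/oa_config_id values and the
helper name are assumptions for illustration; the objects array ends with the
batch buffer as usual:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Illustrative helper: submit 'objects' (last entry being the batch) and
 * ask the kernel to program 'oa_config_id' on the OA unit beforehand. */
static int exec_with_oa_config(int drm_fd, int perf_fd, uint32_t oa_config_id,
			       struct drm_i915_gem_exec_object2 *objects,
			       uint32_t object_count)
{
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objects;
	execbuf.buffer_count = object_count;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_PERF_CONFIG;
	execbuf.DR1 = perf_fd;       /* fd of the i915 perf stream we own */
	execbuf.DR4 = oa_config_id;  /* OA configuration to program */

	return drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}

Note that when I915_EXEC_PERF_CONFIG is not set, DR1 and DR4 must remain zero
as before, and the execbuf is rejected with -EINVAL if the fd in DR1 does not
match the currently exclusive perf stream.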