@@ -1660,6 +1660,11 @@ enum i915_oa_event_state {
I915_OA_EVENT_STOPPED,
};
+enum i915_profile_mode {
+ I915_PROFILE_OA = 0,
+ I915_PROFILE_MAX,
+};
+
struct i915_oa_rcs_node {
struct list_head head;
struct drm_i915_gem_request *req;
@@ -1974,6 +1979,9 @@ struct drm_i915_private {
struct work_struct work_timer;
struct work_struct work_event_destroy;
} oa_pmu;
+
+ void (*insert_profile_cmd[I915_PROFILE_MAX])
+ (struct intel_ringbuffer *ringbuf, u32 ctx_id);
#endif
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3154,6 +3162,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
#else
static inline void
i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3161,6 +3170,8 @@ i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
static inline void
i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context) {}
+static inline void
+i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id) {}
#endif
/* i915_gem_evict.c */
@@ -1317,6 +1317,9 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
goto error;
}
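+	/* Emit any registered profiling commands ahead of the batch */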
+ i915_insert_profiling_cmd(ring->buffer,
+ i915_execbuffer2_get_context_id(*args));
+
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
@@ -1339,6 +1342,9 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
return ret;
}
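+	/* Emit any registered profiling commands after the batch */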
+ i915_insert_profiling_cmd(ring->buffer,
+ i915_execbuffer2_get_context_id(*args));
+
trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
i915_gem_execbuffer_move_to_active(vmas, ring);
@@ -25,6 +25,78 @@ static int hsw_perf_format_sizes[] = {
64 /* C4_B8_HSW */
};
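+/*
+ * i915_insert_profiling_cmd - invoke any registered profiling callbacks
+ *
+ * Walks dev_priv->insert_profile_cmd[] and calls each installed callback so
+ * it can emit its commands for this submission. Callbacks are installed
+ * while the corresponding perf event is active and cleared when it stops.
+ */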
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+ struct intel_engine_cs *ring = ringbuf->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ int i;
+
+ for (i = I915_PROFILE_OA; i < I915_PROFILE_MAX; i++) {
+ if (dev_priv->insert_profile_cmd[i])
+ dev_priv->insert_profile_cmd[i](ringbuf, ctx_id);
+ }
+}
+
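+/*
+ * i915_oa_insert_cmd - emit an MI_REPORT_PERF_COUNT for this submission
+ *
+ * Reserves a snapshot slot in the OA RCS buffer, queues a node recording
+ * the context id and request, and emits MI_REPORT_PERF_COUNT so the GPU
+ * writes an OA report into that slot when the ring reaches the command.
+ */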
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+ struct intel_engine_cs *ring = ringbuf->ring;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ struct drm_i915_gem_object *obj = dev_priv->oa_pmu.oa_rcs_buffer.obj;
+ struct i915_oa_rcs_node *entry;
+ unsigned long lock_flags;
+ u32 addr = 0;
+ int ret;
+
+ /* OA counters are only supported on the render ring */
+ if (ring->id != RCS)
+ return;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+		DRM_ERROR("Failed to allocate OA profiling node\n");
+ return;
+ }
+ entry->ctx_id = ctx_id;
+ i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
+
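+	/* Reserve the next free snapshot slot in the OA RCS buffer, wrapping
+	 * back to the start once the end of the buffer is reached.
+	 */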
+ spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+	if (list_empty(&dev_priv->oa_pmu.node_list)) {
+		entry->offset = 0;
+	} else {
+ struct i915_oa_rcs_node *last_entry;
+ int max_offset = dev_priv->oa_pmu.oa_rcs_buffer.node_count *
+ dev_priv->oa_pmu.oa_rcs_buffer.node_size;
+
+ last_entry = list_last_entry(&dev_priv->oa_pmu.node_list,
+ struct i915_oa_rcs_node, head);
+ entry->offset = last_entry->offset +
+ dev_priv->oa_pmu.oa_rcs_buffer.node_size;
+
+		if (entry->offset >= max_offset)
+			entry->offset = 0;
+ }
+ list_add_tail(&entry->head, &dev_priv->oa_pmu.node_list);
+ spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+
+ addr = i915_gem_obj_ggtt_offset(obj) + entry->offset;
+
+ /* addr should be 64 byte aligned */
+ BUG_ON(addr & 0x3f);
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret) {
+		/* Unwind: drop the node queued above so a stale snapshot
+		 * slot is never forwarded for this request.
+		 */
+		spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
+		list_del(&entry->head);
+		spin_unlock_irqrestore(&dev_priv->oa_pmu.lock, lock_flags);
+		i915_gem_request_assign(&entry->req, NULL);
+		kfree(entry);
+		return;
+	}
+
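+	/* Ask the GPU to write an OA report into the reserved slot, tagged
+	 * with this request's seqno as the report id.
+	 */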
+ intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0));
+ intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT);
+ intel_ring_emit(ring, ring->outstanding_lazy_request->seqno);
+ intel_ring_emit(ring, MI_NOOP);
+ intel_ring_advance(ring);
+
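+	/* Mark the buffer as written by the GPU and keep its VMA active
+	 * until this request completes.
+	 */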
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+ i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
+}
+
static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv,
u8 *snapshot,
struct perf_event *event)
@@ -1025,6 +1097,10 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
dev_priv->oa_pmu.event_state = I915_OA_EVENT_STARTED;
update_oacontrol(dev_priv);
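+	/* When profiling multiple contexts, install the hook that emits
+	 * per-batch OA report commands at submission time.
+	 */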
+ if (dev_priv->oa_pmu.multiple_ctx_mode)
+ dev_priv->insert_profile_cmd[I915_PROFILE_OA] =
+ i915_oa_insert_cmd;
+
/* Reset the head ptr to ensure we don't forward reports relating
* to a previous perf event */
oastatus1 = I915_READ(GEN7_OASTATUS1);
@@ -1061,6 +1137,7 @@ static void i915_oa_event_stop(struct perf_event *event, int flags)
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
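+	/* Stop emitting per-batch OA report commands for new submissions */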
+ dev_priv->insert_profile_cmd[I915_PROFILE_OA] = NULL;
dev_priv->oa_pmu.event_state = I915_OA_EVENT_STOP_IN_PROGRESS;
list_for_each_entry(entry, &dev_priv->oa_pmu.node_list, head)
entry->discard = true;