@@ -99,6 +99,79 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
queue_hdr->wrap_count++;
}
+/* Map a ring to its execbuffer ring-select value (I915_EXEC_<ring> = id + 1) */
+#define ring_id_mask(ring) ((ring)->id + 1)
+
+/* Emit a GPU timestamp write into the next gen_pmu sample node */
+void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+				int perftag)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_ts_node_info *node_info = NULL;
+	struct drm_i915_ts_queue_header *queue_hdr =
+			(struct drm_i915_ts_queue_header *)
+			dev_priv->gen_pmu.buffer.addr;
+	void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
+	int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset);
+	u32 node_offset, timestamp_offset, addr = 0;
+	int ret;
+
+	struct drm_i915_ts_node *nodes = data_ptr;
+	int num_nodes = data_size / sizeof(*nodes);
+	int index = queue_hdr->node_count % num_nodes; /* circular queue slot */
+
+	timestamp_offset = offsetof(struct drm_i915_ts_data, ts_low);
+
+	/* GGTT address of this node's timestamp field (GPU write target) */
+	node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
+			queue_hdr->data_offset +
+			index * sizeof(struct drm_i915_ts_node);
+	addr = node_offset +
+		offsetof(struct drm_i915_ts_node, timestamp) +
+		timestamp_offset;
+
+	if (ring->id == RCS) {
+		/* Render ring: PIPE_CONTROL with timestamp post-sync op */
+		ret = intel_ring_begin(ring, 6);
+		if (ret)
+			return;
+
+		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+		intel_ring_emit(ring,
+				PIPE_CONTROL_GLOBAL_GTT_IVB |
+				PIPE_CONTROL_TIMESTAMP_WRITE);
+		intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
+		intel_ring_emit(ring, 0); /* imm low, must be zero */
+		intel_ring_emit(ring, 0); /* imm high, must be zero */
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		/* Other rings: MI_FLUSH_DW with timestamp post-sync op */
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return;
+
+		intel_ring_emit(ring,
+				MI_FLUSH_DW | MI_FLUSH_DW_OP_STAMP);
+		intel_ring_emit(ring, addr | MI_FLUSH_DW_USE_GTT);
+		intel_ring_emit(ring, 0); /* imm low, must be zero */
+		intel_ring_emit(ring, 0); /* imm high, must be zero */
+		intel_ring_advance(ring);
+	}
+	/* Fill in node metadata so the sample can be matched to its request */
+	node_info = &nodes[index].node_info;
+	i915_gem_request_assign(&node_info->req,
+				ring->outstanding_lazy_request);
+	node_info->pid = current->pid;
+	node_info->ctx_id = ctx_id;
+	node_info->ring = ring_id_mask(ring);
+	node_info->perftag = perftag;
+	queue_hdr->node_count++;
+	if (queue_hdr->node_count > num_nodes) /* XXX: bumps every insert past capacity */
+		queue_hdr->wrap_count++;
+}
+
static void init_oa_async_buf_queue(struct drm_i915_private *dev_priv)
{
struct drm_i915_oa_async_queue_header *hdr =
@@ -344,6 +417,7 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
container_of(__work, typeof(*dev_priv),
gen_pmu.work_event_stop);
struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
+ struct drm_i915_insert_cmd *entry, *next;
struct drm_i915_ts_queue_header *hdr =
(struct drm_i915_ts_queue_header *)
dev_priv->gen_pmu.buffer.addr;
@@ -361,6 +435,13 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
if (ret)
return;
+ list_for_each_entry_safe(entry, next, &dev_priv->profile_cmd, list) {
+ if (entry->insert_cmd == i915_gen_insert_cmd_ts) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
i915_gen_pmu_wait_gpu(dev_priv);
/* Ensure that all requests are completed*/
@@ -1430,10 +1511,17 @@ static void i915_gen_event_start(struct perf_event *event, int flags)
struct drm_i915_private *dev_priv =
container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
unsigned long lock_flags;
+ struct drm_i915_insert_cmd *entry;
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry)
+ return;
+ entry->insert_cmd = i915_gen_insert_cmd_ts;
spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
dev_priv->gen_pmu.event_active = true;
+ list_add_tail(&entry->list, &dev_priv->profile_cmd);
spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags);
@@ -347,6 +347,7 @@
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_FLUSH_DW_OP_STAMP (3<<14) /* post-sync op 3: write timestamp */
#define MI_FLUSH_DW_OP_MASK (3<<14)
#define MI_FLUSH_DW_NOTIFY (1<<8)
#define MI_INVALIDATE_BSD (1<<7)
@@ -422,6 +423,7 @@
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
#define PIPE_CONTROL_QW_WRITE (1<<14)
+#define PIPE_CONTROL_TIMESTAMP_WRITE (3<<14) /* post-sync op 3: write timestamp */
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)