@@ -1691,8 +1691,10 @@ struct drm_i915_oa_async_queue_header {
struct drm_i915_oa_async_node_info {
__u32 pid;
__u32 ctx_id;
+ __u32 perftag;
+ __u32 padding;
struct drm_i915_gem_request *req;
- __u32 pad[12];
+ __u32 pad[10];
};
struct drm_i915_oa_async_node {
@@ -3164,7 +3166,8 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int perftag);
#else
static inline void
i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -1320,7 +1320,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
exec_len = args->batch_len;
i915_insert_profiling_cmd(ring->buffer,
- i915_execbuffer2_get_context_id(*args));
+ i915_execbuffer2_get_context_id(*args),
+ i915_execbuffer2_get_perftag(*args));
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
@@ -1344,7 +1345,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
}
i915_insert_profiling_cmd(ring->buffer,
- i915_execbuffer2_get_context_id(*args));
+ i915_execbuffer2_get_context_id(*args),
+ i915_execbuffer2_get_perftag(*args));
trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
@@ -27,20 +27,23 @@ static int hsw_perf_format_sizes[] = {
struct drm_i915_insert_cmd {
struct list_head list;
- void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+ void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int perftag);
};
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int perftag)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct drm_i915_insert_cmd *entry;
list_for_each_entry(entry, &dev_priv->profile_cmd, list)
- entry->insert_cmd(ringbuf, ctx_id);
+ entry->insert_cmd(ringbuf, ctx_id, perftag);
}
-void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int perftag)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -90,6 +93,7 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
node_info->pid = current->pid;
node_info->ctx_id = ctx_id;
+ node_info->perftag = perftag;
queue_hdr->node_count++;
if (queue_hdr->node_count > num_nodes)
queue_hdr->wrap_count++;
@@ -127,6 +127,8 @@ enum drm_i915_oa_event_type {
struct drm_i915_oa_async_node_footer {
__u32 pid;
__u32 ctx_id;
+ __u32 perftag;
+ __u32 pad;
};
/* Each region is a minimum of 16k, and there are at most 255 of them.
@@ -797,7 +799,7 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
__u64 flags;
__u64 rsvd1; /* now used for context info */
- __u64 rsvd2;
+ __u64 rsvd2; /* low 32 bits now used for perftag */
};
/** Resets the SO write offset registers for transform feedback on gen7. */
@@ -835,7 +837,12 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_BSD_RING1 (1<<13)
#define I915_EXEC_BSD_RING2 (2<<13)
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that a perftag is passed in the low 32 bits of the
+ * rsvd2 field of struct drm_i915_gem_execbuffer2.
+ */
+#define I915_EXEC_PERFTAG (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -843,6 +850,10 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+#define I915_EXEC_PERFTAG_MASK (0xffffffff)
+#define i915_execbuffer2_get_perftag(eb2) \
+ ((eb2).rsvd2 & I915_EXEC_PERFTAG_MASK)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;