@@ -1673,6 +1673,7 @@ struct i915_oa_rcs_node {
bool discard;
u32 ctx_id;
u32 pid;
+ u32 tag;
};
extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -1983,11 +1984,12 @@ struct drm_i915_private {
struct work_struct forward_work;
struct work_struct event_destroy_work;
#define I915_OA_SAMPLE_PID (1<<0)
+#define I915_OA_SAMPLE_TAG (1<<1)
int sample_info_flags;
} oa_pmu;
void (*emit_profiling_data[I915_PROFILE_MAX])
- (struct drm_i915_gem_request *req, u32 global_ctx_id);
+ (struct drm_i915_gem_request *req, u32 global_ctx_id, u32 tag);
#endif
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3169,7 +3171,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_emit_profiling_data(struct drm_i915_gem_request *req,
- u32 global_ctx_id);
+ u32 global_ctx_id, u32 tag);
#else
static inline void
i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3178,7 +3180,7 @@ static inline void
i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context) {}
-void i915_emit_profiling_data(struct drm_i915_gem_request *req,
- u32 global_ctx_id) {};
+static inline void i915_emit_profiling_data(struct drm_i915_gem_request *req,
+ u32 global_ctx_id, u32 tag) {} /* no-op stub; static inline like the sibling stubs so includers do not each emit an external definition */
#endif
/* i915_gem_evict.c */
@@ -1317,7 +1317,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
goto error;
}
- i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+ i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id,
+ i915_execbuffer2_get_tag(*args));
exec_len = args->batch_len;
if (cliprects) {
@@ -1341,7 +1342,8 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
return ret;
}
- i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id);
+ i915_emit_profiling_data(intel_ring_get_request(ring), ctx->global_id,
+ i915_execbuffer2_get_tag(*args));
trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
@@ -26,7 +26,7 @@ static int hsw_perf_format_sizes[] = {
};
void i915_emit_profiling_data(struct drm_i915_gem_request *req,
- u32 global_ctx_id)
+ u32 global_ctx_id, u32 tag)
{
struct intel_engine_cs *ring = req->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -34,7 +34,8 @@ void i915_emit_profiling_data(struct drm_i915_gem_request *req,
for (i = I915_PROFILE_OA; i < I915_PROFILE_MAX; i++) {
if (dev_priv->emit_profiling_data[i])
- dev_priv->emit_profiling_data[i](req, global_ctx_id);
+ dev_priv->emit_profiling_data[i](req, global_ctx_id,
+ tag);
}
}
@@ -42,7 +43,7 @@ void i915_emit_profiling_data(struct drm_i915_gem_request *req,
* Emits the commands to capture OA perf report, into the Render CS
*/
static void i915_oa_emit_perf_report(struct drm_i915_gem_request *req,
- u32 global_ctx_id)
+ u32 global_ctx_id, u32 tag)
{
struct intel_engine_cs *ring = req->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -71,6 +72,8 @@ static void i915_oa_emit_perf_report(struct drm_i915_gem_request *req,
entry->ctx_id = global_ctx_id;
if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
entry->pid = current->pid;
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+ entry->tag = tag;
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
@@ -308,6 +311,7 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
u8 *snapshot, *current_ptr;
struct drm_i915_oa_node_ctx_id *ctx_info;
struct drm_i915_oa_node_pid *pid_info;
+ struct drm_i915_oa_node_tag *tag_info;
struct perf_raw_record raw;
u64 snapshot_ts;
@@ -326,6 +330,13 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
current_ptr = snapshot + snapshot_size;
}
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG) {
+ tag_info = (struct drm_i915_oa_node_tag *)current_ptr;
+ tag_info->tag = node->tag;
+ snapshot_size += sizeof(*tag_info);
+ current_ptr = snapshot + snapshot_size;
+ }
+
/* Flush the periodic snapshots till the ts of this OA report */
snapshot_ts = *(u64 *)(snapshot + 4);
flush_oa_snapshots(dev_priv, true, snapshot_ts);
@@ -686,6 +697,9 @@ static int init_oa_rcs_buffer(struct perf_event *event)
if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
node_size += sizeof(struct drm_i915_oa_node_pid);
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+ node_size += sizeof(struct drm_i915_oa_node_tag);
+
/* node size has to be aligned to 64 bytes, since only 64 byte aligned
* addresses can be given to OA unit for dumping OA reports */
node_size = ALIGN(node_size, 64);
@@ -841,6 +855,9 @@ static int i915_oa_event_init(struct perf_event *event)
if (oa_attr.sample_pid)
dev_priv->oa_pmu.sample_info_flags |=
I915_OA_SAMPLE_PID;
+ if (oa_attr.sample_tag)
+ dev_priv->oa_pmu.sample_info_flags |=
+ I915_OA_SAMPLE_TAG;
}
report_format = oa_attr.format;
@@ -94,7 +94,8 @@ typedef struct _drm_i915_oa_attr {
__u64 single_context : 1,
multiple_context_mode:1,
sample_pid:1,
- __reserved_1:61;
+ sample_tag:1,
+ __reserved_1:60;
} drm_i915_oa_attr_t;
/* Header for PERF_RECORD_DEVICE type events */
@@ -134,6 +135,11 @@ struct drm_i915_oa_node_pid {
__u32 pad;
};
+struct drm_i915_oa_node_tag { /* per-sample tag record, written after the preceding sample data when I915_OA_SAMPLE_TAG is set */
+ __u32 tag; /* copied from the RCS node's tag, which is taken from the execbuffer2 tag at submission */
+ __u32 pad; /* NOTE(review): presumably pads the record to 8 bytes; not written by the visible forwarding code - confirm it is zeroed */
+};
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -848,6 +854,11 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* Tag lives in the upper 32 bits of rsvd1; shift it down so it survives narrowing to a u32 argument */
+#define I915_EXEC_TAG_MASK (0xffffffff00000000ULL)
+#define i915_execbuffer2_get_tag(eb2) \
+ (((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;