@@ -1672,6 +1672,7 @@ struct i915_oa_rcs_node {
bool discard;
u32 ctx_id;
u32 pid;
+ u32 tag;
};
extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -1980,11 +1981,12 @@ struct drm_i915_private {
struct work_struct work_timer;
struct work_struct work_event_destroy;
#define I915_OA_SAMPLE_PID (1<<0)
+#define I915_OA_SAMPLE_TAG (1<<1)
int sample_info_flags;
} oa_pmu;
void (*insert_profile_cmd[I915_PROFILE_MAX])
- (struct intel_ringbuffer *ringbuf, u32 ctx_id);
+ (struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag);
#endif
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3165,7 +3167,8 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context);
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int tag);
#else
static inline void
i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
@@ -3174,7 +3177,7 @@ static inline void
i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
struct intel_context *context) {}
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf,
- u32 ctx_id) {};
+static inline void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf,
+ u32 ctx_id, int tag) {} /* no-op stub for the #else configuration; static inline so each includer gets a private copy */
#endif
/* i915_gem_evict.c */
@@ -1203,6 +1203,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
u64 exec_len;
int instp_mode;
u32 instp_mask;
+ u32 tag = 0;
int i, ret = 0;
if (args->num_cliprects != 0) {
@@ -1317,8 +1318,11 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
goto error;
}
+ if (args->flags & I915_EXEC_TAG)
+ tag = i915_execbuffer2_get_tag(*args);
+
i915_insert_profiling_cmd(ring->buffer,
- i915_execbuffer2_get_context_id(*args));
+ i915_execbuffer2_get_context_id(*args), tag);
exec_len = args->batch_len;
if (cliprects) {
@@ -1343,7 +1347,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
}
i915_insert_profiling_cmd(ring->buffer,
- i915_execbuffer2_get_context_id(*args));
+ i915_execbuffer2_get_context_id(*args), tag);
trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
@@ -25,7 +25,8 @@ static int hsw_perf_format_sizes[] = {
64 /* C4_B8_HSW */
};
-void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id,
+ int tag)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -33,11 +34,11 @@ void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
for (i = I915_PROFILE_OA; i < I915_PROFILE_MAX; i++) {
if (dev_priv->insert_profile_cmd[i])
- dev_priv->insert_profile_cmd[i](ringbuf, ctx_id);
+ dev_priv->insert_profile_cmd[i](ringbuf, ctx_id, tag);
}
}
-void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag)
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
@@ -59,6 +60,8 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
entry->ctx_id = ctx_id;
if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
entry->pid = current->pid;
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+ entry->tag = tag;
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
spin_lock_irqsave(&dev_priv->oa_pmu.lock, lock_flags);
@@ -320,6 +323,7 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
u8 *snapshot, *current_ptr;
struct drm_i915_oa_node_ctx_id *ctx_info;
struct drm_i915_oa_node_pid *pid_info;
+ struct drm_i915_oa_node_tag *tag_info;
struct perf_raw_record raw;
u64 snapshot_ts;
@@ -338,6 +342,13 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv,
current_ptr = snapshot + snapshot_size;
}
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG) {
+ tag_info = (struct drm_i915_oa_node_tag *)current_ptr;
+ tag_info->tag = node->tag;
+ snapshot_size += sizeof(*tag_info);
+ current_ptr = snapshot + snapshot_size;
+ }
+
/* Flush the periodic snapshots till the ts of this OA report */
snapshot_ts = *(u64 *)(snapshot + 4);
flush_oa_snapshots(dev_priv, true, snapshot_ts);
@@ -694,6 +705,9 @@ static int init_oa_rcs_buffer(struct perf_event *event)
if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_PID)
node_size += sizeof(struct drm_i915_oa_node_pid);
+ if (dev_priv->oa_pmu.sample_info_flags & I915_OA_SAMPLE_TAG)
+ node_size += sizeof(struct drm_i915_oa_node_tag);
+
/* node size has to be aligned to 64 bytes, since only 64 byte aligned
* addresses can be given to OA unit for dumping OA reports */
node_size = ALIGN(node_size, 64);
@@ -850,6 +864,9 @@ static int i915_oa_event_init(struct perf_event *event)
if (oa_attr.sample_pid)
dev_priv->oa_pmu.sample_info_flags |=
I915_OA_SAMPLE_PID;
+ if (oa_attr.sample_tag)
+ dev_priv->oa_pmu.sample_info_flags |=
+ I915_OA_SAMPLE_TAG;
}
report_format = oa_attr.format;
@@ -94,7 +94,8 @@ typedef struct _drm_i915_oa_attr {
__u64 single_context : 1,
multiple_context_mode:1,
sample_pid:1,
- __reserved_1:61;
+ sample_tag:1,
+ __reserved_1:60;
} drm_i915_oa_attr_t;
/* Header for PERF_RECORD_DEVICE type events */
@@ -134,6 +135,11 @@ struct drm_i915_oa_node_pid {
__u32 pad;
};
+struct drm_i915_oa_node_tag {
+ __u32 tag; /* copied from the RCS node's tag when I915_OA_SAMPLE_TAG is set */
+ __u32 pad; /* NOTE(review): not written in the visible hunks; presumably padding to 8 bytes - confirm */
+};
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -802,7 +808,7 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
__u64 flags;
__u64 rsvd1; /* now used for context info */
- __u64 rsvd2;
+ __u64 rsvd2; /* used for tag */
};
/** Resets the SO write offset registers for transform feedback on gen7. */
@@ -840,7 +846,12 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_BSD_RING1 (1<<13)
#define I915_EXEC_BSD_RING2 (2<<13)
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tells the kernel that userspace passes a tag in the rsvd2 field of the
+ * execbuffer args; the tag is the low 32 bits (see I915_EXEC_TAG_MASK).
+ */
+#define I915_EXEC_TAG (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
@@ -848,6 +859,10 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+#define I915_EXEC_TAG_MASK (0xffffffff) /* the tag is the low 32 bits of rsvd2 */
+#define i915_execbuffer2_get_tag(eb2) \
+ ((eb2).rsvd2 & I915_EXEC_TAG_MASK)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;