@@ -2043,7 +2043,8 @@ struct i915_perf_stream_ops {
*/
void (*emit_sample_capture)(struct i915_perf_stream *stream,
struct drm_i915_gem_request *request,
- bool preallocate);
+ bool preallocate,
+ u32 tag);
/*
* @patch_sample_capture: Patch the offsets in commands to capture perf
@@ -2130,6 +2131,7 @@ struct i915_perf_stream {
u32 last_ctx_id;
u64 last_pid;
+ u32 last_tag;
};
/**
@@ -2268,6 +2270,17 @@ struct i915_perf_cs_sample {
* submitted, pertaining to this perf sample
*/
u64 pid;
+
+ /**
+ * @tag: Tag associated with workload, for which the perf sample is
+ * being collected.
+ *
+ * Userspace can specify tags (provided via execbuffer ioctl), which
+ * can be associated with the perf samples, and be used to functionally
+ * distinguish different workload stages, and associate samples with
+ * these different stages.
+ */
+ u32 tag;
};
struct intel_cdclk_state {
@@ -3985,7 +3998,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
extern void i915_perf_register(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
void i915_perf_emit_sample_capture(struct drm_i915_gem_request *req,
- bool preallocate);
+ bool preallocate,
+ u32 tag);
void i915_perf_patch_request(struct drm_i915_gem_request *request);
void i915_perf_streams_mark_idle(struct drm_i915_private *dev_priv);
@@ -245,6 +245,7 @@ struct i915_execbuffer {
*/
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
+ uint32_t tag;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1115,7 +1116,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_perf_emit_sample_capture(rq, true);
+ i915_perf_emit_sample_capture(rq, true, eb->tag);
err = eb->engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
@@ -1123,7 +1124,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- i915_perf_emit_sample_capture(rq, false);
+ i915_perf_emit_sample_capture(rq, false, eb->tag);
GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
i915_vma_move_to_active(batch, rq, 0);
@@ -1974,7 +1975,7 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
- i915_perf_emit_sample_capture(eb->request, true);
+ i915_perf_emit_sample_capture(eb->request, true, eb->tag);
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
@@ -1984,7 +1985,7 @@ static int eb_submit(struct i915_execbuffer *eb)
if (err)
return err;
- i915_perf_emit_sample_capture(eb->request, false);
+ i915_perf_emit_sample_capture(eb->request, false, eb->tag);
return 0;
}
@@ -2232,6 +2233,8 @@ static int eb_submit(struct i915_execbuffer *eb)
if (!eb.engine)
return -EINVAL;
+ eb.tag = i915_execbuffer2_get_tag(*args);
+
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
if (!HAS_RESOURCE_STREAMER(eb.i915)) {
DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
@@ -199,6 +199,7 @@ struct drm_i915_gem_request {
u32 *pre_oa_offset;
u32 *post_oa_offset;
u64 pid;
+ u32 tag;
};
extern const struct dma_fence_ops i915_fence_ops;
@@ -283,6 +283,7 @@
#define INVALID_CTX_ID 0xffffffff
#define INVALID_PID 0xffffffff
+#define INVALID_TAG 0xffffffff
/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK 0x3f
@@ -296,6 +297,7 @@ struct i915_perf_sample_data {
u64 source;
u64 ctx_id;
u64 pid;
+ u64 tag;
const u8 *report;
};
@@ -352,6 +354,7 @@ struct i915_perf_sample_data {
#define SAMPLE_OA_SOURCE (1<<1)
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
+#define SAMPLE_TAG (1<<4)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -500,11 +503,14 @@ static int i915_emit_oa_report_capture(struct drm_i915_gem_request *request,
* @stream: Stream to which this request corresponds.
* @request: request in whose context the metrics are being collected.
* @preallocate: allocate space in ring for related sample.
+ * @tag: userspace provided tag to be associated with the perf sample
*/
static void i915_perf_stream_emit_sample_capture(
struct i915_perf_stream *stream,
struct drm_i915_gem_request *request,
- bool preallocate)
+ bool preallocate,
+ u32 tag)
+
{
struct reservation_object *resv = stream->cs_buffer.vma->resv;
int ret;
@@ -518,6 +524,9 @@ static void i915_perf_stream_emit_sample_capture(
if (stream->sample_flags & SAMPLE_PID)
request->pid = current->pid;
+ if (stream->sample_flags & SAMPLE_TAG)
+ request->tag = tag;
+
reservation_object_lock(resv, NULL);
if (reservation_object_reserve_shared(resv) == 0)
reservation_object_add_shared_fence(resv, &request->fence);
@@ -532,12 +541,14 @@ static void i915_perf_stream_emit_sample_capture(
* the command stream of a GPU engine.
* @request: request in whose context the metrics are being collected.
* @preallocate: allocate space in ring for related sample.
+ * @tag: userspace provided tag to be associated with the perf sample
*
* The function provides a hook through which the commands to capture perf
* metrics, are inserted into the command stream of a GPU engine.
*/
void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
- bool preallocate)
+ bool preallocate,
+ u32 tag)
{
struct drm_i915_private *dev_priv = request->i915;
struct i915_perf_stream *stream;
@@ -551,7 +562,8 @@ void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
&dev_priv->perf.oa.srcu);
if (stream && stream->enabled && stream->cs_mode)
stream->ops->emit_sample_capture(stream, request,
- preallocate);
+ preallocate, tag);
+
srcu_read_unlock(&dev_priv->perf.oa.srcu, idx);
}
@@ -633,6 +645,7 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
sample->id = sample_id;
sample->ctx_id = request->ctx->hw_id;
sample->pid = request->pid;
+ sample->tag = request->tag;
if (stream->sample_flags &
(SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE))
i915_perf_stream_patch_sample_oa(stream, request,
@@ -956,6 +969,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
buf += 8;
}
+ if (sample_flags & SAMPLE_TAG) {
+ if (copy_to_user(buf, &data->tag, 8))
+ return -EFAULT;
+ buf += 8;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -996,6 +1015,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = stream->last_pid;
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = stream->last_tag;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1612,6 +1634,14 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
stream->last_pid = INVALID_PID;
}
+ if (sample_flags & SAMPLE_TAG) {
+ data.tag = node->tag;
+ if (node->id == PRE_REQUEST_SAMPLE_ID)
+ stream->last_tag = node->tag;
+ else
+ stream->last_tag = INVALID_TAG;
+ }
+
return append_perf_sample(stream, buf, count, offset, &data);
}
@@ -2844,7 +2874,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE);
bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
- bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+ bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+ SAMPLE_TAG);
struct i915_perf_stream *curr_stream;
struct intel_engine_cs *engine = NULL;
int ret;
@@ -3006,7 +3037,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID sampling requires a ring to be specified");
+ DRM_ERROR("PID/TAG sampling requires a ring to be specified\n");
ret = -EINVAL;
goto err_enable;
}
@@ -3039,6 +3070,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
stream->last_pid = INVALID_PID;
}
+ if (props->sample_flags & SAMPLE_TAG) {
+ stream->sample_flags |= SAMPLE_TAG;
+ stream->sample_size += 8;
+ stream->last_tag = INVALID_TAG;
+ }
+
idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
curr_stream = srcu_dereference(
dev_priv->perf.oa.exclusive_stream,
@@ -3762,6 +3799,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_PID:
props->sample_flags |= SAMPLE_PID;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_TAG:
+ props->sample_flags |= SAMPLE_TAG;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -966,6 +966,14 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* Upper 32 bits of rsvd1 carry the userspace tag; widen before shifting. */
+#define I915_EXEC_TAG_MASK (0xffffffff00000000ULL)
+#define i915_execbuffer2_set_tag(eb2, tag) \
+	((eb2).rsvd1 = ((eb2).rsvd1 & ~I915_EXEC_TAG_MASK) | \
+	 (((__u64)(tag) << 32) & I915_EXEC_TAG_MASK))
+#define i915_execbuffer2_get_tag(eb2) \
+	(((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
@@ -1445,6 +1453,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_PID,
+ /**
+ * The value of this property set to 1 requests inclusion of tag in the
+ * perf sample data.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_TAG,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1513,6 +1527,7 @@ enum drm_i915_perf_record_type {
* { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
* { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
+ * { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/