@@ -2245,6 +2245,11 @@ struct i915_perf_cs_sample {
/* Is this sample prior to request start or post request end */
enum request_sample_id id;
+
+ /**
+ * @ctx_id: Context ID associated with this perf sample
+ */
+ u32 ctx_id;
};
struct intel_cdclk_state {
@@ -293,6 +293,7 @@
/* Data common to periodic and RCS based OA samples */
struct i915_perf_sample_data {
u64 source;
+ u64 ctx_id; /* copied to userspace as 8 bytes when SAMPLE_CTX_ID is set */
const u8 *report;
};
@@ -347,6 +348,7 @@ struct i915_perf_sample_data {
#define SAMPLE_OA_REPORT (1<<0)
#define SAMPLE_OA_SOURCE (1<<1)
+#define SAMPLE_CTX_ID (1<<2)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -620,6 +622,7 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
list_move_tail(&sample->link, &stream->cs_samples);
sample->request = i915_gem_request_get(request);
sample->id = sample_id;
+ sample->ctx_id = request->ctx->hw_id;
if (stream->sample_flags &
(SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE))
i915_perf_stream_patch_sample_oa(stream, request,
@@ -877,6 +880,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
buf += 8;
}
+ if (sample_flags & SAMPLE_CTX_ID) {
+ if (copy_to_user(buf, &data->ctx_id, 8))
+ return -EFAULT;
+ buf += 8;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -903,12 +912,27 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
char __user *buf, size_t count,
size_t *offset, const u8 *report)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
u32 sample_flags = stream->sample_flags;
struct i915_perf_sample_data data = { 0 };
+ u32 *report32 = (u32 *)report;
if (sample_flags & SAMPLE_OA_SOURCE)
data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER;
+ if (sample_flags & SAMPLE_CTX_ID) {
+ if (INTEL_INFO(dev_priv)->gen < 8)
+ data.ctx_id = 0;
+ else {
+ /*
+ * XXX: Just keep the lower 21 bits for now since I'm
+ * not entirely sure if the HW touches any of the higher
+ * bits in this field
+ */
+ data.ctx_id = report32[2] & 0x1fffff;
+ }
+ }
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1487,6 +1511,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_OA_SOURCE)
data.source = I915_PERF_SAMPLE_OA_SOURCE_CS;
+ if (sample_flags & SAMPLE_CTX_ID)
+ data.ctx_id = node->ctx_id;
+
return append_perf_sample(stream, buf, count, offset, &data);
}
@@ -2665,6 +2692,19 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
struct intel_engine_cs *engine = NULL;
int ret;
+ if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) {
+ if (IS_HASWELL(dev_priv)) {
+ DRM_ERROR("On HSW, context ID sampling only supported "
+ "via command stream\n");
+ return -EINVAL;
+ } else if (!i915.enable_execlists) {
+ DRM_ERROR("On Gen8+ without execlists, context ID "
+ "sampling only supported via "
+ "command stream\n");
+ return -EINVAL;
+ }
+ }
+
/* We set up some ratelimit state to potentially throttle any _NOTES
* about spurious, invalid OA reports which we don't forward to
* userspace.
@@ -2794,6 +2834,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
}
+ if (props->sample_flags & SAMPLE_CTX_ID) {
+ stream->sample_flags |= SAMPLE_CTX_ID;
+ stream->sample_size += 8;
+ stream->last_ctx_id = INVALID_CTX_ID;
+ }
+
if (props->cs_mode) {
if (!cs_sample_data) {
DRM_DEBUG_DRIVER(
@@ -2803,6 +2849,13 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
goto err_enable;
}
+ if (!(props->sample_flags & SAMPLE_CTX_ID)) {
+ DRM_ERROR("Stream engine given without requesting any "
+ "CS specific property\n");
+ ret = -EINVAL;
+ goto err_enable;
+ }
+
idx = srcu_read_lock(&dev_priv->perf.oa.srcu);
curr_stream = srcu_dereference(
dev_priv->perf.oa.exclusive_stream,
@@ -3520,6 +3573,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
props->engine = engine;
}
break;
+ case DRM_I915_PERF_PROP_SAMPLE_CTX_ID:
+ props->sample_flags |= SAMPLE_CTX_ID;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -1433,6 +1433,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_ENGINE,
+ /**
+ * Setting this property to 1 requests inclusion of the context ID in
+ * the perf sample data.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_CTX_ID,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1499,6 +1505,7 @@ enum drm_i915_perf_record_type {
* struct drm_i915_perf_record_header header;
*
* { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
+ * { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/
This patch enables sampling of the CTX ID corresponding to requests being submitted on the engines through the perf stream. OA reports already embed this information on Gen8+. For previous Gens we can leverage the value sampled from request->ctx->hw_id to associate with OA reports. v2: Updated stream->last_ctx_id to INVALID_CTX_ID during stream_init. Testcase: igt/intel_perf_dapc/perf-ctxid Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++++ drivers/gpu/drm/i915/i915_perf.c | 56 ++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 7 +++++ 3 files changed, 68 insertions(+)