@@ -1982,7 +1982,8 @@ struct i915_perf_stream_ops {
* the batch buffer.
*/
void (*command_stream_hook)(struct i915_perf_stream *stream,
- struct drm_i915_gem_request *request);
+ struct drm_i915_gem_request *request,
+ u32 tag);
};
enum i915_perf_stream_state {
@@ -2171,6 +2172,17 @@ struct i915_perf_cs_sample {
* submitted, pertaining to this perf sample
*/
u32 pid;
+
+ /**
+ * @tag: Tag associated with workload, for which the perf sample is
+ * being collected.
+ *
+ * Userspace can specify tags (provided via execbuffer ioctl), which
+ * can be associated with the perf samples, and be used to functionally
+ * distinguish different workload stages, and associate samples with
+ * these different stages.
+ */
+ u32 tag;
};
struct intel_cdclk_state {
@@ -2627,6 +2639,7 @@ struct drm_i915_private {
u32 last_cmd_stream_ctx_id;
u32 last_pid;
+ u32 last_tag;
struct list_head cs_samples;
spinlock_t sample_lock;
} perf;
@@ -3690,7 +3703,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
void i915_oa_update_reg_state(struct intel_engine_cs *engine,
struct i915_gem_context *ctx,
uint32_t *reg_state);
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *req);
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag);
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
@@ -59,6 +59,7 @@ struct i915_execbuffer_params {
struct intel_engine_cs *engine;
struct i915_gem_context *ctx;
struct drm_i915_gem_request *request;
+ uint32_t tag;
};
struct eb_vmas {
@@ -1441,7 +1442,7 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
if (exec_len == 0)
exec_len = params->batch->size - params->args_batch_start_offset;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
ret = params->engine->emit_bb_start(params->request,
exec_start, exec_len,
@@ -1449,7 +1450,7 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- i915_perf_command_stream_hook(params->request);
+ i915_perf_command_stream_hook(params->request, params->tag);
i915_gem_execbuffer_move_to_active(vmas, params->request);
@@ -1791,6 +1792,7 @@ static void eb_export_fence(struct drm_i915_gem_object *obj,
params->engine = engine;
params->dispatch_flags = dispatch_flags;
params->ctx = ctx;
+ params->tag = i915_execbuffer2_get_tag(*args);
trace_i915_gem_request_queue(params->request, dispatch_flags);
@@ -290,6 +290,7 @@ struct oa_sample_data {
u32 source;
u32 ctx_id;
u32 pid;
+ u32 tag;
const u8 *report;
};
@@ -344,6 +345,7 @@ struct oa_sample_data {
#define SAMPLE_OA_SOURCE_INFO (1<<1)
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
+#define SAMPLE_TAG (1<<4)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -387,7 +389,8 @@ struct perf_open_properties {
* The function provides a hook through which the commands to capture perf
* metrics, are inserted into the command stream of a GPU engine.
*/
-void i915_perf_command_stream_hook(struct drm_i915_gem_request *request)
+void i915_perf_command_stream_hook(struct drm_i915_gem_request *request,
+ u32 tag)
{
struct intel_engine_cs *engine = request->engine;
struct drm_i915_private *dev_priv = engine->i915;
@@ -400,7 +403,7 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *request)
list_for_each_entry(stream, &dev_priv->perf.streams, link) {
if ((stream->state == I915_PERF_STREAM_ENABLED) &&
stream->cs_mode)
- stream->ops->command_stream_hook(stream, request);
+ stream->ops->command_stream_hook(stream, request, tag);
}
mutex_unlock(&dev_priv->perf.streams_lock);
}
@@ -510,10 +513,11 @@ static void insert_perf_sample(struct drm_i915_private *dev_priv,
* metrics into the render command stream
* @stream: An i915-perf stream opened for OA metrics
* @request: request in whose context the metrics are being collected.
- *
+ * @tag: userspace provided tag to be associated with the perf sample
*/
static void i915_perf_command_stream_hook_oa(struct i915_perf_stream *stream,
- struct drm_i915_gem_request *request)
+ struct drm_i915_gem_request *request,
+ u32 tag)
{
struct drm_i915_private *dev_priv = request->i915;
struct i915_gem_context *ctx = request->ctx;
@@ -535,6 +539,7 @@ static void i915_perf_command_stream_hook_oa(struct i915_perf_stream *stream,
sample->ctx_id = ctx->hw_id;
sample->pid = current->pid;
+ sample->tag = tag;
i915_gem_request_assign(&sample->request, request);
insert_perf_sample(dev_priv, sample);
@@ -986,6 +991,12 @@ static int append_oa_sample(struct i915_perf_stream *stream,
buf += 4;
}
+ if (sample_flags & SAMPLE_TAG) {
+ if (copy_to_user(buf, &data->tag, 4))
+ return -EFAULT;
+ buf += 4;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -1025,6 +1036,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_PID)
data.pid = dev_priv->perf.last_pid;
+ if (sample_flags & SAMPLE_TAG)
+ data.tag = dev_priv->perf.last_tag;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1606,6 +1620,11 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream,
dev_priv->perf.last_pid = node->pid;
}
+ if (sample_flags & SAMPLE_TAG) {
+ data.tag = node->tag;
+ dev_priv->perf.last_tag = node->tag;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -2682,7 +2701,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE_INFO);
- bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+ bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+ SAMPLE_TAG);
bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
int ret;
@@ -2819,7 +2839,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID sampling requires a ring to be specified");
+ DRM_ERROR("PID/TAG sampling requires a ring to be specified\n");
ret = -EINVAL;
goto err_enable;
}
@@ -2851,6 +2871,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 4;
}
+ if (props->sample_flags & SAMPLE_TAG) {
+ stream->sample_flags |= SAMPLE_TAG;
+ stream->sample_size += 4;
+ }
+
ret = alloc_command_stream_buf(dev_priv);
if (ret)
goto err_enable;
@@ -3648,6 +3673,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_PID:
props->sample_flags |= SAMPLE_PID;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_TAG:
+ props->sample_flags |= SAMPLE_TAG;
+ break;
default:
MISSING_CASE(id);
DRM_DEBUG("Unknown i915 perf property ID\n");
@@ -899,6 +899,11 @@ struct drm_i915_gem_execbuffer2 {
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
+/* Upper 32 bits of rsvd1 carry the userspace tag; the getter shifts it down to a 32-bit value */
+#define I915_EXEC_TAG_MASK		(0xffffffff00000000ULL)
+#define i915_execbuffer2_get_tag(eb2) \
+	(((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
@@ -1379,6 +1384,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_PID,
+ /**
+ * The value of this property set to 1 requests inclusion of tag in the
+ * perf sample data.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_TAG,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1447,6 +1458,7 @@ enum drm_i915_perf_record_type {
* { u32 source_info; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
* { u32 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u32 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
+ * { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/