@@ -2260,12 +2260,24 @@ struct i915_perf_cs_sample {
struct drm_i915_gem_request *request;
/**
- * @oa_offset: Offset into ``&stream->cs_buffer``
- * where the perf metrics will be collected, when the commands inserted
- * into the command stream are executed by GPU.
+ * @oa_offset: Offset into ``&stream->cs_buffer``
+ * where the OA report will be collected (if the stream is configured
+ * for collection of OA samples).
*/
u32 oa_offset;
+ /**
+ * @ts_offset: Offset into ``&stream->cs_buffer``
+ * where the timestamps will be collected (if the stream is configured
+ * for collection of timestamp data).
+ */
+ u32 ts_offset;
+
+ /**
+ * @size: buffer size corresponding to this perf sample
+ */
+ u32 size;
+
/* Is this sample prior to request start or post request end */
enum request_sample_id id;
@@ -200,6 +200,8 @@ struct drm_i915_gem_request {
u32 *post_oa_offset;
u64 pid;
u32 tag;
+ u32 *pre_ts_offset;
+ u32 *post_ts_offset;
};
extern const struct dma_fence_ops i915_fence_ops;
@@ -292,12 +292,17 @@
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_CLK_RATIO (1<<5)
-/* Data common to periodic and RCS based OA samples */
+#define OA_ADDR_ALIGN 64 /* byte alignment enforced on a sample's oa_offset */
+#define TS_ADDR_ALIGN 8 /* byte alignment enforced on a sample's ts_offset */
+#define I915_PERF_TS_SAMPLE_SIZE 8 /* bytes copied out per timestamp sample */
+
+/* Data common to perf samples (periodic OA / CS based OA / Timestamps) */
struct i915_perf_sample_data {
u64 source;
u64 ctx_id;
u64 pid;
u64 tag;
+ u64 ts; /* timestamp value; copied to userspace when SAMPLE_TS is set */
const u8 *report;
};
@@ -355,6 +360,7 @@ struct i915_perf_sample_data {
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
#define SAMPLE_TAG (1<<4)
+#define SAMPLE_TS (1<<5)
/**
* struct perf_open_properties - for validated properties given to open a stream
@@ -498,6 +504,86 @@ static int i915_emit_oa_report_capture(struct drm_i915_gem_request *request,
}
/**
+ * i915_emit_ts_capture - Insert the commands to capture timestamp
+ * data into the GPU command stream
+ * @request: request in whose context the timestamps are being collected.
+ * @preallocate: true adds the packet length to reserved_space, false removes it.
+ */
+static int i915_emit_ts_capture(struct drm_i915_gem_request *request,
+ bool preallocate)
+{
+ struct drm_i915_private *dev_priv = request->i915;
+ u32 cmd, len = 6, *cs; /* len: number of dwords written by either branch */
+
+ if (preallocate)
+ request->reserved_space += len; /* NOTE(review): len counts dwords; confirm reserved_space uses the same unit */
+ else
+ request->reserved_space -= len;
+
+ cs = intel_ring_begin(request, 6); /* literal 6 matches len above */
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (request->engine->id == RCS) {
+ if (INTEL_GEN(dev_priv) >= 8)
+ cmd = GFX_OP_PIPE_CONTROL(6);
+ else
+ cmd = GFX_OP_PIPE_CONTROL(5);
+
+ *cs++ = cmd;
+ *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_TIMESTAMP_WRITE;
+ /*
+ * Save the address in the ringbuffer where the offset for timestamp
+ * capture is to be placed during __i915_gem_request_submit.
+ */
+ if (preallocate)
+ request->pre_ts_offset = cs++;
+ else
+ request->post_ts_offset = cs++;
+
+ *cs++ = 0;
+ *cs++ = 0;
+
+ if (INTEL_GEN(dev_priv) >= 8)
+ *cs++ = 0;
+ else
+ *cs++ = MI_NOOP; /* pads the shorter pre-gen8 packet to 6 dwords */
+ } else {
+ uint32_t cmd; /* NOTE(review): shadows the function-scope cmd */
+
+ cmd = MI_FLUSH_DW + 1;
+ if (INTEL_GEN(dev_priv) >= 8)
+ cmd += 1;
+
+ cmd |= MI_FLUSH_DW_OP_STAMP;
+
+ *cs++ = cmd;
+ /*
+ * Save the address in the ringbuffer where the offset for timestamp
+ * capture is to be placed during __i915_gem_request_submit.
+ */
+ if (preallocate)
+ request->pre_ts_offset = cs++;
+ else
+ request->post_ts_offset = cs++;
+
+ *cs++ = 0;
+ *cs++ = 0;
+
+ if (INTEL_GEN(dev_priv) >= 8)
+ *cs++ = 0;
+ else
+ *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP; /* final pad so this branch also emits 6 dwords */
+ }
+
+ intel_ring_advance(request, cs);
+
+ return 0;
+}
+
+/**
* i915_perf_stream_emit_sample_capture - Insert the commands to capture perf
* metrics into the GPU command stream
* @stream: Stream to which this request corresponds.
@@ -519,6 +605,15 @@ static void i915_perf_stream_emit_sample_capture(
ret = i915_emit_oa_report_capture(request, preallocate);
if (ret)
DRM_ERROR("Emit of OA capture commands failed\n");
+ } else if (stream->sample_flags & SAMPLE_TS) {
+ /*
+ * XXX: TS data can be derived from the OA report, so there is no
+ * need to capture it separately for the RCS engine when OA capture
+ * commands have already been emitted.
+ */
+ ret = i915_emit_ts_capture(request, preallocate);
+ if (ret)
+ DRM_ERROR("Emit of TS capture commands failed\n");
}
if (stream->sample_flags & SAMPLE_PID)
@@ -615,6 +710,34 @@ static void i915_perf_stream_patch_sample_oa(struct i915_perf_stream *stream,
}
}
+static void i915_perf_stream_patch_sample_ts(struct i915_perf_stream *stream,
+ struct drm_i915_gem_request *request,
+ struct i915_perf_cs_sample *sample)
+{ /* Write this sample's timestamp address into the ring dword saved on the request. */
+ u32 ts_addr = stream->cs_buffer.vma->node.start + sample->ts_offset; /* NOTE(review): node.start is presumably 64-bit; confirm the u32 truncation is safe */
+
+ switch (sample->id) {
+ case PRE_REQUEST_SAMPLE_ID: /* patches the dword recorded in pre_ts_offset */
+ if (request->engine->id == RCS)
+ *request->pre_ts_offset = ts_addr |
+ PIPE_CONTROL_GLOBAL_GTT;
+ else
+ *request->pre_ts_offset = ts_addr |
+ MI_FLUSH_DW_USE_GTT;
+ break;
+ case POST_REQUEST_SAMPLE_ID: /* patches the dword recorded in post_ts_offset */
+ if (request->engine->id == RCS)
+ *request->post_ts_offset = ts_addr |
+ PIPE_CONTROL_GLOBAL_GTT;
+ else
+ *request->post_ts_offset = ts_addr |
+ MI_FLUSH_DW_USE_GTT;
+ break;
+ default: /* any other sample id is only logged; nothing is patched */
+ DRM_ERROR("Invalid sample being patched\n");
+ }
+}
+
/**
* i915_perf_stream_patch_request - Assign free sample. If none available,
* remove one. Patch offset of the perf sample address with the one from
@@ -650,6 +773,10 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream,
(SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE))
i915_perf_stream_patch_sample_oa(stream, request,
sample);
+ else if (stream->sample_flags & SAMPLE_TS)
+ i915_perf_stream_patch_sample_ts(stream, request,
+ sample);
+
spin_unlock_irqrestore(&stream->samples_lock, flags);
sample_id++;
}
@@ -976,6 +1103,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
buf += 8;
}
+ if (sample_flags & SAMPLE_TS) {
+ if (copy_to_user(buf, &data->ts, I915_PERF_TS_SAMPLE_SIZE))
+ return -EFAULT;
+ buf += I915_PERF_TS_SAMPLE_SIZE;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -1019,6 +1152,12 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TAG)
data.tag = stream->last_tag;
+ /* TODO: Derive timestamp from OA report,
+ * after scaling with the ts base
+ */
+ if (sample_flags & SAMPLE_TS)
+ data.ts = 0;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1643,6 +1782,19 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
stream->last_tag = INVALID_TAG;
}
+ if (sample_flags & SAMPLE_TS) {
+ /* For RCS, if OA samples are also being collected, derive the
+ * timestamp from OA report, after scaling with the TS base.
+ * Else, forward the timestamp collected via command stream.
+ */
+ /* TODO: derive the timestamp from OA report */
+ if (sample_flags & SAMPLE_OA_REPORT)
+ data.ts = 0;
+ else
+ data.ts = *(u64 *) (stream->cs_buffer.vaddr +
+ node->ts_offset);
+ }
+
return append_perf_sample(stream, buf, count, offset, &data);
}
@@ -2257,11 +2409,21 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
static int init_perf_samples(struct i915_perf_stream *stream)
{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_perf_cs_sample *sample;
u32 sample_size = 0;
u32 offset = 0;
- sample_size = stream->dev_priv->perf.oa.oa_buffer.format_size;
+ if (stream->sample_flags & SAMPLE_OA_REPORT)
+ sample_size += dev_priv->perf.oa.oa_buffer.format_size;
+ else if (stream->sample_flags & SAMPLE_TS) {
+ /*
+ * XXX: TS data can be derived from the OA report, so there is no
+ * need to capture it separately for the RCS engine when OA data
+ * is already being captured.
+ */
+ sample_size += I915_PERF_TS_SAMPLE_SIZE;
+ }
while ((offset + sample_size) < stream->cs_buffer.vma->size) {
sample = kzalloc(sizeof(*sample), GFP_KERNEL);
@@ -2269,9 +2431,22 @@ static int init_perf_samples(struct i915_perf_stream *stream)
DRM_ERROR("Perf sample alloc failed\n");
return -ENOMEM;
}
- sample->oa_offset = offset;
+ if (stream->sample_flags & SAMPLE_OA_REPORT) {
+ sample->oa_offset = offset;
+ /* Ensure 64 byte alignment of oa_offset */
+ sample->oa_offset = ALIGN(sample->oa_offset,
+ OA_ADDR_ALIGN);
+ offset = sample->oa_offset +
+ dev_priv->perf.oa.oa_buffer.format_size;
+ } else if (stream->sample_flags & SAMPLE_TS) {
+ sample->ts_offset = offset;
+ /* Ensure 8 byte alignment of ts_offset */
+ sample->ts_offset = ALIGN(sample->ts_offset,
+ TS_ADDR_ALIGN);
+ offset = sample->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
+ }
+
list_add_tail(&sample->link, &stream->free_samples);
- offset += sample_size;
}
return 0;
@@ -2862,7 +3037,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
int format_size, idx;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE);
- bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
+ bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
+ SAMPLE_TS);
bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
SAMPLE_TAG);
struct i915_perf_stream *curr_stream;
@@ -3026,8 +3202,22 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
require_cs_mode = true;
}
+ if (props->sample_flags & SAMPLE_TS) {
+ stream->sample_flags |= SAMPLE_TS;
+ stream->sample_size += I915_PERF_TS_SAMPLE_SIZE;
+
+ /*
+ * NB: it's meaningful to request SAMPLE_TS with just CS
+ * mode or periodic OA mode sampling but we don't allow
+ * SAMPLE_TS without either mode
+ */
+ if (!require_oa_unit)
+ require_cs_mode = true;
+ }
+
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID/TAG sampling requires a ring to be specified");
+ DRM_ERROR("PID/TAG/TS sampling requires engine "
+ "to be specified");
ret = -EINVAL;
goto err_enable;
}
@@ -3043,11 +3233,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
/*
* The only time we should allow enabling CS mode if it's not
- * strictly required, is if SAMPLE_CTX_ID has been requested
- * as it's usable with periodic OA or CS sampling.
+ * strictly required, is if SAMPLE_CTX_ID/SAMPLE_TS has been
+ * requested as they're usable with periodic OA or CS sampling.
*/
if (!require_cs_mode &&
- !(props->sample_flags & SAMPLE_CTX_ID)) {
+ !(props->sample_flags & (SAMPLE_CTX_ID | SAMPLE_TS))) {
+
DRM_ERROR("Stream engine given without requesting any "
"CS specific property\n");
ret = -EINVAL;
@@ -3770,21 +3961,12 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_ENGINE: {
unsigned int user_ring_id =
value & I915_EXEC_RING_MASK;
- enum intel_engine_id engine;
if (user_ring_id > I915_USER_RINGS)
return -EINVAL;
- /* XXX: Currently only RCS is supported.
- * Remove this check when support for other
- * engines is added
- */
- engine = user_ring_map[user_ring_id];
- if (engine != RCS)
- return -EINVAL;
-
props->cs_mode = true;
- props->engine = engine;
+ props->engine = user_ring_map[user_ring_id];
}
break;
case DRM_I915_PERF_PROP_SAMPLE_CTX_ID:
@@ -3796,6 +3978,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_PROP_SAMPLE_TAG:
props->sample_flags |= SAMPLE_TAG;
break;
+ case DRM_I915_PERF_PROP_SAMPLE_TS:
+ props->sample_flags |= SAMPLE_TS;
+ break;
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
return -EINVAL;
@@ -547,6 +547,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_FLUSH_DW_OP_STAMP (3<<14)
#define MI_FLUSH_DW_OP_MASK (3<<14)
#define MI_FLUSH_DW_NOTIFY (1<<8)
#define MI_INVALIDATE_BSD (1<<7)
@@ -630,6 +631,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
#define PIPE_CONTROL_QW_WRITE (1<<14)
+#define PIPE_CONTROL_TIMESTAMP_WRITE (3<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
@@ -1459,6 +1459,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_PROP_SAMPLE_TAG,
+ /**
+ * The value of this property set to 1 requests inclusion of timestamp
+ * in the perf sample data.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_TS,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1528,6 +1534,7 @@ enum drm_i915_perf_record_type {
* { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
* { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
* { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
+ * { u64 timestamp; } && DRM_I915_PERF_PROP_SAMPLE_TS
* { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
* };
*/