@@ -2739,6 +2739,7 @@ struct drm_i915_private {
u32 ctx_flexeu0_offset;
u32 n_pending_periodic_samples;
u32 pending_periodic_ts;
+ u64 last_gpu_ts;
/**
* The RPT_ID/reason field for Gen8+ includes a bit
@@ -1121,6 +1121,26 @@ static int append_perf_sample(struct i915_perf_stream *stream,
}
/**
+ * get_gpu_ts_from_oa_report - Retrieve absolute gpu timestamp from OA report
+ *
+ * Note: We are assuming that we're updating last_gpu_ts frequently enough so
+ * that it's never possible to see multiple overflows before we compare
+ * sample_ts to last_gpu_ts. Since this is significantly large duration
+ * (~6min for 80ns ts base), we can safely assume so.
+ */
+static u64 get_gpu_ts_from_oa_report(struct drm_i915_private *dev_priv,
+ const u8 *report)
+{
+ u32 sample_ts = *(u32 *)(report + 4);
+ u32 delta;
+
+ delta = sample_ts - (u32)dev_priv->perf.oa.last_gpu_ts;
+ dev_priv->perf.oa.last_gpu_ts += delta;
+
+ return dev_priv->perf.oa.last_gpu_ts;
+}
+
+/**
* append_oa_buffer_sample - Copies single periodic OA report into userspace
* read() buffer.
* @stream: An i915-perf stream opened for OA metrics
@@ -1152,11 +1172,8 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TAG)
data.tag = stream->last_tag;
- /* TODO: Derive timestamp from OA report,
- * after scaling with the ts base
- */
if (sample_flags & SAMPLE_TS)
- data.ts = 0;
+ data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1730,6 +1747,7 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_perf_sample_data data = { 0 };
u32 sample_flags = stream->sample_flags;
+ u64 gpu_ts = 0;
int ret = 0;
if (sample_flags & SAMPLE_OA_REPORT) {
@@ -1745,6 +1763,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
sample_ts, U32_MAX);
if (ret)
return ret;
+
+ if (sample_flags & SAMPLE_TS)
+ gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
}
if (sample_flags & SAMPLE_OA_SOURCE)
@@ -1783,16 +1804,13 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
}
if (sample_flags & SAMPLE_TS) {
- /* For RCS, if OA samples are also being collected, derive the
- * timestamp from OA report, after scaling with the TS base.
+ /* If OA sampling is enabled, derive the ts from OA report.
* Else, forward the timestamp collected via command stream.
*/
- /* TODO: derive the timestamp from OA report */
- if (sample_flags & SAMPLE_OA_REPORT)
- data.ts = 0;
- else
- data.ts = *(u64 *) (stream->cs_buffer.vaddr +
+ if (!(sample_flags & SAMPLE_OA_REPORT))
+ gpu_ts = *(u64 *) (stream->cs_buffer.vaddr +
node->ts_offset);
+ data.ts = gpu_ts;
}
return append_perf_sample(stream, buf, count, offset, &data);
@@ -2959,9 +2977,15 @@ static void i915_perf_stream_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- if (stream->sample_flags & SAMPLE_OA_REPORT)
+ if (stream->sample_flags & SAMPLE_OA_REPORT) {
dev_priv->perf.oa.ops.oa_enable(dev_priv);
+ if (stream->sample_flags & SAMPLE_TS)
+ dev_priv->perf.oa.last_gpu_ts =
+ I915_READ64_2x32(GT_TIMESTAMP_COUNT,
+ GT_TIMESTAMP_COUNT_UDW);
+ }
+
if (stream->cs_mode || dev_priv->perf.oa.periodic)
hrtimer_start(&dev_priv->perf.poll_check_timer,
ns_to_ktime(POLL_PERIOD),
@@ -730,6 +730,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define PS_DEPTH_COUNT _MMIO(0x2350)
#define PS_DEPTH_COUNT_UDW _MMIO(0x2350 + 4)
+/* Timestamp count register */
+#define GT_TIMESTAMP_COUNT _MMIO(0x2358)
+#define GT_TIMESTAMP_COUNT_UDW _MMIO(0x2358 + 4)
+
/* There are the 4 64-bit counter registers, one for each stream output */
#define GEN7_SO_NUM_PRIMS_WRITTEN(n) _MMIO(0x5200 + (n) * 8)
#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n) _MMIO(0x5200 + (n) * 8 + 4)