@@ -2196,6 +2196,7 @@ struct drm_i915_private {
u32 ctx_flexeu0_off;
u32 n_pending_periodic_samples;
u32 pending_periodic_ts;
+ u64 last_gpu_ts;
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
@@ -781,6 +781,24 @@ static int append_sample(struct i915_perf_stream *stream,
return 0;
}
+static u64 get_gpu_ts_from_oa_report(struct drm_i915_private *dev_priv,
+				     const u8 *report)
+{
+	u32 sample_ts = *(const u32 *)(report + 4);
+	u32 delta;
+
+	/*
+	 * Widen the report's 32-bit timestamp (at byte offset 4) to 64 bits
+	 * by adding its wrapping distance from the low word of last_gpu_ts.
+	 * NB: only correct if the sample is newer than last_gpu_ts by less
+	 * than one full 32-bit wrap (~6min for an 80ns ts base).
+	 */
+	delta = sample_ts - (u32)dev_priv->perf.oa.last_gpu_ts;
+	dev_priv->perf.oa.last_gpu_ts += delta;
+
+	return dev_priv->perf.oa.last_gpu_ts;
+}
+
static int append_oa_buffer_sample(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state,
const u8 *report)
@@ -817,10 +835,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TAG)
data.tag = dev_priv->perf.last_tag;
- /* Derive timestamp from OA report, after scaling with the ts base */
-#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report"
+ /* Derive timestamp from OA report */
if (sample_flags & SAMPLE_TS)
- data.ts = 0;
+ data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1272,6 +1289,7 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
enum intel_engine_id id = stream->engine;
struct sample_data data = { 0 };
u32 sample_flags = stream->sample_flags;
+ u64 gpu_ts = 0;
int ret = 0;
if (sample_flags & SAMPLE_OA_REPORT) {
@@ -1288,6 +1306,9 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
U32_MAX);
if (ret)
return ret;
+
+ if (sample_flags & SAMPLE_TS)
+ gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
}
if (sample_flags & SAMPLE_OA_SOURCE_INFO)
@@ -1309,17 +1330,14 @@ static int append_one_cs_sample(struct i915_perf_stream *stream,
}
if (sample_flags & SAMPLE_TS) {
- /* For RCS, if OA samples are also being collected, derive the
- * timestamp from OA report, after scaling with the TS base.
+ /* If OA sampling is enabled, derive the ts from OA report.
* Else, forward the timestamp collected via command stream.
*/
-#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report"
- if (sample_flags & SAMPLE_OA_REPORT)
- data.ts = 0;
- else
- data.ts = *(u64 *)
+ if (!(sample_flags & SAMPLE_OA_REPORT))
+ gpu_ts = *(u64 *)
(dev_priv->perf.command_stream_buf[id].addr +
node->ts_offset);
+ data.ts = gpu_ts;
}
return append_sample(stream, read_state, &data);
@@ -2055,9 +2073,15 @@ static void i915_ring_stream_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- if (stream->sample_flags & SAMPLE_OA_REPORT)
+ if (stream->sample_flags & SAMPLE_OA_REPORT) {
dev_priv->perf.oa.ops.oa_enable(dev_priv);
+ if (stream->sample_flags & SAMPLE_TS)
+ dev_priv->perf.oa.last_gpu_ts =
+ I915_READ64_2x32(GT_TIMESTAMP_COUNT,
+ GT_TIMESTAMP_COUNT_UDW);
+ }
+
if (stream->cs_mode || dev_priv->perf.oa.periodic)
hrtimer_start(&dev_priv->perf.poll_check_timer,
ns_to_ktime(POLL_PERIOD),
@@ -591,6 +591,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define PS_DEPTH_COUNT _MMIO(0x2350)
#define PS_DEPTH_COUNT_UDW _MMIO(0x2350 + 4)
+/* Timestamp count register pair, read together via I915_READ64_2x32() */
+#define GT_TIMESTAMP_COUNT _MMIO(0x2358)
+#define GT_TIMESTAMP_COUNT_UDW _MMIO(0x2358 + 4)
+
/* There are the 4 64-bit counter registers, one for each stream output */
#define GEN7_SO_NUM_PRIMS_WRITTEN(n) _MMIO(0x5200 + (n) * 8)
#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n) _MMIO(0x5200 + (n) * 8 + 4)