@@ -1725,6 +1725,7 @@ struct i915_perf_stream {
struct list_head link;
+ enum intel_ring_id ring_id;
u32 sample_flags;
int sample_size;
@@ -1734,6 +1735,9 @@ struct i915_perf_stream {
/* Whether command stream based data collection is enabled */
bool cs_mode;
+ /* Whether the OA unit is in use */
+ bool using_oa;
+
/* Enables the collection of HW samples, either in response to
* I915_PERF_IOCTL_ENABLE or implicitly called when stream is
* opened without I915_PERF_FLAG_DISABLED */
@@ -1782,7 +1786,8 @@ struct i915_perf_stream {
* Routine to emit the commands in the command streamer associated
* with the corresponding gpu engine.
*/
- void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag);
+ void (*command_stream_hook)(struct i915_perf_stream *stream,
+ struct drm_i915_gem_request *req, u32 tag);
};
struct i915_oa_ops {
@@ -1807,7 +1812,16 @@ struct i915_oa_ops {
struct i915_perf_cs_data_node {
struct list_head link;
struct drm_i915_gem_request *request;
- u32 offset;
+
+ /* Offsets into the GEM obj holding the data */
+ u32 start_offset;
+ u32 oa_offset;
+ u32 ts_offset;
+
+	/* Size of the data captured into the buffer for this entry */
+ u32 size;
+
+ /* Other metadata */
u32 ctx_id;
u32 pid;
u32 tag;
@@ -2071,14 +2085,13 @@ struct drm_i915_private {
spinlock_t hook_lock;
- struct {
- struct i915_perf_stream *exclusive_stream;
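+	/*
+	 * Note: the exclusive stream, poll timer and per-engine wait queues
+	 * are shared between OA and command stream based capture, so they
+	 * live outside the OA-specific state below.
+	 */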
+ struct hrtimer poll_check_timer;
+ struct i915_perf_stream *exclusive_stream;
+ wait_queue_head_t poll_wq[I915_NUM_RINGS];
+ struct {
u32 specific_ctx_id;
- struct hrtimer poll_check_timer;
- wait_queue_head_t poll_wq;
-
bool periodic;
u32 period_exponent;
@@ -2115,10 +2128,10 @@ struct drm_i915_private {
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
u8 *addr;
- } command_stream_buf;
+ } command_stream_buf[I915_NUM_RINGS];
- struct list_head node_list;
- spinlock_t node_list_lock;
+ struct list_head node_list[I915_NUM_RINGS];
+ spinlock_t node_list_lock[I915_NUM_RINGS];
} perf;
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -51,12 +51,17 @@ static u32 i915_perf_stream_paranoid = true;
#define GEN8_OAREPORT_REASON_GO_TRANSITION (1<<23)
#define GEN9_OAREPORT_REASON_CLK_RATIO (1<<24)
-/* Data common to periodic and RCS based samples */
-struct oa_sample_data {
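+/*
+ * Alignment and size constraints for data captured into the command stream
+ * buffer: OA report addresses must be 64 byte aligned, and each timestamp is
+ * a single 8 byte value written at an 8 byte aligned address.
+ */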
+#define OA_ADDR_ALIGN 64
+#define TS_ADDR_ALIGN 8
+#define I915_PERF_TS_SAMPLE_SIZE 8
+
+/* Data common to all samples (periodic OA / CS based OA / Timestamps) */
+struct sample_data {
u32 source;
u32 ctx_id;
u32 pid;
u32 tag;
+ u64 ts;
const u8 *report;
};
@@ -100,6 +105,7 @@ static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
#define SAMPLE_CTX_ID (1<<2)
#define SAMPLE_PID (1<<3)
#define SAMPLE_TAG (1<<4)
+#define SAMPLE_TS (1<<5)
struct perf_open_properties
{
@@ -136,8 +142,9 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag)
mutex_lock(&dev_priv->perf.streams_lock);
list_for_each_entry(stream, &dev_priv->perf.streams, link) {
- if (stream->enabled && stream->command_stream_hook)
- stream->command_stream_hook(req, tag);
+ if (stream->enabled && (stream->ring_id == ring->id) &&
+ stream->command_stream_hook)
+ stream->command_stream_hook(stream, req, tag);
}
mutex_unlock(&dev_priv->perf.streams_lock);
}
@@ -150,16 +157,15 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *req, u32 tag)
* eventually, when the request associated with new entry completes.
*/
static void release_some_perf_entries(struct drm_i915_private *dev_priv,
- u32 target_size)
+ enum intel_ring_id id, u32 target_size)
{
struct i915_perf_cs_data_node *entry, *next;
- u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size;
u32 size = 0;
list_for_each_entry_safe
- (entry, next, &dev_priv->perf.node_list, link) {
+ (entry, next, &dev_priv->perf.node_list[id], link) {
- size += entry_size;
+ size += entry->size;
i915_gem_request_unreference(entry->request);
list_del(&entry->link);
kfree(entry);
@@ -175,99 +181,117 @@ static void release_some_perf_entries(struct drm_i915_private *dev_priv,
* the buffer, it will remove the oldest entries in order to make space.
*/
static void insert_perf_entry(struct drm_i915_private *dev_priv,
+ struct i915_perf_stream *stream,
struct i915_perf_cs_data_node *entry)
{
struct i915_perf_cs_data_node *first_entry, *last_entry;
- int max_offset = dev_priv->perf.command_stream_buf.obj->base.size;
- u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size;
-
- spin_lock(&dev_priv->perf.node_list_lock);
- if (list_empty(&dev_priv->perf.node_list)) {
- entry->offset = 0;
- list_add_tail(&entry->link, &dev_priv->perf.node_list);
- spin_unlock(&dev_priv->perf.node_list_lock);
- return;
+ u32 sample_flags = stream->sample_flags;
+ enum intel_ring_id id = stream->ring_id;
+ int max_offset = dev_priv->perf.command_stream_buf[id].obj->base.size;
+ u32 offset, entry_size = 0;
+ bool sample_ts = false;
+
+	if (sample_flags & SAMPLE_OA_REPORT)
+ entry_size += dev_priv->perf.oa.oa_buffer.format_size;
+ else if (sample_flags & SAMPLE_TS) {
+		/*
+		 * XXX: The timestamp can anyway be derived from the OA
+		 * report, so there is no need to capture it separately for
+		 * the RCS ring when OA data is already being captured.
+		 */
+ entry_size += I915_PERF_TS_SAMPLE_SIZE;
+ sample_ts = true;
}
- first_entry = list_first_entry(&dev_priv->perf.node_list,
+ spin_lock(&dev_priv->perf.node_list_lock[id]);
+ if (list_empty(&dev_priv->perf.node_list[id])) {
+ offset = 0;
+ goto out;
+ }
+
+ first_entry = list_first_entry(&dev_priv->perf.node_list[id],
typeof(*first_entry), link);
- last_entry = list_last_entry(&dev_priv->perf.node_list,
- typeof(*first_entry), link);
+ last_entry = list_last_entry(&dev_priv->perf.node_list[id],
+ typeof(*last_entry), link);
- if (last_entry->offset >= first_entry->offset) {
+ if (last_entry->start_offset >= first_entry->start_offset) {
/* Sufficient space available at the end of buffer? */
- if (last_entry->offset + 2*entry_size < max_offset)
- entry->offset = last_entry->offset + entry_size;
+ if (last_entry->start_offset + last_entry->size + entry_size
+ < max_offset)
+ offset = last_entry->start_offset + last_entry->size;
/*
* Wraparound condition. Is sufficient space available at
* beginning of buffer?
*/
- else if (entry_size < first_entry->offset)
- entry->offset = 0;
+ else if (entry_size < first_entry->start_offset)
+ offset = 0;
/* Insufficient space. Overwrite existing old entries */
else {
- u32 target_size = entry_size - first_entry->offset;
+ u32 target_size = entry_size -
+ first_entry->start_offset;
- release_some_perf_entries(dev_priv, target_size);
- entry->offset = 0;
+ release_some_perf_entries(dev_priv, id, target_size);
+ offset = 0;
}
} else {
/* Sufficient space available? */
- if (last_entry->offset + 2*entry_size < first_entry->offset)
- entry->offset = last_entry->offset + entry_size;
+ if (last_entry->start_offset + last_entry->size + entry_size
+ < first_entry->start_offset)
+ offset = last_entry->start_offset + last_entry->size;
/* Insufficient space. Overwrite existing old entries */
else {
u32 target_size = entry_size -
- (first_entry->offset - last_entry->offset -
- entry_size);
+ (first_entry->start_offset -
+ last_entry->start_offset -
+ last_entry->size);
- release_some_perf_entries(dev_priv, target_size);
- entry->offset = last_entry->offset + entry_size;
+ release_some_perf_entries(dev_priv, id, target_size);
+ offset = last_entry->start_offset + last_entry->size;
}
}
- list_add_tail(&entry->link, &dev_priv->perf.node_list);
- spin_unlock(&dev_priv->perf.node_list_lock);
+
+out:
+ entry->start_offset = offset;
+ entry->size = entry_size;
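+	/*
+	 * Lay out the individual fields (OA report, timestamp) within this
+	 * entry, honouring their respective alignment requirements.
+	 */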
+	if (sample_flags & SAMPLE_OA_REPORT) {
+ entry->oa_offset = offset;
+ /* Ensure 64 byte alignment of oa_offset */
+ entry->oa_offset = ALIGN(entry->oa_offset, OA_ADDR_ALIGN);
+ offset = entry->oa_offset +
+ dev_priv->perf.oa.oa_buffer.format_size;
+ }
+ if (sample_ts) {
+ entry->ts_offset = offset;
+ /* Ensure 8 byte alignment of ts_offset */
+ entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN);
+ offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
+ }
+
+ list_add_tail(&entry->link, &dev_priv->perf.node_list[id]);
+ spin_unlock(&dev_priv->perf.node_list_lock[id]);
}
-static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
- u32 tag)
+static int i915_perf_stream_capture_oa_report(struct drm_i915_gem_request *req,
+ u32 offset)
{
struct intel_engine_cs *ring = req->ring;
struct intel_ringbuffer *ringbuf = req->ringbuf;
- struct intel_context *ctx = req->ctx;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
- struct i915_perf_cs_data_node *entry;
u32 addr = 0;
int ret;
/* OA counters are only supported on the render ring */
BUG_ON(ring->id != RCS);
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (entry == NULL) {
- DRM_ERROR("alloc failed\n");
- return;
- }
-
if (i915.enable_execlists)
ret = intel_logical_ring_begin(req, 4);
else
ret = intel_ring_begin(req, 4);
- if (ret) {
- kfree(entry);
- return;
- }
-
- entry->ctx_id = ctx->global_id;
- entry->pid = current->pid;
- entry->tag = tag;
- i915_gem_request_assign(&entry->request, req);
-
- insert_perf_entry(dev_priv, entry);
+ if (ret)
+ return ret;
- addr = dev_priv->perf.command_stream_buf.vma->node.start +
- entry->offset;
+ addr = dev_priv->perf.command_stream_buf[RCS].vma->node.start + offset;
/* addr should be 64 byte aligned */
BUG_ON(addr & 0x3f);
@@ -295,10 +319,154 @@ static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req,
}
intel_ring_advance(ring);
}
- i915_vma_move_to_active(dev_priv->perf.command_stream_buf.vma, req);
+ return 0;
+}
+
+static int i915_perf_stream_capture_ts_data(struct drm_i915_gem_request *req,
+ u32 offset)
+{
+ struct intel_engine_cs *ring = req->ring;
+ struct intel_ringbuffer *ringbuf = req->ringbuf;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ u32 addr = 0;
+ int ret;
+
+ if (i915.enable_execlists)
+ ret = intel_logical_ring_begin(req, 6);
+ else
+ ret = intel_ring_begin(req, 6);
+
+ if (ret)
+ return ret;
+
+ addr = dev_priv->perf.command_stream_buf[ring->id].vma->node.start +
+ offset;
+
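+	/*
+	 * On RCS the timestamp is captured with a PIPE_CONTROL timestamp
+	 * write; the other engines use MI_FLUSH_DW with the timestamp
+	 * post-sync operation.
+	 */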
+ if (i915.enable_execlists) {
+ if (ring->id == RCS) {
+ intel_logical_ring_emit(ringbuf,
+ GFX_OP_PIPE_CONTROL(6));
+ intel_logical_ring_emit(ringbuf,
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_TIMESTAMP_WRITE);
+ intel_logical_ring_emit(ringbuf, addr |
+ PIPE_CONTROL_GLOBAL_GTT);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ } else {
+ uint32_t cmd;
+
+ cmd = MI_FLUSH_DW + 2; /* Gen8+ */
+
+ cmd |= MI_FLUSH_DW_OP_STAMP;
+
+ intel_logical_ring_emit(ringbuf, cmd);
+ intel_logical_ring_emit(ringbuf, addr |
+ MI_FLUSH_DW_USE_GTT);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+ }
+ intel_logical_ring_advance(ringbuf);
+ } else {
+ if (ring->id == RCS) {
+ if (INTEL_INFO(ring->dev)->gen >= 8)
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
+ else
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+ intel_ring_emit(ring,
+ PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_TIMESTAMP_WRITE);
+ intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, 0);
+ if (INTEL_INFO(ring->dev)->gen >= 8) {
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ } else {
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ }
+ } else {
+ uint32_t cmd;
+
+ cmd = MI_FLUSH_DW + 1;
+ if (INTEL_INFO(ring->dev)->gen >= 8)
+ cmd += 1;
+
+ cmd |= MI_FLUSH_DW_OP_STAMP;
+
+ intel_ring_emit(ring, cmd);
+ intel_ring_emit(ring, addr | MI_FLUSH_DW_USE_GTT);
+ if (INTEL_INFO(ring->dev)->gen >= 8) {
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ } else {
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ }
+ intel_ring_emit(ring, MI_NOOP);
+ }
+ intel_ring_advance(ring);
+ }
+ return 0;
+}
+
+static void i915_perf_stream_cs_hook(struct i915_perf_stream *stream,
+ struct drm_i915_gem_request *req, u32 tag)
+{
+ struct intel_engine_cs *ring = req->ring;
+ struct intel_context *ctx = req->ctx;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ enum intel_ring_id id = stream->ring_id;
+ u32 sample_flags = stream->sample_flags;
+ struct i915_perf_cs_data_node *entry;
+ int ret = 0;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (entry == NULL) {
+ DRM_ERROR("alloc failed\n");
+ return;
+ }
+
+ entry->ctx_id = ctx->global_id;
+ entry->pid = current->pid;
+ entry->tag = tag;
+ i915_gem_request_assign(&entry->request, req);
+
+ insert_perf_entry(dev_priv, stream, entry);
+
+ if (sample_flags & SAMPLE_OA_REPORT) {
+ ret = i915_perf_stream_capture_oa_report(req, entry->oa_offset);
+ if (ret)
+ goto err;
+ } else if (sample_flags & SAMPLE_TS) {
+		/*
+		 * XXX: The timestamp can anyway be derived from the OA
+		 * report, so there is no need to capture it separately for
+		 * the RCS ring when OA data is already being captured.
+		 */
+ ret = i915_perf_stream_capture_ts_data(req, entry->ts_offset);
+ if (ret)
+ goto err;
+ }
+
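+	/*
+	 * Keep the command stream buffer's vma busy against this request so
+	 * it isn't unbound before the capture commands have executed.
+	 */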
+ i915_vma_move_to_active(dev_priv->perf.command_stream_buf[id].vma, req);
+ return;
+
+err:
+ i915_gem_request_unreference(entry->request);
+	spin_lock(&dev_priv->perf.node_list_lock[id]);
+	list_del(&entry->link);
+	spin_unlock(&dev_priv->perf.node_list_lock[id]);
+	kfree(entry);
}
-static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
+static int i915_perf_wait_gpu(struct drm_i915_private *dev_priv,
+ enum intel_ring_id id)
{
struct i915_perf_cs_data_node *last_entry = NULL;
struct drm_i915_gem_request *req = NULL;
@@ -309,14 +477,14 @@ static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
* implicitly wait for the prior submitted requests. The refcount
* of the requests is not decremented here.
*/
- spin_lock(&dev_priv->perf.node_list_lock);
+ spin_lock(&dev_priv->perf.node_list_lock[id]);
- if (!list_empty(&dev_priv->perf.node_list)) {
- last_entry = list_last_entry(&dev_priv->perf.node_list,
+ if (!list_empty(&dev_priv->perf.node_list[id])) {
+ last_entry = list_last_entry(&dev_priv->perf.node_list[id],
struct i915_perf_cs_data_node, link);
req = last_entry->request;
}
- spin_unlock(&dev_priv->perf.node_list_lock);
+ spin_unlock(&dev_priv->perf.node_list_lock[id]);
if (!req)
return 0;
@@ -331,17 +499,18 @@ static int i915_oa_rcs_wait_gpu(struct drm_i915_private *dev_priv)
return 0;
}
-static void i915_oa_rcs_free_requests(struct drm_i915_private *dev_priv)
+static void i915_perf_free_requests(struct drm_i915_private *dev_priv,
+ enum intel_ring_id id)
{
struct i915_perf_cs_data_node *entry, *next;
list_for_each_entry_safe
- (entry, next, &dev_priv->perf.node_list, link) {
+ (entry, next, &dev_priv->perf.node_list[id], link) {
i915_gem_request_unreference__unlocked(entry->request);
- spin_lock(&dev_priv->perf.node_list_lock);
+ spin_lock(&dev_priv->perf.node_list_lock[id]);
list_del(&entry->link);
- spin_unlock(&dev_priv->perf.node_list_lock);
+ spin_unlock(&dev_priv->perf.node_list_lock[id]);
kfree(entry);
}
}
@@ -381,9 +550,9 @@ static bool append_oa_status(struct i915_perf_stream *stream,
return true;
}
-static bool append_oa_sample(struct i915_perf_stream *stream,
+static bool append_sample(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state,
- struct oa_sample_data *data)
+ struct sample_data *data)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -424,6 +593,13 @@ static bool append_oa_sample(struct i915_perf_stream *stream,
read_state->buf += 4;
}
+ if (sample_flags & SAMPLE_TS) {
+ if (copy_to_user(read_state->buf, &data->ts,
+ I915_PERF_TS_SAMPLE_SIZE))
+ return false;
+ read_state->buf += I915_PERF_TS_SAMPLE_SIZE;
+ }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(read_state->buf, data->report, report_size))
return false;
@@ -441,7 +617,7 @@ static bool append_oa_buffer_sample(struct i915_perf_stream *stream,
{
struct drm_i915_private *dev_priv = stream->dev_priv;
u32 sample_flags = stream->sample_flags;
- struct oa_sample_data data = { 0 };
+ struct sample_data data = { 0 };
if (sample_flags & SAMPLE_OA_SOURCE_INFO) {
enum drm_i915_perf_oa_event_source source;
@@ -473,10 +649,15 @@ static bool append_oa_buffer_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TAG)
data.tag = 0;
+	/* Derive timestamp from OA report, after scaling with the timestamp base */
+#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report"
+ if (sample_flags & SAMPLE_TS)
+ data.ts = 0;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
- append_oa_sample(stream, read_state, &data);
+ append_sample(stream, read_state, &data);
return true;
}
@@ -528,7 +709,7 @@ static u32 gen8_append_oa_reports(struct i915_perf_stream *stream,
ctx_id &= 0xfffff;
}
- if (dev_priv->perf.oa.exclusive_stream->enabled) {
+ if (stream->enabled) {
/* NB: For Gen 8 we handle per-context report filtering
* ourselves instead of programming the OA unit with a
@@ -539,7 +720,7 @@ static u32 gen8_append_oa_reports(struct i915_perf_stream *stream,
* first report belonging to any subsequently
* switched-too context.
*/
- if (!dev_priv->perf.oa.exclusive_stream->ctx ||
+ if (!stream->ctx ||
(dev_priv->perf.oa.specific_ctx_id == ctx_id ||
(dev_priv->perf.oa.specific_ctx_id !=
dev_priv->perf.oa.oa_buffer.last_ctx_id))) {
@@ -630,7 +811,7 @@ static u32 gen7_append_oa_reports(struct i915_perf_stream *stream,
if (report_ts > ts)
break;
- if (dev_priv->perf.oa.exclusive_stream->enabled) {
+ if (stream->enabled) {
if (!append_oa_buffer_sample(stream, read_state,
report))
break;
@@ -687,24 +868,32 @@ static void gen7_oa_read(struct i915_perf_stream *stream,
OA_MEM_SELECT_GGTT);
}
-static bool append_oa_rcs_sample(struct i915_perf_stream *stream,
+static bool append_one_cs_sample(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state,
struct i915_perf_cs_data_node *node)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- struct oa_sample_data data = { 0 };
- const u8 *report = dev_priv->perf.command_stream_buf.addr +
- node->offset;
+ enum intel_ring_id id = stream->ring_id;
+ struct sample_data data = { 0 };
u32 sample_flags = stream->sample_flags;
- u32 report_ts;
- /*
- * Forward the periodic OA samples which have the timestamp lower
- * than timestamp of this sample, before forwarding this sample.
- * This ensures samples read by user are order acc. to their timestamps
- */
- report_ts = *(u32 *)(report + 4);
- dev_priv->perf.oa.ops.read(stream, read_state, report_ts);
+ if (sample_flags & SAMPLE_OA_REPORT) {
+ const u8 *report = dev_priv->perf.command_stream_buf[id].addr +
+ node->oa_offset;
+ u32 sample_ts = *(u32 *)(report + 4);
+
+ BUG_ON(id != RCS);
+
+ data.report = report;
+
+		/*
+		 * Forward the periodic OA samples which have a timestamp
+		 * lower than this sample's, before forwarding this sample.
+		 * This ensures the samples read by userspace are ordered
+		 * according to their timestamps.
+		 */
+ dev_priv->perf.oa.ops.read(stream, read_state, sample_ts);
+ }
if (sample_flags & SAMPLE_OA_SOURCE_INFO)
data.source = I915_PERF_OA_EVENT_SOURCE_RCS;
@@ -718,38 +907,51 @@ static bool append_oa_rcs_sample(struct i915_perf_stream *stream,
if (sample_flags & SAMPLE_TAG)
data.tag = node->tag;
- if (sample_flags & SAMPLE_OA_REPORT)
- data.report = report;
+ if (sample_flags & SAMPLE_TS) {
+		/*
+		 * For RCS, derive the timestamp from the OA report, after
+		 * scaling with the timestamp base. For other rings, forward
+		 * the timestamp collected via the command stream.
+		 */
+#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report"
+ if (sample_flags & SAMPLE_OA_REPORT)
+ data.ts = 0;
+ else
+ data.ts = *(u64 *)
+ (dev_priv->perf.command_stream_buf[id].addr +
+ node->ts_offset);
+ }
- append_oa_sample(stream, read_state, &data);
+ append_sample(stream, read_state, &data);
return true;
}
-static void oa_rcs_append_reports(struct i915_perf_stream *stream,
+static void append_command_stream_samples(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ enum intel_ring_id id = stream->ring_id;
struct i915_perf_cs_data_node *entry, *next;
list_for_each_entry_safe(entry, next,
- &dev_priv->perf.node_list, link) {
+ &dev_priv->perf.node_list[id], link) {
if (!i915_gem_request_completed(entry->request, true))
break;
- if (!append_oa_rcs_sample(stream, read_state, entry))
+ if (!append_one_cs_sample(stream, read_state, entry))
break;
- spin_lock(&dev_priv->perf.node_list_lock);
+ spin_lock(&dev_priv->perf.node_list_lock[id]);
list_del(&entry->link);
- spin_unlock(&dev_priv->perf.node_list_lock);
+ spin_unlock(&dev_priv->perf.node_list_lock[id]);
i915_gem_request_unreference__unlocked(entry->request);
kfree(entry);
}
- /* Flush any remaining periodic reports */
- dev_priv->perf.oa.ops.read(stream, read_state, U32_MAX);
+	/* Flush any remaining periodic OA reports, in the RCS case */
+ if (stream->sample_flags & SAMPLE_OA_REPORT)
+ dev_priv->perf.oa.ops.read(stream, read_state, U32_MAX);
}
static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
@@ -757,7 +959,7 @@ static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
struct drm_i915_private *dev_priv = stream->dev_priv;
if (stream->cs_mode)
- return list_empty(&dev_priv->perf.node_list);
+ return list_empty(&dev_priv->perf.node_list[stream->ring_id]);
else
return true;
}
@@ -772,63 +974,69 @@ static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
* can't be destroyed until completion (such as a read()) that ensures
* the device + OA buffer can't disappear
*/
- return !(dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv) &&
- command_stream_buf_is_empty(stream));
+ if (stream->sample_flags & SAMPLE_OA_REPORT)
+ return !(dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv) &&
+ command_stream_buf_is_empty(stream));
+ else
+ return !command_stream_buf_is_empty(stream);
}
-static bool i915_oa_can_read(struct i915_perf_stream *stream)
+static bool i915_perf_stream_can_read(struct i915_perf_stream *stream)
{
return stream_have_data__unlocked(stream);
}
-static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
+static int i915_perf_stream_wait_unlocked(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ enum intel_ring_id id = stream->ring_id;
int ret;
if (stream->cs_mode) {
- ret = i915_oa_rcs_wait_gpu(dev_priv);
+ ret = i915_perf_wait_gpu(dev_priv, id);
if (ret)
return ret;
}
- return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
+ return wait_event_interruptible(dev_priv->perf.poll_wq[id],
stream_have_data__unlocked(stream));
}
-static void i915_oa_poll_wait(struct i915_perf_stream *stream,
+static void i915_perf_stream_poll_wait(struct i915_perf_stream *stream,
struct file *file,
poll_table *wait)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
+ poll_wait(file, &dev_priv->perf.poll_wq[stream->ring_id], wait);
}
-static void i915_oa_read(struct i915_perf_stream *stream,
+static void i915_perf_stream_read(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
if (stream->cs_mode)
- oa_rcs_append_reports(stream, read_state);
- else
+ append_command_stream_samples(stream, read_state);
+ else if (stream->ring_id == RCS)
dev_priv->perf.oa.ops.read(stream, read_state, U32_MAX);
}
static void
-free_command_stream_buf(struct drm_i915_private *i915)
+free_command_stream_buf(struct drm_i915_private *i915,
+ enum intel_ring_id id)
{
mutex_lock(&i915->dev->struct_mutex);
- vunmap(i915->perf.command_stream_buf.addr);
- i915_gem_object_ggtt_unpin(i915->perf.command_stream_buf.obj);
- drm_gem_object_unreference(&i915->perf.command_stream_buf.obj->base);
+ vunmap(i915->perf.command_stream_buf[id].addr);
+ i915_gem_object_ggtt_unpin(i915->perf.command_stream_buf[id].obj);
+ drm_gem_object_unreference(
+ &i915->perf.command_stream_buf[id].obj->base);
- i915->perf.command_stream_buf.obj = NULL;
- i915->perf.command_stream_buf.vma = NULL;
- i915->perf.command_stream_buf.addr = NULL;
+ i915->perf.command_stream_buf[id].obj = NULL;
+ i915->perf.command_stream_buf[id].vma = NULL;
+ i915->perf.command_stream_buf[id].addr = NULL;
mutex_unlock(&i915->dev->struct_mutex);
}
@@ -849,16 +1057,13 @@ free_oa_buffer(struct drm_i915_private *i915)
mutex_unlock(&i915->dev->struct_mutex);
}
-static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
+static void i915_perf_stream_destroy(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
+ BUG_ON(stream != dev_priv->perf.exclusive_stream);
- if (stream->cs_mode)
- free_command_stream_buf(dev_priv);
-
- if (dev_priv->perf.oa.oa_buffer.obj) {
+ if (stream->using_oa) {
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
free_oa_buffer(dev_priv);
@@ -867,7 +1072,10 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
intel_runtime_pm_put(dev_priv);
}
- dev_priv->perf.oa.exclusive_stream = NULL;
+ if (stream->cs_mode)
+ free_command_stream_buf(dev_priv, stream->ring_id);
+
+ dev_priv->perf.exclusive_stream = NULL;
}
static void *vmap_oa_buffer(struct drm_i915_gem_object *obj)
@@ -993,27 +1201,28 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
return 0;
}
-static int alloc_command_stream_buf(struct drm_i915_private *dev_priv)
+static int alloc_command_stream_buf(struct drm_i915_private *dev_priv,
+ enum intel_ring_id id)
{
struct drm_i915_gem_object *bo;
int ret;
- BUG_ON(dev_priv->perf.command_stream_buf.obj);
+ BUG_ON(dev_priv->perf.command_stream_buf[id].obj);
ret = alloc_obj(dev_priv, &bo);
if (ret)
return ret;
- dev_priv->perf.command_stream_buf.obj = bo;
- dev_priv->perf.command_stream_buf.vma = i915_gem_obj_to_ggtt(bo);
- dev_priv->perf.command_stream_buf.addr = vmap_oa_buffer(bo);
- INIT_LIST_HEAD(&dev_priv->perf.node_list);
+ dev_priv->perf.command_stream_buf[id].obj = bo;
+ dev_priv->perf.command_stream_buf[id].vma = i915_gem_obj_to_ggtt(bo);
+ dev_priv->perf.command_stream_buf[id].addr = vmap_oa_buffer(bo);
+ INIT_LIST_HEAD(&dev_priv->perf.node_list[id]);
DRM_DEBUG_DRIVER(
"command stream buf initialized, gtt offset = 0x%x, vaddr = %p",
(unsigned int)
- dev_priv->perf.command_stream_buf.vma->node.start,
- dev_priv->perf.command_stream_buf.addr);
+ dev_priv->perf.command_stream_buf[id].vma->node.start,
+ dev_priv->perf.command_stream_buf[id].addr);
return 0;
}
@@ -1225,17 +1434,17 @@ static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
{
assert_spin_locked(&dev_priv->perf.hook_lock);
- if (dev_priv->perf.oa.exclusive_stream->enabled) {
+ if (dev_priv->perf.exclusive_stream->enabled) {
unsigned long ctx_id = 0;
bool pinning_ok = false;
- if (dev_priv->perf.oa.exclusive_stream->ctx &&
+ if (dev_priv->perf.exclusive_stream->ctx &&
dev_priv->perf.oa.specific_ctx_id) {
ctx_id = dev_priv->perf.oa.specific_ctx_id;
pinning_ok = true;
}
- if (dev_priv->perf.oa.exclusive_stream->ctx == NULL ||
+ if (dev_priv->perf.exclusive_stream->ctx == NULL ||
pinning_ok) {
bool periodic = dev_priv->perf.oa.periodic;
u32 period_exponent = dev_priv->perf.oa.period_exponent;
@@ -1292,17 +1501,18 @@ static void gen8_oa_enable(struct drm_i915_private *dev_priv)
I915_WRITE(GEN8_OAHEADPTR, tail);
}
-static void i915_oa_stream_enable(struct i915_perf_stream *stream)
+static void i915_perf_stream_enable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- dev_priv->perf.oa.ops.oa_enable(dev_priv);
+ if (stream->sample_flags & SAMPLE_OA_REPORT)
+ dev_priv->perf.oa.ops.oa_enable(dev_priv);
if (stream->cs_mode)
- stream->command_stream_hook = i915_perf_command_stream_hook_oa;
+ stream->command_stream_hook = i915_perf_stream_cs_hook;
- if (dev_priv->perf.oa.periodic)
- hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
+ if (stream->cs_mode || dev_priv->perf.oa.periodic)
+ hrtimer_start(&dev_priv->perf.poll_check_timer,
ns_to_ktime(POLL_PERIOD),
HRTIMER_MODE_REL_PINNED);
}
@@ -1317,23 +1527,24 @@ static void gen8_oa_disable(struct drm_i915_private *dev_priv)
I915_WRITE(GEN8_OACONTROL, 0);
}
-static void i915_oa_stream_disable(struct i915_perf_stream *stream)
+static void i915_perf_stream_disable(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
- if (dev_priv->perf.oa.periodic)
- hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
+ if (stream->cs_mode || dev_priv->perf.oa.periodic)
+ hrtimer_cancel(&dev_priv->perf.poll_check_timer);
if (stream->cs_mode) {
stream->command_stream_hook = NULL;
- i915_oa_rcs_wait_gpu(dev_priv);
- i915_oa_rcs_free_requests(dev_priv);
+ i915_perf_wait_gpu(dev_priv, stream->ring_id);
+ i915_perf_free_requests(dev_priv, stream->ring_id);
}
- dev_priv->perf.oa.ops.oa_disable(dev_priv);
+ if (stream->sample_flags & SAMPLE_OA_REPORT)
+ dev_priv->perf.oa.ops.oa_disable(dev_priv);
}
-static int i915_oa_stream_init(struct i915_perf_stream *stream,
+static int i915_perf_stream_init(struct i915_perf_stream *stream,
struct drm_i915_perf_open_param *param,
struct perf_open_properties *props)
{
@@ -1341,15 +1552,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
SAMPLE_OA_SOURCE_INFO);
bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
- SAMPLE_TAG);
- int format_size;
+ SAMPLE_TAG |
+ SAMPLE_TS);
int ret;
/* To avoid the complexity of having to accurately filter
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access */
- if (dev_priv->perf.oa.exclusive_stream) {
- DRM_ERROR("OA unit already in use\n");
+ if (dev_priv->perf.exclusive_stream) {
+ DRM_ERROR("Stream already in use\n");
return -EBUSY;
}
@@ -1364,6 +1575,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
if (require_oa_unit) {
+ int format_size;
if (!dev_priv->perf.oa.ops.init_oa_buffer) {
DRM_ERROR("OA unit not supported\n");
return -ENODEV;
@@ -1386,6 +1598,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
+ stream->using_oa = true;
+
format_size =
dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -1452,7 +1666,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
if (require_cs_mode && !props->cs_mode) {
- DRM_ERROR("PID or TAG sampling require a ring to be specified");
+ DRM_ERROR(
+			"PID, TAG or TS sampling requires a ring to be specified");
ret = -EINVAL;
goto cs_error;
}
@@ -1472,6 +1687,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
}
stream->cs_mode = true;
+ stream->ring_id = props->ring_id;
if (props->sample_flags & SAMPLE_PID) {
stream->sample_flags |= SAMPLE_PID;
@@ -1483,20 +1699,25 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size += 4;
}
- ret = alloc_command_stream_buf(dev_priv);
+ if (props->sample_flags & SAMPLE_TS) {
+ stream->sample_flags |= SAMPLE_TS;
+ stream->sample_size += I915_PERF_TS_SAMPLE_SIZE;
+ }
+
+ ret = alloc_command_stream_buf(dev_priv, stream->ring_id);
if (ret)
goto cs_error;
}
- dev_priv->perf.oa.exclusive_stream = stream;
+ dev_priv->perf.exclusive_stream = stream;
- stream->destroy = i915_oa_stream_destroy;
- stream->enable = i915_oa_stream_enable;
- stream->disable = i915_oa_stream_disable;
- stream->can_read = i915_oa_can_read;
- stream->wait_unlocked = i915_oa_wait_unlocked;
- stream->poll_wait = i915_oa_poll_wait;
- stream->read = i915_oa_read;
+ stream->destroy = i915_perf_stream_destroy;
+ stream->enable = i915_perf_stream_enable;
+ stream->disable = i915_perf_stream_disable;
+ stream->can_read = i915_perf_stream_can_read;
+ stream->wait_unlocked = i915_perf_stream_wait_unlocked;
+ stream->poll_wait = i915_perf_stream_poll_wait;
+ stream->read = i915_perf_stream_read;
return 0;
@@ -1530,8 +1751,8 @@ static void i915_oa_context_pin_notify_locked(struct drm_i915_private *dev_priv,
dev_priv->perf.oa.ops.update_hw_ctx_id_locked == NULL)
return;
- if (dev_priv->perf.oa.exclusive_stream &&
- dev_priv->perf.oa.exclusive_stream->ctx == context) {
+ if (dev_priv->perf.exclusive_stream &&
+ dev_priv->perf.exclusive_stream->ctx == context) {
struct drm_i915_gem_object *obj =
context->legacy_hw_ctx.rcs_state;
u32 ctx_id = i915_gem_obj_ggtt_offset(obj);
@@ -1599,8 +1820,8 @@ void i915_oa_legacy_ctx_switch_notify(struct drm_i915_gem_request *req)
if (dev_priv->perf.oa.ops.legacy_ctx_switch_unlocked == NULL)
return;
- if (dev_priv->perf.oa.exclusive_stream &&
- dev_priv->perf.oa.exclusive_stream->enabled) {
+ if (dev_priv->perf.exclusive_stream &&
+ dev_priv->perf.exclusive_stream->enabled) {
/* XXX: We don't take a lock here and this may run
* async with respect to stream methods. Notably we
@@ -1729,7 +1950,7 @@ static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
struct drm_i915_private *dev_priv =
container_of(hrtimer, typeof(*dev_priv),
- perf.oa.poll_check_timer);
+ perf.poll_check_timer);
/* No need to protect the streams list here, since the hrtimer is
* disabled before the stream is removed from list, and currently a
@@ -1738,7 +1959,7 @@ static enum hrtimer_restart poll_check_timer_cb(struct hrtimer *hrtimer)
*/
list_for_each_entry(stream, &dev_priv->perf.streams, link) {
if (stream_have_data__unlocked(stream))
- wake_up(&dev_priv->perf.oa.poll_wq);
+ wake_up(&dev_priv->perf.poll_wq[stream->ring_id]);
}
hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
@@ -1947,7 +2168,7 @@ int i915_perf_open_ioctl_locked(struct drm_device *dev,
stream->dev_priv = dev_priv;
stream->ctx = specific_ctx;
- ret = i915_oa_stream_init(stream, param, props);
+ ret = i915_perf_stream_init(stream, param, props);
if (ret)
goto err_alloc;
@@ -2088,13 +2309,6 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
if (ring_id >= LAST_USER_RING)
return -EINVAL;
- /* XXX: Currently only RCS is supported.
- * Remove this check when support for other
- * rings is added
- */
- if (ring_id != RCS)
- return -EINVAL;
-
props->cs_mode = true;
props->ring_id = ring_id;
}
@@ -2108,6 +2322,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
case DRM_I915_PERF_SAMPLE_TAG_PROP:
props->sample_flags |= SAMPLE_TAG;
break;
+ case DRM_I915_PERF_SAMPLE_TS_PROP:
+ props->sample_flags |= SAMPLE_TS;
+ break;
case DRM_I915_PERF_PROP_MAX:
BUG();
}
@@ -2193,6 +2410,7 @@ static struct ctl_table dev_root[] = {
void i915_perf_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
+ int i;
if (!(IS_HASWELL(dev) ||
IS_BROADWELL(dev) || IS_CHERRYVIEW(dev) ||
@@ -2204,16 +2422,18 @@ void i915_perf_init(struct drm_device *dev)
if (!dev_priv->perf.metrics_kobj)
return;
- hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
+ hrtimer_init(&dev_priv->perf.poll_check_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- dev_priv->perf.oa.poll_check_timer.function = poll_check_timer_cb;
- init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
+ dev_priv->perf.poll_check_timer.function = poll_check_timer_cb;
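+	/* Per-engine command stream capture lists and poll wait queues */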
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ spin_lock_init(&dev_priv->perf.node_list_lock[i]);
+ init_waitqueue_head(&dev_priv->perf.poll_wq[i]);
+ }
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
mutex_init(&dev_priv->perf.streams_lock);
spin_lock_init(&dev_priv->perf.hook_lock);
- spin_lock_init(&dev_priv->perf.node_list_lock);
if (IS_HASWELL(dev)) {
dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
@@ -359,6 +359,7 @@
#define MI_FLUSH_DW_STORE_INDEX (1<<21)
#define MI_INVALIDATE_TLB (1<<18)
#define MI_FLUSH_DW_OP_STOREDW (1<<14)
+#define MI_FLUSH_DW_OP_STAMP (3<<14)
#define MI_FLUSH_DW_OP_MASK (3<<14)
#define MI_FLUSH_DW_NOTIFY (1<<8)
#define MI_INVALIDATE_BSD (1<<7)
@@ -438,6 +439,7 @@
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
#define PIPE_CONTROL_QW_WRITE (1<<14)
+#define PIPE_CONTROL_TIMESTAMP_WRITE (3<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
@@ -1236,6 +1236,12 @@ enum drm_i915_perf_property_id {
*/
DRM_I915_PERF_SAMPLE_TAG_PROP,
+ /**
+	 * The value of this property set to 1 requests inclusion of a
+	 * timestamp in the perf sample data.
+ */
+ DRM_I915_PERF_SAMPLE_TS_PROP,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1287,6 +1293,7 @@ enum drm_i915_perf_record_type {
* { u32 ctx_id; } && DRM_I915_PERF_SAMPLE_CTX_ID_PROP
* { u32 pid; } && DRM_I915_PERF_SAMPLE_PID_PROP
* { u32 tag; } && DRM_I915_PERF_SAMPLE_TAG_PROP
+ * { u64 timestamp; } && DRM_I915_PERF_SAMPLE_TS_PROP
* { u32 oa_report[]; } && DRM_I915_PERF_SAMPLE_OA_PROP
* };
*/