@@ -1751,7 +1751,7 @@ struct i915_perf_stream_ops {
/* Return: true if any i915 perf records are ready to read()
* for this stream.
*/
- bool (*can_read)(struct i915_perf_stream *stream);
+ bool (*can_read_unlocked)(struct i915_perf_stream *stream);
/* Call poll_wait, passing a wait queue that will be woken
* once there is something ready to read() for the stream
@@ -1763,8 +1763,8 @@ struct i915_perf_stream_ops {
/* For handling a blocking read, wait until there is something
* to ready to read() for the stream. E.g. wait on the same
* wait queue that would be passed to poll_wait() until
- * ->can_read() returns true (if its safe to call ->can_read()
- * without the i915 perf lock held).
+ * ->can_read_unlocked() returns true (if its safe to call
+ * ->can_read_unlocked() without the i915 perf lock held).
*/
int (*wait_unlocked)(struct i915_perf_stream *stream);
@@ -1834,8 +1834,10 @@ struct i915_oa_ops {
u32 ctx_id);
void (*legacy_ctx_switch_unlocked)(struct drm_i915_gem_request *req);
int (*read)(struct i915_perf_stream *stream,
- struct i915_perf_read_state *read_state, u32 ts);
- bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
+ struct i915_perf_read_state *read_state,
+ u32 ts, u32 max_records);
+ int (*oa_buffer_num_samples)(struct drm_i915_private *dev_priv,
+ u32 *last_ts);
};
/*
@@ -2175,6 +2177,8 @@ struct drm_i915_private {
u32 gen7_latched_oastatus1;
u32 ctx_oactxctrl_off;
u32 ctx_flexeu0_off;
+ u32 n_pending_periodic_samples;
+ u32 pending_periodic_ts;
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
@@ -388,13 +388,30 @@ static void i915_oa_rcs_free_requests(struct drm_i915_private *dev_priv)
* pointers. A race here could result in a false positive !empty status which
* is acceptable.
*/
-static bool gen8_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
+static int
+gen8_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv,
+ u32 *last_ts)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u32 head = I915_READ(GEN8_OAHEADPTR);
- u32 tail = I915_READ(GEN8_OATAILPTR);
+ u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr;
+ u32 head = I915_READ(GEN8_OAHEADPTR) & GEN8_OAHEADPTR_MASK;
+ u32 tail = I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+ u32 mask = (OA_BUFFER_SIZE - 1);
+ u32 num_samples;
+ u8 *report;
+
+ head -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+ tail -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+ num_samples = OA_TAKEN(tail, head) / report_size;
- return OA_TAKEN(tail, head) < report_size;
+ /* read the timestamp of the last sample */
+ if (num_samples) {
+ head += report_size*(num_samples - 1);
+ report = oa_buf_base + (head & mask);
+ *last_ts = *(u32 *)(report + 4);
+ }
+
+ return num_samples;
}
/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
@@ -408,16 +425,32 @@ static bool gen8_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_pr
* pointers. A race here could result in a false positive !empty status which
* is acceptable.
*/
-static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
+static int
+gen7_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv,
+ u32 *last_ts)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+ u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr;
+ u32 mask = (OA_BUFFER_SIZE - 1);
+ int available_size;
+ u32 num_samples = 0;
+ u8 *report;
- return OA_TAKEN(tail, head) <
- dev_priv->perf.oa.tail_margin + report_size;
+ head -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+ tail -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+ available_size = OA_TAKEN(tail, head) - dev_priv->perf.oa.tail_margin;
+ if (available_size >= report_size) {
+ num_samples = available_size / report_size;
+ head += report_size*(num_samples - 1);
+ report = oa_buf_base + (head & mask);
+ *last_ts = *(u32 *)(report + 4);
+ }
+
+ return num_samples;
}
/**
@@ -542,7 +575,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state,
u32 *head_ptr,
- u32 tail, u32 ts)
+ u32 tail, u32 ts, u32 max_records)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -551,6 +584,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u32 head;
u32 taken;
int ret = 0;
+ int n_records = 0;
BUG_ON(stream->state != I915_PERF_STREAM_ENABLED);
@@ -577,7 +611,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
tail &= ~(report_size - 1);
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) && (n_records <= max_records);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -643,6 +677,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
+ n_records++;
dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
}
}
@@ -661,7 +696,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* updated @read_state.
*/
static int gen8_oa_read(struct i915_perf_stream *stream,
- struct i915_perf_read_state *read_state, u32 ts)
+ struct i915_perf_read_state *read_state,
+ u32 ts, u32 max_records)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -703,7 +739,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
/* If there is still buffer space */
- ret = gen8_append_oa_reports(stream, read_state, &head, tail, ts);
+ ret = gen8_append_oa_reports(stream, read_state, &head, tail,
+ ts, max_records);
/* All the report sizes are a power of two and the
* head should always be incremented by some multiple
@@ -742,7 +779,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
struct i915_perf_read_state *read_state,
u32 *head_ptr,
- u32 tail, u32 ts)
+ u32 tail, u32 ts, u32 max_records)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -752,6 +789,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
u32 head;
u32 taken;
int ret = 0;
+ int n_records = 0;
BUG_ON(stream->state != I915_PERF_STREAM_ENABLED);
@@ -791,7 +829,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
tail &= mask;
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) && (n_records <= max_records);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -824,6 +862,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
ret = append_oa_buffer_sample(stream, read_state, report);
if (ret)
break;
+ n_records++;
/* The above report-id field sanity check is based on
* the assumption that the OA buffer is initially
@@ -848,7 +887,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
* updated @read_state.
*/
static int gen7_oa_read(struct i915_perf_stream *stream,
- struct i915_perf_read_state *read_state, u32 ts)
+ struct i915_perf_read_state *read_state,
+ u32 ts, u32 max_records)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -920,7 +960,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
GEN7_OASTATUS1_REPORT_LOST;
}
- ret = gen7_append_oa_reports(stream, read_state, &head, tail, ts);
+ ret = gen7_append_oa_reports(stream, read_state, &head, tail,
+ ts, max_records);
/* All the report sizes are a power of two and the
* head should always be incremented by some multiple
@@ -966,7 +1007,8 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream,
/* First, append the periodic OA samples having lower timestamps */
report_ts = *(u32 *)(report + 4);
- ret = dev_priv->perf.oa.ops.read(stream, read_state, report_ts);
+ ret = dev_priv->perf.oa.ops.read(stream, read_state,
+ report_ts, U32_MAX);
if (ret)
return ret;
@@ -983,7 +1025,8 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream,
}
/**
- * Copies all command stream based OA reports into userspace read() buffer.
+ * Copies all OA reports into userspace read() buffer. This includes command
+ * stream as well as periodic OA reports.
*
* NB: some data may be successfully copied to the userspace buffer
* even if an error is returned, and this is reflected in the
@@ -1000,7 +1043,7 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_lock(&dev_priv->perf.node_list_lock);
if (list_empty(&dev_priv->perf.node_list)) {
spin_unlock(&dev_priv->perf.node_list_lock);
- return 0;
+ goto pending_periodic;
}
list_for_each_entry_safe(entry, next,
&dev_priv->perf.node_list, link) {
@@ -1011,7 +1054,7 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_unlock(&dev_priv->perf.node_list_lock);
if (list_empty(&free_list))
- return 0;
+ goto pending_periodic;
list_for_each_entry_safe(entry, next, &free_list, link) {
ret = append_oa_rcs_sample(stream, read_state, entry);
@@ -1029,16 +1072,35 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_unlock(&dev_priv->perf.node_list_lock);
return ret;
+
+pending_periodic:
+ if (!dev_priv->perf.oa.n_pending_periodic_samples)
+ return 0;
+
+ ret = dev_priv->perf.oa.ops.read(stream, read_state,
+ dev_priv->perf.oa.pending_periodic_ts,
+ dev_priv->perf.oa.n_pending_periodic_samples);
+ dev_priv->perf.oa.n_pending_periodic_samples = 0;
+ dev_priv->perf.oa.pending_periodic_ts = 0;
+ return ret;
}
+enum cs_buf_data_state {
+ CS_BUF_EMPTY,
+ CS_BUF_REQ_PENDING,
+ CS_BUF_HAVE_DATA,
+};
+
/*
* Checks whether the command stream buffer associated with the stream has
* data ready to be forwarded to userspace.
- * Returns true if atleast one request associated with command stream is
- * completed, else returns false.
+ * Value returned:
+ * CS_BUF_HAVE_DATA - if there is atleast one completed request
+ * CS_BUF_REQ_PENDING - there are requests pending, but no completed requests
+ * CS_BUF_EMPTY - no requests scheduled
*/
-static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
-
+static enum cs_buf_data_state command_stream_buf_state(
+ struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_perf_cs_data_node *entry = NULL;
@@ -1052,37 +1114,63 @@ static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
spin_unlock(&dev_priv->perf.node_list_lock);
if (!entry)
- return true;
+ return CS_BUF_EMPTY;
else if (!i915_gem_request_completed(request, true))
- return true;
+ return CS_BUF_REQ_PENDING;
else
- return false;
+ return CS_BUF_HAVE_DATA;
}
/*
- * Checks whether the stream has data ready to forward to userspace.
- * For command stream based streams, check if the command stream buffer has
- * atleast one sample ready, if not return false, irrespective of periodic
- * oa buffer having the data or not.
+ * Checks whether the stream has data ready to forward to userspace, by
+ * querying for periodic oa buffer and command stream buffer samples.
*/
static bool stream_have_data__unlocked(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ enum cs_buf_data_state cs_buf_state;
+ u32 num_samples, last_ts = 0;
- /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
- * just performs mmio reads of the OA buffer head + tail pointers and
+ /* Note: oa_buffer_num_samples() is ok to run unlocked as it just
+ * performs mmio reads of the OA buffer head + tail pointers and
* it's assumed we're handling some operation that implies the stream
* can't be destroyed until completion (such as a read()) that ensures
* the device + OA buffer can't disappear
*/
+ dev_priv->perf.oa.n_pending_periodic_samples = 0;
+ dev_priv->perf.oa.pending_periodic_ts = 0;
+ num_samples = dev_priv->perf.oa.ops.oa_buffer_num_samples(dev_priv,
+ &last_ts);
if (stream->cs_mode)
- return !command_stream_buf_is_empty(stream);
+ cs_buf_state = command_stream_buf_state(stream);
else
- return !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv);
+ cs_buf_state = CS_BUF_EMPTY;
+
+ /*
+ * Note: We can safely forward the periodic OA samples in the case we
+ * have no pending CS samples, but we can't do so in the case we have
+ * pending CS samples, since we don't know what the ordering between
+ * pending CS samples and periodic samples will eventually be. If we
+ * have no pending CS sample, it won't be possible for future pending CS
+ * sample to have timestamps earlier than current periodic timestamp.
+ */
+ switch (cs_buf_state) {
+ case CS_BUF_EMPTY:
+ dev_priv->perf.oa.n_pending_periodic_samples = num_samples;
+ dev_priv->perf.oa.pending_periodic_ts = last_ts;
+ return (num_samples != 0);
+
+ case CS_BUF_HAVE_DATA:
+ return true;
+
+ case CS_BUF_REQ_PENDING:
+ default:
+ return false;
+ }
}
-static bool i915_oa_can_read(struct i915_perf_stream *stream)
+static bool i915_oa_can_read_unlocked(struct i915_perf_stream *stream)
{
return stream_have_data__unlocked(stream);
@@ -1124,7 +1212,8 @@ static int i915_oa_read(struct i915_perf_stream *stream,
if (stream->cs_mode)
return oa_rcs_append_reports(stream, read_state);
else
- return dev_priv->perf.oa.ops.read(stream, read_state, U32_MAX);
+ return dev_priv->perf.oa.ops.read(stream, read_state,
+ U32_MAX, U32_MAX);
}
static void
@@ -1683,7 +1772,7 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = {
.destroy = i915_oa_stream_destroy,
.enable = i915_oa_stream_enable,
.disable = i915_oa_stream_disable,
- .can_read = i915_oa_can_read,
+ .can_read_unlocked = i915_oa_can_read_unlocked,
.wait_unlocked = i915_oa_wait_unlocked,
.poll_wait = i915_oa_poll_wait,
.read = i915_oa_read,
@@ -2132,7 +2221,7 @@ static unsigned int i915_perf_poll_locked(struct i915_perf_stream *stream,
stream->ops->poll_wait(stream, file, wait);
- if (stream->ops->can_read(stream))
+ if (stream->ops->can_read_unlocked(stream))
streams |= POLLIN;
return streams;
@@ -2607,8 +2696,8 @@ void i915_perf_init(struct drm_device *dev)
dev_priv->perf.oa.ops.update_hw_ctx_id_locked =
gen7_update_hw_ctx_id_locked;
dev_priv->perf.oa.ops.read = gen7_oa_read;
- dev_priv->perf.oa.ops.oa_buffer_is_empty =
- gen7_oa_buffer_is_empty_fop_unlocked;
+ dev_priv->perf.oa.ops.oa_buffer_num_samples =
+ gen7_oa_buffer_num_samples_fop_unlocked;
dev_priv->perf.oa.timestamp_frequency = 12500000;
@@ -2624,8 +2713,8 @@ void i915_perf_init(struct drm_device *dev)
dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
dev_priv->perf.oa.ops.read = gen8_oa_read;
- dev_priv->perf.oa.ops.oa_buffer_is_empty =
- gen8_oa_buffer_is_empty_fop_unlocked;
+ dev_priv->perf.oa.ops.oa_buffer_num_samples =
+ gen8_oa_buffer_num_samples_fop_unlocked;
dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;