@@ -2110,10 +2110,11 @@ struct i915_oa_ops {
char __user *buf,
size_t count,
size_t *offset,
- u32 ts);
+ u32 ts,
+ u32 max_reports);
/**
- * @oa_buffer_check: Check for OA buffer data + update tail
+ * @oa_buffer_num_reports: Return number of OA reports + update tail
*
* This is either called via fops or the poll check hrtimer (atomic
* ctx) without any locks taken.
@@ -2126,7 +2127,8 @@ struct i915_oa_ops {
* here, which will be handled gracefully - likely resulting in an
* %EAGAIN error for userspace.
*/
- bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
+ u32 (*oa_buffer_num_reports)(struct drm_i915_private *dev_priv,
+ u32 *last_ts);
};
/*
@@ -2589,6 +2591,8 @@ struct drm_i915_private {
u32 gen7_latched_oastatus1;
u32 ctx_oactxctrl_off;
u32 ctx_flexeu0_off;
+ u32 n_pending_periodic_samples;
+ u32 pending_periodic_ts;
/* The RPT_ID/reason field for Gen8+ includes a bit
* to determine if the CTX ID in the report is valid
@@ -583,7 +583,7 @@ static void i915_oa_rcs_release_samples(struct drm_i915_private *dev_priv)
}
/**
- * gen8_oa_buffer_check_unlocked - check for data and update tail ptr state
+ * gen8_oa_buffer_num_reports_unlocked - check for data and update tail ptr state
* @dev_priv: i915 device instance
*
* This is either called via fops (for blocking reads in user ctx) or the poll
@@ -596,7 +596,7 @@ static void i915_oa_rcs_release_samples(struct drm_i915_private *dev_priv)
* the pointers time to 'age' before they are made available for reading.
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
- * Besides returning true when there is data available to read() this function
+ * Besides returning num of reports when there is data available to read() it
* also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
* and .aged_tail_idx state used for reading.
*
@@ -604,14 +604,15 @@ static void i915_oa_rcs_release_samples(struct drm_i915_private *dev_priv)
* only called while the stream is enabled, while the global OA configuration
* can't be modified.
*
- * Returns: %true if the OA buffer contains data, else %false
+ * Returns: number of samples available to read
*/
-static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
+static u32 gen8_oa_buffer_num_reports_unlocked(
+ struct drm_i915_private *dev_priv, u32 *last_ts)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
unsigned long flags;
unsigned int aged_idx;
- u32 head, hw_tail, aged_tail, aging_tail;
+ u32 head, hw_tail, aged_tail, aging_tail, num_reports = 0;
u64 now;
/* We have to consider the (unlikely) possibility that read() errors
@@ -652,6 +653,13 @@ static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
if (aging_tail != INVALID_TAIL_PTR &&
((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
OA_TAIL_MARGIN_NSEC)) {
+ u32 mask = (OA_BUFFER_SIZE - 1);
+ u32 gtt_offset = i915_ggtt_offset(
+ dev_priv->perf.oa.oa_buffer.vma);
+ u32 head = (dev_priv->perf.oa.oa_buffer.head - gtt_offset)
+ & mask;
+ u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
+ u32 *report32;
aged_idx ^= 1;
dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
@@ -661,6 +669,14 @@ static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
/* Mark that we need a new pointer to start aging... */
dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
aging_tail = INVALID_TAIL_PTR;
+
+ num_reports = OA_TAKEN(((aged_tail - gtt_offset) & mask), head)/
+ report_size;
+
+ /* read the timestamp of last OA report */
+ head = (head + report_size*(num_reports - 1)) & mask;
+ report32 = (u32 *)(oa_buf_base + head);
+ *last_ts = report32[1];
}
/* Update the aging tail
@@ -694,12 +710,11 @@ static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return aged_tail == INVALID_TAIL_PTR ? 0 : num_reports;
}
/**
- * gen7_oa_buffer_check_unlocked - check for data and update tail ptr state
+ * gen7_oa_buffer_num_reports_unlocked - check for data and update tail ptr state
* @dev_priv: i915 device instance
*
* This is either called via fops (for blocking reads in user ctx) or the poll
@@ -712,7 +727,7 @@ static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* the pointers time to 'age' before they are made available for reading.
* (See description of OA_TAIL_MARGIN_NSEC above for further details.)
*
- * Besides returning true when there is data available to read() this function
+ * Besides returning num of reports when there is data available to read() it
* also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
* and .aged_tail_idx state used for reading.
*
@@ -720,15 +735,16 @@ static bool gen8_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
* only called while the stream is enabled, while the global OA configuration
* can't be modified.
*
- * Returns: %true if the OA buffer contains data, else %false
+ * Returns: number of samples available to read
*/
-static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
+static u32 gen7_oa_buffer_num_reports_unlocked(
+ struct drm_i915_private *dev_priv, u32 *last_ts)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
unsigned long flags;
unsigned int aged_idx;
u32 oastatus1;
- u32 head, hw_tail, aged_tail, aging_tail;
+ u32 head, hw_tail, aged_tail, aging_tail, num_reports = 0;
u64 now;
/* We have to consider the (unlikely) possibility that read() errors
@@ -770,6 +786,14 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
if (aging_tail != INVALID_TAIL_PTR &&
((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
OA_TAIL_MARGIN_NSEC)) {
+ u32 mask = (OA_BUFFER_SIZE - 1);
+ u32 gtt_offset = i915_ggtt_offset(
+ dev_priv->perf.oa.oa_buffer.vma);
+ u32 head = (dev_priv->perf.oa.oa_buffer.head - gtt_offset)
+ & mask;
+ u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
+ u32 *report32;
+
aged_idx ^= 1;
dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
@@ -778,6 +802,14 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
/* Mark that we need a new pointer to start aging... */
dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
aging_tail = INVALID_TAIL_PTR;
+
+ num_reports = OA_TAKEN(((aged_tail - gtt_offset) & mask), head)/
+ report_size;
+
+ /* read the timestamp of last OA report */
+ head = (head + report_size*(num_reports - 1)) & mask;
+ report32 = (u32 *)(oa_buf_base + head);
+ *last_ts = report32[1];
}
/* Update the aging tail
@@ -811,8 +843,7 @@ static bool gen7_oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return aged_tail == INVALID_TAIL_PTR ? 0 : num_reports;
}
/**
@@ -955,6 +986,7 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
* @count: the number of bytes userspace wants to read
* @offset: (inout): the current position for writing into @buf
* @ts: copy OA reports till this timestamp
+ * @max_reports: max number of OA reports to copy
*
* Notably any error condition resulting in a short read (-%ENOSPC or
* -%EFAULT) will be returned even though one or more records may
@@ -973,7 +1005,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset,
- u32 ts)
+ u32 ts,
+ u32 max_reports)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -986,6 +1019,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u32 head, tail;
u32 taken;
int ret = 0;
+ u32 report_count = 0;
if (WARN_ON(stream->state != I915_PERF_STREAM_ENABLED))
return -EIO;
@@ -1024,7 +1058,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) && (report_count <= max_reports);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -1127,6 +1161,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
+ report_count++;
dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
}
@@ -1163,6 +1198,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* @count: the number of bytes userspace wants to read
* @offset: (inout): the current position for writing into @buf
* @ts: copy OA reports till this timestamp
+ * @max_reports: max number of OA reports to copy
*
* Checks OA unit status registers and if necessary appends corresponding
* status records for userspace (such as for a buffer full condition) and then
@@ -1181,7 +1217,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset,
- u32 ts)
+ u32 ts,
+ u32 max_reports)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
u32 oastatus;
@@ -1231,7 +1268,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
}
}
- return gen8_append_oa_reports(stream, buf, count, offset, ts);
+ return gen8_append_oa_reports(stream, buf, count, offset, ts,
+ max_reports);
}
/**
@@ -1241,6 +1279,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
* @count: the number of bytes userspace wants to read
* @offset: (inout): the current position for writing into @buf
* @ts: copy OA reports till this timestamp
+ * @max_reports: max number of OA reports to copy
*
* Notably any error condition resulting in a short read (-%ENOSPC or
* -%EFAULT) will be returned even though one or more records may
@@ -1259,7 +1298,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset,
- u32 ts)
+ u32 ts,
+ u32 max_reports)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
@@ -1272,6 +1312,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
u32 head, tail;
u32 taken;
int ret = 0;
+ u32 report_count = 0;
if (WARN_ON(stream->state != I915_PERF_STREAM_ENABLED))
return -EIO;
@@ -1310,7 +1351,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) && (report_count <= max_reports);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -1348,6 +1389,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (ret)
break;
+ report_count++;
/* The above report-id field sanity check is based on
* the assumption that the OA buffer is initially
* zeroed and we reset the field after copying so the
@@ -1384,6 +1426,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
* @count: the number of bytes userspace wants to read
* @offset: (inout): the current position for writing into @buf
* @ts: copy OA reports till this timestamp
+ * @max_reports: max number of OA reports to copy
*
* Checks Gen 7 specific OA unit status registers and if necessary appends
* corresponding status records for userspace (such as for a buffer full
@@ -1398,7 +1441,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset,
- u32 ts)
+ u32 ts,
+ u32 max_reports)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
u32 oastatus1;
@@ -1460,7 +1504,8 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
GEN7_OASTATUS1_REPORT_LOST;
}
- return gen7_append_oa_reports(stream, buf, count, offset, ts);
+ return gen7_append_oa_reports(stream, buf, count, offset, ts,
+ max_reports);
}
/**
@@ -1490,7 +1535,8 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream,
/* First, append the periodic OA samples having lower timestamps */
report_ts = *(u32 *)(report + 4);
- ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset, report_ts);
+ ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset,
+ report_ts, U32_MAX);
if (ret)
return ret;
@@ -1535,7 +1581,7 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_lock(&dev_priv->perf.sample_lock);
if (list_empty(&dev_priv->perf.cs_samples)) {
spin_unlock(&dev_priv->perf.sample_lock);
- return 0;
+ goto pending_periodic;
}
list_for_each_entry_safe(entry, next,
&dev_priv->perf.cs_samples, link) {
@@ -1546,7 +1592,7 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_unlock(&dev_priv->perf.sample_lock);
if (list_empty(&free_list))
- return 0;
+ goto pending_periodic;
list_for_each_entry_safe(entry, next, &free_list, link) {
ret = append_oa_rcs_sample(stream, buf, count, offset, entry);
@@ -1564,18 +1610,37 @@ static int oa_rcs_append_reports(struct i915_perf_stream *stream,
spin_unlock(&dev_priv->perf.sample_lock);
return ret;
+
+pending_periodic:
+ if (!dev_priv->perf.oa.n_pending_periodic_samples)
+ return 0;
+
+ ret = dev_priv->perf.oa.ops.read(stream, buf, count, offset,
+ dev_priv->perf.oa.pending_periodic_ts,
+ dev_priv->perf.oa.n_pending_periodic_samples);
+ dev_priv->perf.oa.n_pending_periodic_samples = 0;
+ dev_priv->perf.oa.pending_periodic_ts = 0;
+ return ret;
}
+enum cs_buf_state {
+ CS_BUF_EMPTY,
+ CS_BUF_REQ_PENDING,
+ CS_BUF_HAVE_DATA,
+};
+
/*
- * command_stream_buf_is_empty - Checks whether the command stream buffer
+ * command_stream_buf_state - Checks whether the command stream buffer
* associated with the stream has data available.
* @stream: An i915-perf stream opened for OA metrics
*
- * Returns: true if atleast one request associated with command stream is
- * completed, else returns false.
+ * Returns:
+ * CS_BUF_HAVE_DATA - if there is atleast one completed request
+ * CS_BUF_REQ_PENDING - there are requests pending, but no completed requests
+ * CS_BUF_EMPTY - no requests scheduled
*/
-static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
-
+static enum cs_buf_state command_stream_buf_state(
+ struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
struct i915_perf_cs_sample *entry = NULL;
@@ -1589,30 +1654,54 @@ static bool command_stream_buf_is_empty(struct i915_perf_stream *stream)
spin_unlock(&dev_priv->perf.sample_lock);
if (!entry)
- return true;
+ return CS_BUF_EMPTY;
else if (!i915_gem_request_completed(request))
- return true;
+ return CS_BUF_REQ_PENDING;
else
- return false;
+ return CS_BUF_HAVE_DATA;
}
/**
* stream_have_data_unlocked - Checks whether the stream has data available
* @stream: An i915-perf stream opened for OA metrics
*
- * For command stream based streams, check if the command stream buffer has
- * atleast one sample available, if not return false, irrespective of periodic
- * oa buffer having the data or not.
+ * Note: We can safely forward the periodic OA samples in the case we have no
+ * pending CS samples, but we can't do so in the case we have pending CS
+ * samples, since we don't know what the ordering between pending CS samples
+ * and periodic samples will eventually be. If we have no pending CS sample,
+ * it won't be possible for future pending CS sample to have timestamps
+ * earlier than current periodic timestamp.
*/
static bool stream_have_data_unlocked(struct i915_perf_stream *stream)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ enum cs_buf_state state;
+ u32 num_samples, last_ts = 0;
+ dev_priv->perf.oa.n_pending_periodic_samples = 0;
+ dev_priv->perf.oa.pending_periodic_ts = 0;
+ num_samples = dev_priv->perf.oa.ops.oa_buffer_num_reports(dev_priv,
+ &last_ts);
if (stream->cs_mode)
- return !command_stream_buf_is_empty(stream);
+ state = command_stream_buf_state(stream);
else
- return dev_priv->perf.oa.ops.oa_buffer_check(dev_priv);
+ state = CS_BUF_EMPTY;
+
+ switch (state) {
+ case CS_BUF_EMPTY:
+ dev_priv->perf.oa.n_pending_periodic_samples = num_samples;
+ dev_priv->perf.oa.pending_periodic_ts = last_ts;
+ return (num_samples != 0);
+
+ case CS_BUF_HAVE_DATA:
+ return true;
+
+ case CS_BUF_REQ_PENDING:
+ default:
+ return false;
+ }
+ return false;
}
/**
@@ -1698,7 +1787,7 @@ static int i915_oa_read(struct i915_perf_stream *stream,
return oa_rcs_append_reports(stream, buf, count, offset);
else
return dev_priv->perf.oa.ops.read(stream, buf, count, offset,
- U32_MAX);
+ U32_MAX, U32_MAX);
}
/**
@@ -3688,8 +3777,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
dev_priv->perf.oa.ops.read = gen7_oa_read;
- dev_priv->perf.oa.ops.oa_buffer_check =
- gen7_oa_buffer_check_unlocked;
+ dev_priv->perf.oa.ops.oa_buffer_num_reports =
+ gen7_oa_buffer_num_reports_unlocked;
dev_priv->perf.oa.oa_formats = hsw_oa_formats;
@@ -3753,8 +3842,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
dev_priv->perf.oa.ops.read = gen8_oa_read;
- dev_priv->perf.oa.ops.oa_buffer_check =
- gen8_oa_buffer_check_unlocked;
+ dev_priv->perf.oa.ops.oa_buffer_num_reports =
+ gen8_oa_buffer_num_reports_unlocked;
dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
}