Message ID | 20190913230620.15906-2-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/perf: Enable non-power-of-2 OA report sizes | expand |
On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote: > From: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > Right now the workaround against the OA tail pointer race condition > requires at least twice the internal kernel polling timer to make any > data available. > > This changes introduce checks on the OA data written into the circular > buffer to make as much data as possible available on the first > iteration of the polling timer. > > v2: Use OA_TAKEN macro without the gtt_offset (Lionel) > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 30 ++--- > drivers/gpu/drm/i915/i915_perf.c | 200 ++++++++++++++----------------- > 2 files changed, 103 insertions(+), 127 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index bf600888b3f1..876aeaf3568e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1180,21 +1180,11 @@ struct i915_perf_stream { > spinlock_t ptr_lock; > > /** > - * One 'aging' tail pointer and one 'aged' tail pointer ready to > - * used for reading. > - * > - * Initial values of 0xffffffff are invalid and imply that an > - * update is required (and should be ignored by an attempted > - * read) > - */ > - struct { > - u32 offset; > - } tails[2]; > - > - /** > - * Index for the aged tail ready to read() data up to. > + * The last HW tail reported by HW. The data > + * might not have made it to memory yet > + * though. > */ > - unsigned int aged_tail_idx; > + u32 aging_tail; > > /** > * A monotonic timestamp for when the current aging tail pointer > @@ -1210,6 +1200,12 @@ struct i915_perf_stream { > * OA buffer data to userspace. > */ > u32 head; > + > + /** > + * The last verified tail that can be read > + * by user space > + */ > + u32 tail; > } oa_buffer; > }; > > @@ -1693,6 +1689,12 @@ struct drm_i915_private { > */ > struct ratelimit_state spurious_report_rs; > > + /** > + * For rate limiting any notifications of tail pointer > + * race. > + */ > + struct ratelimit_state tail_pointer_race; > + > struct i915_oa_config test_config; > > u32 gen7_latched_oastatus1; > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index c1b764233761..50b6d154fd46 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -237,23 +237,14 @@ > * for this earlier, as part of the oa_buffer_check to avoid lots of redundant > * read() attempts. > * > - * In effect we define a tail pointer for reading that lags the real tail > - * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough > - * time for the corresponding reports to become visible to the CPU. > - * > - * To manage this we actually track two tail pointers: > - * 1) An 'aging' tail with an associated timestamp that is tracked until we > - * can trust the corresponding data is visible to the CPU; at which point > - * it is considered 'aged'. > - * 2) An 'aged' tail that can be used for read()ing. > - * > - * The two separate pointers let us decouple read()s from tail pointer aging. > - * > - * The tail pointers are checked and updated at a limited rate within a hrtimer > - * callback (the same callback that is used for delivering EPOLLIN events) > - * > - * Initially the tails are marked invalid with %INVALID_TAIL_PTR which > - * indicates that an updated tail pointer is needed. > + * We workaround this issue in oa_buffer_check() by reading the reports in the > + * OA buffer, starting from the tail reported by the HW until we find 2 > + * consecutive reports with their first 2 dwords of not at 0. Those dwords are > + * also set to 0 once read and the whole buffer is cleared upon OA buffer > + * initialization. The first dword is the reason for this report while the > + * second is the timestamp, making the chances of having those 2 fields at 0 > + * fairly unlikely. A more detailed explanation is available in > + * oa_buffer_check(). > * > * Most of the implementation details for this workaround are in > * oa_buffer_check_unlocked() and _append_oa_reports() > @@ -266,7 +257,6 @@ > * enabled without any periodic sampling. > */ > #define OA_TAIL_MARGIN_NSEC 100000ULL > -#define INVALID_TAIL_PTR 0xffffffff > > /* frequency for checking whether the OA unit has written new reports to the > * circular OA buffer... > @@ -457,10 +447,10 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) > static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > { > struct drm_i915_private *dev_priv = stream->dev_priv; > + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); > int report_size = stream->oa_buffer.format_size; > unsigned long flags; > - unsigned int aged_idx; > - u32 head, hw_tail, aged_tail, aging_tail; > + u32 hw_tail; > u64 now; > > /* We have to consider the (unlikely) possibility that read() errors > @@ -469,16 +459,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > */ > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > - /* NB: The head we observe here might effectively be a little out of > - * date (between head and tails[aged_idx].offset if there is currently > - * a read() in progress. > - */ > - head = stream->oa_buffer.head; > - > - aged_idx = stream->oa_buffer.aged_tail_idx; > - aged_tail = stream->oa_buffer.tails[aged_idx].offset; > - aging_tail = stream->oa_buffer.tails[!aged_idx].offset; > - > hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); > > /* The tail pointer increases in 64 byte increments, > @@ -488,63 +468,75 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > > now = ktime_get_mono_fast_ns(); > > - /* Update the aged tail > - * > - * Flip the tail pointer available for read()s once the aging tail is > - * old enough to trust that the corresponding data will be visible to > - * the CPU... > - * > - * Do this before updating the aging pointer in case we may be able to > - * immediately start aging a new pointer too (if new data has become > - * available) without needing to wait for a later hrtimer callback. > - */ > - if (aging_tail != INVALID_TAIL_PTR && > - ((now - stream->oa_buffer.aging_timestamp) > > - OA_TAIL_MARGIN_NSEC)) { > - > - aged_idx ^= 1; > - stream->oa_buffer.aged_tail_idx = aged_idx; > + if (hw_tail == stream->oa_buffer.aging_tail) { > + /* If the HW tail hasn't move since the last check and the HW > + * tail has been aging for long enough, declare it the new > + * tail. > + */ > + if ((now - stream->oa_buffer.aging_timestamp) > > + OA_TAIL_MARGIN_NSEC) { > + stream->oa_buffer.tail = > + stream->oa_buffer.aging_tail; > + } > + } else { > + u32 head, tail, landed_report_heads; > > - aged_tail = aging_tail; > + /* NB: The head we observe here might effectively be a little out of > + * date (between head and tails[aged_idx].offset if there is currently > + * a read() in progress. > + */ > + head = stream->oa_buffer.head - gtt_offset; > > - /* Mark that we need a new pointer to start aging... */ > - stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; > - aging_tail = INVALID_TAIL_PTR; > - } > + hw_tail -= gtt_offset; > + tail = hw_tail; > > - /* Update the aging tail > - * > - * We throttle aging tail updates until we have a new tail that > - * represents >= one report more data than is already available for > - * reading. This ensures there will be enough data for a successful > - * read once this new pointer has aged and ensures we will give the new > - * pointer time to age. > - */ > - if (aging_tail == INVALID_TAIL_PTR && > - (aged_tail == INVALID_TAIL_PTR || > - OA_TAKEN(hw_tail, aged_tail) >= report_size)) { > - struct i915_vma *vma = stream->oa_buffer.vma; > - u32 gtt_offset = i915_ggtt_offset(vma); > - > - /* Be paranoid and do a bounds check on the pointer read back > - * from hardware, just in case some spurious hardware condition > - * could put the tail out of bounds... > + /* Walk the stream backward until we find at least 2 reports > + * with dword 0 & 1 not at 0. Since the circular buffer > + * pointers progress by increments of 64 bytes and that > + * reports can be up to 256 bytes long, we can't tell whether > + * a report has fully landed in memory before the first 2 > + * dwords of the following report have effectively landed. > + * > + * This is assuming that the writes of the OA unit land in > + * memory in the order they were written to. > + * If not : (╯°□°)╯︵ ┻━┻ > */ > - if (hw_tail >= gtt_offset && > - hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { > - stream->oa_buffer.tails[!aged_idx].offset = > - aging_tail = hw_tail; > - stream->oa_buffer.aging_timestamp = now; > - } else { > - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", > - hw_tail); > + landed_report_heads = 0; > + while (OA_TAKEN(tail, head) >= report_size) { > + u32 previous_tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); > + u8 *report = stream->oa_buffer.vaddr + previous_tail; > + u32 *report32 = (void *) report; > + > + /* Head of the report indicated by the HW tail register has > + * indeed landed into memory. > + */ > + if (report32[0] != 0 || report[1] != 0) { Just noticed this is wrong, it should be : if (report32[0] != || report32[1] != 0) { report is not a pointer to a dword. -Lionel > + landed_report_heads++; > + > + if (landed_report_heads >= 2) > + break; > + } > + > + tail = previous_tail; > + } > + > + if (abs(tail - hw_tail) >= (2 * report_size)) { > + if (__ratelimit(&dev_priv->perf.tail_pointer_race)) { > + DRM_NOTE("unlanded report(s) head=0x%x " > + "tail=0x%x hw_tail=0x%x\n", > + head, tail, hw_tail); > + } > } > + > + stream->oa_buffer.tail = gtt_offset + tail; > + stream->oa_buffer.aging_tail = gtt_offset + hw_tail; > + stream->oa_buffer.aging_timestamp = now; > } > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > - return aged_tail == INVALID_TAIL_PTR ? > - false : OA_TAKEN(aged_tail, head) >= report_size; > + return OA_TAKEN(stream->oa_buffer.tail - gtt_offset, > + stream->oa_buffer.head - gtt_offset) >= report_size; > } > > /** > @@ -662,7 +654,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > u32 mask = (OA_BUFFER_SIZE - 1); > size_t start_offset = *offset; > unsigned long flags; > - unsigned int aged_tail_idx; > u32 head, tail; > u32 taken; > int ret = 0; > @@ -673,18 +664,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > head = stream->oa_buffer.head; > - aged_tail_idx = stream->oa_buffer.aged_tail_idx; > - tail = stream->oa_buffer.tails[aged_tail_idx].offset; > + tail = stream->oa_buffer.tail; > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > - /* > - * An invalid tail pointer here means we're still waiting for the poll > - * hrtimer callback to give us a pointer > - */ > - if (tail == INVALID_TAIL_PTR) > - return -EAGAIN; > - > /* > * NB: oa_buffer.head/tail include the gtt_offset which we don't want > * while indexing relative to oa_buf_base. > @@ -812,13 +795,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > } > > /* > - * The above reason field sanity check is based on > - * the assumption that the OA buffer is initially > - * zeroed and we reset the field after copying so the > - * check is still meaningful once old reports start > - * being overwritten. > + * Clear out the first 2 dword as a mean to detect unlanded > + * reports. > */ > - report32[0] = 0; > + report32[0] = report32[1] = 0; > } > > if (start_offset != *offset) { > @@ -950,7 +930,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > u32 mask = (OA_BUFFER_SIZE - 1); > size_t start_offset = *offset; > unsigned long flags; > - unsigned int aged_tail_idx; > u32 head, tail; > u32 taken; > int ret = 0; > @@ -961,17 +940,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > head = stream->oa_buffer.head; > - aged_tail_idx = stream->oa_buffer.aged_tail_idx; > - tail = stream->oa_buffer.tails[aged_tail_idx].offset; > + tail = stream->oa_buffer.tail; > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > - /* An invalid tail pointer here means we're still waiting for the poll > - * hrtimer callback to give us a pointer > - */ > - if (tail == INVALID_TAIL_PTR) > - return -EAGAIN; > - > /* NB: oa_buffer.head/tail include the gtt_offset which we don't want > * while indexing relative to oa_buf_base. > */ > @@ -1026,13 +998,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > if (ret) > break; > > - /* The above report-id field sanity check is based on > - * the assumption that the OA buffer is initially > - * zeroed and we reset the field after copying so the > - * check is still meaningful once old reports start > - * being overwritten. > + /* Clear out the first 2 dwords as a mean to detect unlanded > + * reports. > */ > - report32[0] = 0; > + report32[0] = report32[1] = 0; > } > > if (start_offset != *offset) { > @@ -1411,8 +1380,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) > I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ > > /* Mark that we need updated tail pointers to read from... */ > - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; > - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; > + stream->oa_buffer.aging_tail = > + stream->oa_buffer.tail = gtt_offset; > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > > @@ -1468,8 +1437,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) > I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); > > /* Mark that we need updated tail pointers to read from... */ > - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; > - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; > + stream->oa_buffer.aging_tail = > + stream->oa_buffer.tail = gtt_offset; > > /* > * Reset state used to recognise context switches, affecting which > @@ -3672,6 +3641,11 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, > RATELIMIT_MSG_ON_RELEASE); > > + ratelimit_state_init(&dev_priv->perf.tail_pointer_race, > + 5 * HZ, 10); > + ratelimit_set_flags(&dev_priv->perf.tail_pointer_race, > + RATELIMIT_MSG_ON_RELEASE); > + > dev_priv->perf.initialized = true; > } > }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bf600888b3f1..876aeaf3568e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1180,21 +1180,11 @@ struct i915_perf_stream { spinlock_t ptr_lock; /** - * One 'aging' tail pointer and one 'aged' tail pointer ready to - * used for reading. - * - * Initial values of 0xffffffff are invalid and imply that an - * update is required (and should be ignored by an attempted - * read) - */ - struct { - u32 offset; - } tails[2]; - - /** - * Index for the aged tail ready to read() data up to. + * The last HW tail reported by HW. The data + * might not have made it to memory yet + * though. */ - unsigned int aged_tail_idx; + u32 aging_tail; /** * A monotonic timestamp for when the current aging tail pointer @@ -1210,6 +1200,12 @@ struct i915_perf_stream { * OA buffer data to userspace. */ u32 head; + + /** + * The last verified tail that can be read + * by user space + */ + u32 tail; } oa_buffer; }; @@ -1693,6 +1689,12 @@ struct drm_i915_private { */ struct ratelimit_state spurious_report_rs; + /** + * For rate limiting any notifications of tail pointer + * race. + */ + struct ratelimit_state tail_pointer_race; + struct i915_oa_config test_config; u32 gen7_latched_oastatus1; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c1b764233761..50b6d154fd46 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -237,23 +237,14 @@ * for this earlier, as part of the oa_buffer_check to avoid lots of redundant * read() attempts. * - * In effect we define a tail pointer for reading that lags the real tail - * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough - * time for the corresponding reports to become visible to the CPU. - * - * To manage this we actually track two tail pointers: - * 1) An 'aging' tail with an associated timestamp that is tracked until we - * can trust the corresponding data is visible to the CPU; at which point - * it is considered 'aged'. - * 2) An 'aged' tail that can be used for read()ing. - * - * The two separate pointers let us decouple read()s from tail pointer aging. - * - * The tail pointers are checked and updated at a limited rate within a hrtimer - * callback (the same callback that is used for delivering EPOLLIN events) - * - * Initially the tails are marked invalid with %INVALID_TAIL_PTR which - * indicates that an updated tail pointer is needed. + * We workaround this issue in oa_buffer_check() by reading the reports in the + * OA buffer, starting from the tail reported by the HW until we find 2 + * consecutive reports with their first 2 dwords of not at 0. Those dwords are + * also set to 0 once read and the whole buffer is cleared upon OA buffer + * initialization. The first dword is the reason for this report while the + * second is the timestamp, making the chances of having those 2 fields at 0 + * fairly unlikely. A more detailed explanation is available in + * oa_buffer_check(). * * Most of the implementation details for this workaround are in * oa_buffer_check_unlocked() and _append_oa_reports() @@ -266,7 +257,6 @@ * enabled without any periodic sampling. */ #define OA_TAIL_MARGIN_NSEC 100000ULL -#define INVALID_TAIL_PTR 0xffffffff /* frequency for checking whether the OA unit has written new reports to the * circular OA buffer... @@ -457,10 +447,10 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); int report_size = stream->oa_buffer.format_size; unsigned long flags; - unsigned int aged_idx; - u32 head, hw_tail, aged_tail, aging_tail; + u32 hw_tail; u64 now; /* We have to consider the (unlikely) possibility that read() errors @@ -469,16 +459,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) */ spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - /* NB: The head we observe here might effectively be a little out of - * date (between head and tails[aged_idx].offset if there is currently - * a read() in progress. - */ - head = stream->oa_buffer.head; - - aged_idx = stream->oa_buffer.aged_tail_idx; - aged_tail = stream->oa_buffer.tails[aged_idx].offset; - aging_tail = stream->oa_buffer.tails[!aged_idx].offset; - hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); /* The tail pointer increases in 64 byte increments, @@ -488,63 +468,75 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) now = ktime_get_mono_fast_ns(); - /* Update the aged tail - * - * Flip the tail pointer available for read()s once the aging tail is - * old enough to trust that the corresponding data will be visible to - * the CPU... - * - * Do this before updating the aging pointer in case we may be able to - * immediately start aging a new pointer too (if new data has become - * available) without needing to wait for a later hrtimer callback. - */ - if (aging_tail != INVALID_TAIL_PTR && - ((now - stream->oa_buffer.aging_timestamp) > - OA_TAIL_MARGIN_NSEC)) { - - aged_idx ^= 1; - stream->oa_buffer.aged_tail_idx = aged_idx; + if (hw_tail == stream->oa_buffer.aging_tail) { + /* If the HW tail hasn't move since the last check and the HW + * tail has been aging for long enough, declare it the new + * tail. + */ + if ((now - stream->oa_buffer.aging_timestamp) > + OA_TAIL_MARGIN_NSEC) { + stream->oa_buffer.tail = + stream->oa_buffer.aging_tail; + } + } else { + u32 head, tail, landed_report_heads; - aged_tail = aging_tail; + /* NB: The head we observe here might effectively be a little out of + * date (between head and tails[aged_idx].offset if there is currently + * a read() in progress. + */ + head = stream->oa_buffer.head - gtt_offset; - /* Mark that we need a new pointer to start aging... */ - stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; - aging_tail = INVALID_TAIL_PTR; - } + hw_tail -= gtt_offset; + tail = hw_tail; - /* Update the aging tail - * - * We throttle aging tail updates until we have a new tail that - * represents >= one report more data than is already available for - * reading. This ensures there will be enough data for a successful - * read once this new pointer has aged and ensures we will give the new - * pointer time to age. - */ - if (aging_tail == INVALID_TAIL_PTR && - (aged_tail == INVALID_TAIL_PTR || - OA_TAKEN(hw_tail, aged_tail) >= report_size)) { - struct i915_vma *vma = stream->oa_buffer.vma; - u32 gtt_offset = i915_ggtt_offset(vma); - - /* Be paranoid and do a bounds check on the pointer read back - * from hardware, just in case some spurious hardware condition - * could put the tail out of bounds... + /* Walk the stream backward until we find at least 2 reports + * with dword 0 & 1 not at 0. Since the circular buffer + * pointers progress by increments of 64 bytes and that + * reports can be up to 256 bytes long, we can't tell whether + * a report has fully landed in memory before the first 2 + * dwords of the following report have effectively landed. + * + * This is assuming that the writes of the OA unit land in + * memory in the order they were written to. + * If not : (╯°□°)╯︵ ┻━┻ */ - if (hw_tail >= gtt_offset && - hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { - stream->oa_buffer.tails[!aged_idx].offset = - aging_tail = hw_tail; - stream->oa_buffer.aging_timestamp = now; - } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", - hw_tail); + landed_report_heads = 0; + while (OA_TAKEN(tail, head) >= report_size) { + u32 previous_tail = (tail - report_size) & (OA_BUFFER_SIZE - 1); + u8 *report = stream->oa_buffer.vaddr + previous_tail; + u32 *report32 = (void *) report; + + /* Head of the report indicated by the HW tail register has + * indeed landed into memory. + */ + if (report32[0] != 0 || report[1] != 0) { + landed_report_heads++; + + if (landed_report_heads >= 2) + break; + } + + tail = previous_tail; + } + + if (abs(tail - hw_tail) >= (2 * report_size)) { + if (__ratelimit(&dev_priv->perf.tail_pointer_race)) { + DRM_NOTE("unlanded report(s) head=0x%x " + "tail=0x%x hw_tail=0x%x\n", + head, tail, hw_tail); + } } + + stream->oa_buffer.tail = gtt_offset + tail; + stream->oa_buffer.aging_tail = gtt_offset + hw_tail; + stream->oa_buffer.aging_timestamp = now; } spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return aged_tail == INVALID_TAIL_PTR ? - false : OA_TAKEN(aged_tail, head) >= report_size; + return OA_TAKEN(stream->oa_buffer.tail - gtt_offset, + stream->oa_buffer.head - gtt_offset) >= report_size; } /** @@ -662,7 +654,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; - unsigned int aged_tail_idx; u32 head, tail; u32 taken; int ret = 0; @@ -673,18 +664,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); head = stream->oa_buffer.head; - aged_tail_idx = stream->oa_buffer.aged_tail_idx; - tail = stream->oa_buffer.tails[aged_tail_idx].offset; + tail = stream->oa_buffer.tail; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* - * An invalid tail pointer here means we're still waiting for the poll - * hrtimer callback to give us a pointer - */ - if (tail == INVALID_TAIL_PTR) - return -EAGAIN; - /* * NB: oa_buffer.head/tail include the gtt_offset which we don't want * while indexing relative to oa_buf_base. @@ -812,13 +795,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } /* - * The above reason field sanity check is based on - * the assumption that the OA buffer is initially - * zeroed and we reset the field after copying so the - * check is still meaningful once old reports start - * being overwritten. + * Clear out the first 2 dword as a mean to detect unlanded + * reports. */ - report32[0] = 0; + report32[0] = report32[1] = 0; } if (start_offset != *offset) { @@ -950,7 +930,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, u32 mask = (OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; - unsigned int aged_tail_idx; u32 head, tail; u32 taken; int ret = 0; @@ -961,17 +940,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); head = stream->oa_buffer.head; - aged_tail_idx = stream->oa_buffer.aged_tail_idx; - tail = stream->oa_buffer.tails[aged_tail_idx].offset; + tail = stream->oa_buffer.tail; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* An invalid tail pointer here means we're still waiting for the poll - * hrtimer callback to give us a pointer - */ - if (tail == INVALID_TAIL_PTR) - return -EAGAIN; - /* NB: oa_buffer.head/tail include the gtt_offset which we don't want * while indexing relative to oa_buf_base. */ @@ -1026,13 +998,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, if (ret) break; - /* The above report-id field sanity check is based on - * the assumption that the OA buffer is initially - * zeroed and we reset the field after copying so the - * check is still meaningful once old reports start - * being overwritten. + /* Clear out the first 2 dwords as a mean to detect unlanded + * reports. */ - report32[0] = 0; + report32[0] = report32[1] = 0; } if (start_offset != *offset) { @@ -1411,8 +1380,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.aging_tail = + stream->oa_buffer.tail = gtt_offset; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1468,8 +1437,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + stream->oa_buffer.aging_tail = + stream->oa_buffer.tail = gtt_offset; /* * Reset state used to recognise context switches, affecting which @@ -3672,6 +3641,11 @@ void i915_perf_init(struct drm_i915_private *dev_priv) ratelimit_set_flags(&dev_priv->perf.spurious_report_rs, RATELIMIT_MSG_ON_RELEASE); + ratelimit_state_init(&dev_priv->perf.tail_pointer_race, + 5 * HZ, 10); + ratelimit_set_flags(&dev_priv->perf.tail_pointer_race, + RATELIMIT_MSG_ON_RELEASE); + dev_priv->perf.initialized = true; } }