Message ID | 20230909011626.1643734-3-ashutosh.dixit@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/perf: A few fixes and enhancements | expand |
On Fri, Sep 08, 2023 at 06:16:25PM -0700, Ashutosh Dixit wrote: >There is no reason to add gtt_offset to the cached head/tail pointers >stream->oa_buffer.head and stream->oa_buffer.tail. This causes the code to >constantly add gtt_offset and subtract gtt_offset and is error >prone (e.g. see previous patch). > >It is much simpler to maintain stream->oa_buffer.head and >stream->oa_buffer.tail without adding gtt_offset to them and just allow for >the gtt_offset when reading/writing from/to HW registers. > >Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> >--- > drivers/gpu/drm/i915/i915_perf.c | 53 ++++++++------------------------ > 1 file changed, 13 insertions(+), 40 deletions(-) > >diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >index ec0fc2934045a..1347e4ec9dd5a 100644 >--- a/drivers/gpu/drm/i915/i915_perf.c >+++ b/drivers/gpu/drm/i915/i915_perf.c >@@ -543,10 +543,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > { > u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); > int report_size = stream->oa_buffer.format->size; >- u32 head, tail, read_tail; >+ u32 tail, hw_tail; > unsigned long flags; > bool pollin; >- u32 hw_tail; > u32 partial_report_size; > > /* We have to consider the (unlikely) possibility that read() errors >@@ -556,6 +555,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > hw_tail = stream->perf->ops.oa_hw_tail_read(stream); >+ hw_tail -= gtt_offset; Since this patch intends to remove gtt_offset for all head/tail calculations, we don't need patch 1/3. Patch 1 can be dropped. > > /* The tail pointer increases in 64 byte increments, not in report_size > * steps. Also the report size may not be a power of 2. Compute >@@ -565,16 +565,8 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > partial_report_size %= report_size; > > /* Subtract partial amount off the tail */ >- hw_tail -= gtt_offset; > hw_tail = OA_TAKEN(hw_tail, partial_report_size); > >- /* NB: The head we observe here might effectively be a little >- * out of date. If a read() is in progress, the head could be >- * anywhere between this head and stream->oa_buffer.tail. >- */ >- head = stream->oa_buffer.head - gtt_offset; >- read_tail = stream->oa_buffer.tail - gtt_offset; >- > tail = hw_tail; > > /* Walk the stream backward until we find a report with report >@@ -588,7 +580,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > * memory in the order they were written to. > * If not : (╯°□°)╯︵ ┻━┻ > */ >- while (OA_TAKEN(tail, read_tail) >= report_size) { >+ while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { > void *report = stream->oa_buffer.vaddr + tail; > > if (oa_report_id(stream, report) || >@@ -602,9 +594,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > __ratelimit(&stream->perf->tail_pointer_race)) > drm_notice(&stream->uncore->i915->drm, > "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", >- head, tail, hw_tail); >+ stream->oa_buffer.head, tail, hw_tail); > >- stream->oa_buffer.tail = gtt_offset + tail; >+ stream->oa_buffer.tail = tail; > > pollin = OA_TAKEN(stream->oa_buffer.tail, > stream->oa_buffer.head) >= report_size; >@@ -754,13 +746,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > >- /* >- * NB: oa_buffer.head/tail include the gtt_offset which we don't want >- * while indexing relative to oa_buf_base. >- */ >- head -= gtt_offset; >- tail -= gtt_offset; >- > /* > * An out of bounds or misaligned head or tail pointer implies a driver > * bug since we validate + align the tail pointers we read from the >@@ -896,9 +881,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > * We removed the gtt_offset for the copy loop above, indexing > * relative to oa_buf_base so put back here... > */ >- head += gtt_offset; > intel_uncore_write(uncore, oaheadptr, >- head & GEN12_OAG_OAHEADPTR_MASK); >+ (head + gtt_offset) & GEN12_OAG_OAHEADPTR_MASK); > stream->oa_buffer.head = head; > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); >@@ -1043,12 +1027,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > >- /* NB: oa_buffer.head/tail include the gtt_offset which we don't want >- * while indexing relative to oa_buf_base. >- */ >- head -= gtt_offset; >- tail -= gtt_offset; >- > /* An out of bounds or misaligned head or tail pointer implies a driver > * bug since we validate + align the tail pointers we read from the > * hardware and we are in full control of the head pointer which should >@@ -1111,13 +1089,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, > if (start_offset != *offset) { > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > >- /* We removed the gtt_offset for the copy loop above, indexing >- * relative to oa_buf_base so put back here... >- */ >- head += gtt_offset; >- > intel_uncore_write(uncore, GEN7_OASTATUS2, >- (head & GEN7_OASTATUS2_HEAD_MASK) | >+ ((head + gtt_offset) & GEN7_OASTATUS2_HEAD_MASK) | > GEN7_OASTATUS2_MEM_SELECT_GGTT); > stream->oa_buffer.head = head; > >@@ -1705,7 +1678,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) > */ > intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ > gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); >- stream->oa_buffer.head = gtt_offset; >+ stream->oa_buffer.head = 0; > > intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); > >@@ -1713,7 +1686,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) > gtt_offset | OABUFFER_SIZE_16M); > > /* Mark that we need updated tail pointers to read from... */ >- stream->oa_buffer.tail = gtt_offset; >+ stream->oa_buffer.tail = 0; > > spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); > >@@ -1747,7 +1720,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) > > intel_uncore_write(uncore, GEN8_OASTATUS, 0); > intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); >- stream->oa_buffer.head = gtt_offset; >+ stream->oa_buffer.head = 0; > > intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); > >@@ -1764,7 +1737,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) > intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); > > /* Mark that we need updated tail pointers to read from... */ >- stream->oa_buffer.tail = gtt_offset; >+ stream->oa_buffer.tail = 0; > > /* > * Reset state used to recognise context switches, affecting which >@@ -1801,7 +1774,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) > intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); > intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, > gtt_offset & GEN12_OAG_OAHEADPTR_MASK); >- stream->oa_buffer.head = gtt_offset; >+ stream->oa_buffer.head = 0; > > /* > * PRM says: >@@ -1817,7 +1790,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) > gtt_offset & GEN12_OAG_OATAILPTR_MASK); > > /* Mark that we need updated tail pointers to read from... */ >- stream->oa_buffer.tail = gtt_offset; >+ stream->oa_buffer.tail = 0; > Looks correct. Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> Thanks, Umesh > /* > * Reset state used to recognise context switches, affecting which >-- >2.41.0 >
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ec0fc2934045a..1347e4ec9dd5a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -543,10 +543,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) { u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); int report_size = stream->oa_buffer.format->size; - u32 head, tail, read_tail; + u32 tail, hw_tail; unsigned long flags; bool pollin; - u32 hw_tail; u32 partial_report_size; /* We have to consider the (unlikely) possibility that read() errors @@ -556,6 +555,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); hw_tail = stream->perf->ops.oa_hw_tail_read(stream); + hw_tail -= gtt_offset; /* The tail pointer increases in 64 byte increments, not in report_size * steps. Also the report size may not be a power of 2. Compute @@ -565,16 +565,8 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) partial_report_size %= report_size; /* Subtract partial amount off the tail */ - hw_tail -= gtt_offset; hw_tail = OA_TAKEN(hw_tail, partial_report_size); - /* NB: The head we observe here might effectively be a little - * out of date. If a read() is in progress, the head could be - * anywhere between this head and stream->oa_buffer.tail. - */ - head = stream->oa_buffer.head - gtt_offset; - read_tail = stream->oa_buffer.tail - gtt_offset; - tail = hw_tail; /* Walk the stream backward until we find a report with report @@ -588,7 +580,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) * memory in the order they were written to. * If not : (╯°□°)╯︵ ┻━┻ */ - while (OA_TAKEN(tail, read_tail) >= report_size) { + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { void *report = stream->oa_buffer.vaddr + tail; if (oa_report_id(stream, report) || @@ -602,9 +594,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) __ratelimit(&stream->perf->tail_pointer_race)) drm_notice(&stream->uncore->i915->drm, "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", - head, tail, hw_tail); + stream->oa_buffer.head, tail, hw_tail); - stream->oa_buffer.tail = gtt_offset + tail; + stream->oa_buffer.tail = tail; pollin = OA_TAKEN(stream->oa_buffer.tail, stream->oa_buffer.head) >= report_size; @@ -754,13 +746,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* - * NB: oa_buffer.head/tail include the gtt_offset which we don't want - * while indexing relative to oa_buf_base. - */ - head -= gtt_offset; - tail -= gtt_offset; - /* * An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the @@ -896,9 +881,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * We removed the gtt_offset for the copy loop above, indexing * relative to oa_buf_base so put back here... */ - head += gtt_offset; intel_uncore_write(uncore, oaheadptr, - head & GEN12_OAG_OAHEADPTR_MASK); + (head + gtt_offset) & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1043,12 +1027,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - /* NB: oa_buffer.head/tail include the gtt_offset which we don't want - * while indexing relative to oa_buf_base. - */ - head -= gtt_offset; - tail -= gtt_offset; - /* An out of bounds or misaligned head or tail pointer implies a driver * bug since we validate + align the tail pointers we read from the * hardware and we are in full control of the head pointer which should @@ -1111,13 +1089,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, if (start_offset != *offset) { spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - /* We removed the gtt_offset for the copy loop above, indexing - * relative to oa_buf_base so put back here... - */ - head += gtt_offset; - intel_uncore_write(uncore, GEN7_OASTATUS2, - (head & GEN7_OASTATUS2_HEAD_MASK) | + ((head + gtt_offset) & GEN7_OASTATUS2_HEAD_MASK) | GEN7_OASTATUS2_MEM_SELECT_GGTT); stream->oa_buffer.head = head; @@ -1705,7 +1678,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) */ intel_uncore_write(uncore, GEN7_OASTATUS2, /* head */ gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; intel_uncore_write(uncore, GEN7_OABUFFER, gtt_offset); @@ -1713,7 +1686,7 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream) gtt_offset | OABUFFER_SIZE_16M); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -1747,7 +1720,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, GEN8_OASTATUS, 0); intel_uncore_write(uncore, GEN8_OAHEADPTR, gtt_offset); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; intel_uncore_write(uncore, GEN8_OABUFFER_UDW, 0); @@ -1764,7 +1737,7 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; /* * Reset state used to recognise context switches, affecting which @@ -1801,7 +1774,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, gtt_offset & GEN12_OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = gtt_offset; + stream->oa_buffer.head = 0; /* * PRM says: @@ -1817,7 +1790,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) gtt_offset & GEN12_OAG_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ - stream->oa_buffer.tail = gtt_offset; + stream->oa_buffer.tail = 0; /* * Reset state used to recognise context switches, affecting which
There is no reason to add gtt_offset to the cached head/tail pointers stream->oa_buffer.head and stream->oa_buffer.tail. This causes the code to constantly add gtt_offset and subtract gtt_offset and is error prone (e.g. see previous patch). It is much simpler to maintain stream->oa_buffer.head and stream->oa_buffer.tail without adding gtt_offset to them and just allow for the gtt_offset when reading/writing from/to HW registers. Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 53 ++++++++------------------------ 1 file changed, 13 insertions(+), 40 deletions(-)