Message ID | 20220823000342.281222-13-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add DG2 OA support | expand |
On Tue, 23 Aug 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> wrote: > Now that OA formats come in flavor of 64 bit reports, the report header > has 64 bit report-id, timestamp, context-id and gpu-ticks fields. When > filtering these reports, use the right width for these fields. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 109 ++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_perf_types.h | 6 ++ > 2 files changed, 94 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 41634d614ba5..c3183aedc712 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -324,8 +324,8 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, > - [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, > - [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, > + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384, HDR_64_BIT }, > + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448, HDR_64_BIT }, > }; > > #define SAMPLE_OA_REPORT (1<<0) > @@ -457,6 +457,75 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) > return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; > } > > +#define oa_report_header_64bit(__s) \ > + ((__s)->oa_buffer.format->header == HDR_64_BIT) > + > +static inline u64 > +oa_report_id(struct i915_perf_stream *stream, void *report) > +{ > + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; > +} Drive-by-comment, I suspect you're not really gaining anything by making these "inline". Just let the compiler do its thing. Actually making them inline prevents you from getting warnings for unused functions if they ever become unused. BR, Jani. > +static inline u64 > +oa_report_reason(struct i915_perf_stream *stream, void *report) > +{ > + return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) & > + (GRAPHICS_VER(stream->perf->i915) == 12 ? > + OAREPORT_REASON_MASK_EXTENDED : > + OAREPORT_REASON_MASK); > +} > + > +static inline void > +oa_report_id_clear(struct i915_perf_stream *stream, u32 *report) > +{ > + if (oa_report_header_64bit(stream)) > + *(u64 *)report = 0; > + else > + *report = 0; > +} > + > +static inline bool > +oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report) > +{ > + return !(oa_report_id(stream, report) & > + stream->perf->gen8_valid_ctx_bit) && > + GRAPHICS_VER(stream->perf->i915) <= 11; > +} > + > +static inline u64 > +oa_timestamp(struct i915_perf_stream *stream, void *report) > +{ > + return oa_report_header_64bit(stream) ? > + *((u64 *)report + 1) : > + *((u32 *)report + 1); > +} > + > +static inline void > +oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report) > +{ > + if (oa_report_header_64bit(stream)) > + *(u64 *)&report[2] = 0; > + else > + report[1] = 0; > +} > + > +static inline u32 > +oa_context_id(struct i915_perf_stream *stream, u32 *report) > +{ > + u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2]; > + > + return ctx_id & stream->specific_ctx_id_mask; > +} > + > +static inline void > +oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) > +{ > + if (oa_report_header_64bit(stream)) > + report[4] = INVALID_CTX_ID; > + else > + report[2] = INVALID_CTX_ID; > +} > + > /** > * oa_buffer_check_unlocked - check for data and update tail ptr state > * @stream: i915 stream instance > @@ -546,9 +615,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > * If not : (╯°□°)╯︵ ┻━┻ > */ > while (_oa_taken(stream, tail, aged_tail) >= report_size) { > - u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail); > + void *report = stream->oa_buffer.vaddr + tail; > > - if (report32[0] != 0 || report32[1] != 0) > + if (oa_report_id(stream, report) || > + oa_timestamp(stream, report)) > break; > > tail = _rewind_tail(stream, tail, report_size); > @@ -741,23 +811,19 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > u8 *report = oa_buf_base + head; > u32 *report32 = (void *)report; > u32 ctx_id; > - u32 reason; > + u64 reason; > > /* > * The reason field includes flags identifying what > * triggered this specific report (mostly timer > * triggered or e.g. due to a context switch). > * > - * This field is never expected to be zero so we can > - * check that the report isn't invalid before copying > - * it to userspace... > + * In MMIO triggered reports, some platforms do not set the > + * reason bit in this field and it is valid to have a reason > + * field of zero. > */ > - reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & > - (GRAPHICS_VER(stream->perf->i915) == 12 ? > - OAREPORT_REASON_MASK_EXTENDED : > - OAREPORT_REASON_MASK)); > - > - ctx_id = report32[2] & stream->specific_ctx_id_mask; > + reason = oa_report_reason(stream, report); > + ctx_id = oa_context_id(stream, report32); > > /* > * Squash whatever is in the CTX_ID field if it's marked as > @@ -767,9 +833,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > * Note: that we don't clear the valid_ctx_bit so userspace can > * understand that the ID has been squashed by the kernel. > */ > - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && > - GRAPHICS_VER(stream->perf->i915) <= 11) > - ctx_id = report32[2] = INVALID_CTX_ID; > + if (oa_report_ctx_invalid(stream, report)) { > + ctx_id = INVALID_CTX_ID; > + oa_context_id_squash(stream, report32); > + } > > /* > * NB: For Gen 8 the OA unit no longer supports clock gating > @@ -813,7 +880,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > */ > if (stream->ctx && > stream->specific_ctx_id != ctx_id) { > - report32[2] = INVALID_CTX_ID; > + oa_context_id_squash(stream, report32); > } > > ret = append_oa_sample(stream, buf, count, offset, > @@ -825,11 +892,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > } > > /* > - * Clear out the first 2 dword as a mean to detect unlanded > + * Clear out the report id and timestamp as a means to detect unlanded > * reports. > */ > - report32[0] = 0; > - report32[1] = 0; > + oa_report_id_clear(stream, report32); > + oa_timestamp_clear(stream, report32); > } > > if (start_offset != *offset) { > diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h > index e0c96b44eda8..68db5f94bc58 100644 > --- a/drivers/gpu/drm/i915/i915_perf_types.h > +++ b/drivers/gpu/drm/i915/i915_perf_types.h > @@ -30,9 +30,15 @@ struct i915_vma; > struct intel_context; > struct intel_engine_cs; > > +enum report_header { > + HDR_32_BIT = 0, > + HDR_64_BIT, > +}; > + > struct i915_oa_format { > u32 format; > int size; > + enum report_header header; > }; > > struct i915_oa_reg {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 41634d614ba5..c3183aedc712 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -324,8 +324,8 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, - [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, - [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384, HDR_64_BIT }, + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448, HDR_64_BIT }, }; #define SAMPLE_OA_REPORT (1<<0) @@ -457,6 +457,75 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) return oastatus1 & GEN7_OASTATUS1_TAIL_MASK; } +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static inline u64 +oa_report_id(struct i915_perf_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static inline u64 +oa_report_reason(struct i915_perf_stream *stream, void *report) +{ + return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) & + (GRAPHICS_VER(stream->perf->i915) == 12 ? + OAREPORT_REASON_MASK_EXTENDED : + OAREPORT_REASON_MASK); +} + +static inline void +oa_report_id_clear(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + +static inline bool +oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report) +{ + return !(oa_report_id(stream, report) & + stream->perf->gen8_valid_ctx_bit) && + GRAPHICS_VER(stream->perf->i915) <= 11; +} + +static inline u64 +oa_timestamp(struct i915_perf_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static inline void +oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + +static inline u32 +oa_context_id(struct i915_perf_stream *stream, u32 *report) +{ + u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2]; + + return ctx_id & stream->specific_ctx_id_mask; +} + +static inline void +oa_context_id_squash(struct i915_perf_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + report[4] = INVALID_CTX_ID; + else + report[2] = INVALID_CTX_ID; +} + /** * oa_buffer_check_unlocked - check for data and update tail ptr state * @stream: i915 stream instance @@ -546,9 +615,10 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) * If not : (╯°□°)╯︵ ┻━┻ */ while (_oa_taken(stream, tail, aged_tail) >= report_size) { - u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail); + void *report = stream->oa_buffer.vaddr + tail; - if (report32[0] != 0 || report32[1] != 0) + if (oa_report_id(stream, report) || + oa_timestamp(stream, report)) break; tail = _rewind_tail(stream, tail, report_size); @@ -741,23 +811,19 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, u8 *report = oa_buf_base + head; u32 *report32 = (void *)report; u32 ctx_id; - u32 reason; + u64 reason; /* * The reason field includes flags identifying what * triggered this specific report (mostly timer * triggered or e.g. due to a context switch). * - * This field is never expected to be zero so we can - * check that the report isn't invalid before copying - * it to userspace... + * In MMIO triggered reports, some platforms do not set the + * reason bit in this field and it is valid to have a reason + * field of zero. */ - reason = ((report32[0] >> OAREPORT_REASON_SHIFT) & - (GRAPHICS_VER(stream->perf->i915) == 12 ? - OAREPORT_REASON_MASK_EXTENDED : - OAREPORT_REASON_MASK)); - - ctx_id = report32[2] & stream->specific_ctx_id_mask; + reason = oa_report_reason(stream, report); + ctx_id = oa_context_id(stream, report32); /* * Squash whatever is in the CTX_ID field if it's marked as @@ -767,9 +833,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && - GRAPHICS_VER(stream->perf->i915) <= 11) - ctx_id = report32[2] = INVALID_CTX_ID; + if (oa_report_ctx_invalid(stream, report)) { + ctx_id = INVALID_CTX_ID; + oa_context_id_squash(stream, report32); + } /* * NB: For Gen 8 the OA unit no longer supports clock gating @@ -813,7 +880,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, */ if (stream->ctx && stream->specific_ctx_id != ctx_id) { - report32[2] = INVALID_CTX_ID; + oa_context_id_squash(stream, report32); } ret = append_oa_sample(stream, buf, count, offset, @@ -825,11 +892,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } /* - * Clear out the first 2 dword as a mean to detect unlanded + * Clear out the report id and timestamp as a means to detect unlanded * reports. */ - report32[0] = 0; - report32[1] = 0; + oa_report_id_clear(stream, report32); + oa_timestamp_clear(stream, report32); } if (start_offset != *offset) { diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index e0c96b44eda8..68db5f94bc58 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -30,9 +30,15 @@ struct i915_vma; struct intel_context; struct intel_engine_cs; +enum report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + struct i915_oa_format { u32 format; int size; + enum report_header header; }; struct i915_oa_reg {
Now that OA formats come in flavor of 64 bit reports, the report header has 64 bit report-id, timestamp, context-id and gpu-ticks fields. When filtering these reports, use the right width for these fields. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 109 ++++++++++++++++++++----- drivers/gpu/drm/i915/i915_perf_types.h | 6 ++ 2 files changed, 94 insertions(+), 21 deletions(-)