Message ID | 20220823204155.8178-3-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | None | expand |
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: > Add new OA formats for DG2. Some of the newer OA formats are not > multples of 64 bytes and are not powers of 2. For those formats, adjust > hw_tail accordingly when checking for new reports. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> Apart from the coding style issue : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ > include/uapi/drm/i915_drm.h | 6 +++ > 2 files changed, 46 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 735244a3aedd..c8331b549d31 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; > > /* XXX: beware if future OA HW adds new report formats that the current > * code assumes all reports have a power-of-two size and ~(size - 1) can > - * be used as a mask to align the OA tail pointer. > + * be used as a mask to align the OA tail pointer. In some of the > + * formats, R is used to denote reserved field. > */ > static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A13] = { 0, 64 }, > @@ -320,6 +321,10 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A12] = { 0, 64 }, > [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, > [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, > + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, > + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, > }; > > #define SAMPLE_OA_REPORT (1<<0) > @@ -467,6 +472,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > bool pollin; > u32 hw_tail; > u64 now; > + u32 partial_report_size; > > /* We have to consider the (unlikely) possibility that read() errors > * could result in an OA buffer reset which might reset the head and > @@ -476,10 +482,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) > > hw_tail = stream->perf->ops.oa_hw_tail_read(stream); > > - /* The tail pointer increases in 64 byte increments, > - * not in report_size steps... > + /* The tail pointer increases in 64 byte increments, whereas report > + * sizes need not be integral multiples or 64 or powers of 2. > + * Compute potentially partially landed report in the OA buffer > */ > - hw_tail &= ~(report_size - 1); > + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); > + partial_report_size %= report_size; > + > + /* Subtract partial amount off the tail */ > + hw_tail = gtt_offset + ((hw_tail - partial_report_size) & > + (stream->oa_buffer.vma->size - 1)); > > now = ktime_get_mono_fast_ns(); > > @@ -601,6 +613,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, > { > int report_size = stream->oa_buffer.format_size; > struct drm_i915_perf_record_header header; > + int report_size_partial; > + u8 *oa_buf_end; > > header.type = DRM_I915_PERF_RECORD_SAMPLE; > header.pad = 0; > @@ -614,7 +628,19 @@ static int append_oa_sample(struct i915_perf_stream *stream, > return -EFAULT; > buf += sizeof(header); > > - if (copy_to_user(buf, report, report_size)) > + oa_buf_end = stream->oa_buffer.vaddr + > + stream->oa_buffer.vma->size; > + report_size_partial = oa_buf_end - report; > + > + if (report_size_partial < report_size) { > + if(copy_to_user(buf, report, report_size_partial)) > + return -EFAULT; > + buf += report_size_partial; > + > + if(copy_to_user(buf, stream->oa_buffer.vaddr, > + report_size - report_size_partial)) > + return -EFAULT; I think the coding style requires you to use if () not if() Just a suggestion : you could make this code deal with the partial bit as the main bit of the function : oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.vma->size; report_size_partial = oa_buf_end - report; if (copy_to_user(buf, report, report_size_partial)) return -EFAULT; buf += report_size_partial; if (report_size_partial < report_size && copy_to_user(buf, stream->oa_buffer.vaddr, report_size - report_size_partial)) return -EFAULT; buf += report_size - report_size_partial; > + } else if (copy_to_user(buf, report, report_size)) > return -EFAULT; > > (*offset) += header.size; > @@ -684,8 +710,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > * all a power of two). > */ > if (drm_WARN_ONCE(&uncore->i915->drm, > - head > OA_BUFFER_SIZE || head % report_size || > - tail > OA_BUFFER_SIZE || tail % report_size, > + head > stream->oa_buffer.vma->size || > + tail > stream->oa_buffer.vma->size, > "Inconsistent OA buffer pointers: head = %u, tail = %u\n", > head, tail)) > return -EIO; > @@ -699,22 +725,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > u32 ctx_id; > u32 reason; > > - /* > - * All the report sizes factor neatly into the buffer > - * size so we never expect to see a report split > - * between the beginning and end of the buffer. > - * > - * Given the initial alignment check a misalignment > - * here would imply a driver bug that would result > - * in an overrun. > - */ > - if (drm_WARN_ON(&uncore->i915->drm, > - (OA_BUFFER_SIZE - head) < report_size)) { > - drm_err(&uncore->i915->drm, > - "Spurious OA head ptr: non-integral report offset\n"); > - break; > - } > - > /* > * The reason field includes flags identifying what > * triggered this specific report (mostly timer > @@ -4513,6 +4523,13 @@ static void oa_init_supported_formats(struct i915_perf *perf) > oa_format_add(perf, I915_OA_FORMAT_C4_B8); > break; > > + case INTEL_DG2: > + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); > + oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8); > + break; > + > default: > MISSING_CASE(platform); > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 520ad2691a99..d20d723925b5 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -2650,6 +2650,12 @@ enum drm_i915_oa_format { > I915_OA_FORMAT_A12_B8_C8, > I915_OA_FORMAT_A32u40_A4u32_B8_C8, > > + /* DG2 */ > + I915_OAR_FORMAT_A32u40_A4u32_B8_C8, > + I915_OA_FORMAT_A24u40_A14u32_B8_C8, > + I915_OAR_FORMAT_A36u64_B8_C8, > + I915_OA_FORMAT_A38u64_R2u64_B8_C8, > + > I915_OA_FORMAT_MAX /* non-ABI */ > }; >
On Tue, Sep 06, 2022 at 10:35:16PM +0300, Lionel Landwerlin wrote: >On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: >>Add new OA formats for DG2. Some of the newer OA formats are not >>multples of 64 bytes and are not powers of 2. For those formats, adjust >>hw_tail accordingly when checking for new reports. >> >>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> > >Apart from the coding style issue : > > >Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > >>--- >> drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ >> include/uapi/drm/i915_drm.h | 6 +++ >> 2 files changed, 46 insertions(+), 23 deletions(-) >> >>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>index 735244a3aedd..c8331b549d31 100644 >>--- a/drivers/gpu/drm/i915/i915_perf.c >>+++ b/drivers/gpu/drm/i915/i915_perf.c >>@@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; >> /* XXX: beware if future OA HW adds new report formats that the current >> * code assumes all reports have a power-of-two size and ~(size - 1) can >>- * be used as a mask to align the OA tail pointer. >>+ * be used as a mask to align the OA tail pointer. In some of the >>+ * formats, R is used to denote reserved field. >> */ >> static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { >> [I915_OA_FORMAT_A13] = { 0, 64 }, >>@@ -320,6 +321,10 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { >> [I915_OA_FORMAT_A12] = { 0, 64 }, >> [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, >> [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >>+ [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >>+ [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, >>+ [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, >>+ [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, >> }; >> #define SAMPLE_OA_REPORT (1<<0) >>@@ -467,6 +472,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) >> bool pollin; >> u32 hw_tail; >> u64 now; >>+ u32 partial_report_size; >> /* We have to consider the (unlikely) possibility that read() errors >> * could result in an OA buffer reset which might reset the head and >>@@ -476,10 +482,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) >> hw_tail = stream->perf->ops.oa_hw_tail_read(stream); >>- /* The tail pointer increases in 64 byte increments, >>- * not in report_size steps... >>+ /* The tail pointer increases in 64 byte increments, whereas report >>+ * sizes need not be integral multiples or 64 or powers of 2. >>+ * Compute potentially partially landed report in the OA buffer >> */ >>- hw_tail &= ~(report_size - 1); >>+ partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); >>+ partial_report_size %= report_size; >>+ >>+ /* Subtract partial amount off the tail */ >>+ hw_tail = gtt_offset + ((hw_tail - partial_report_size) & >>+ (stream->oa_buffer.vma->size - 1)); >> now = ktime_get_mono_fast_ns(); >>@@ -601,6 +613,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, >> { >> int report_size = stream->oa_buffer.format_size; >> struct drm_i915_perf_record_header header; >>+ int report_size_partial; >>+ u8 *oa_buf_end; >> header.type = DRM_I915_PERF_RECORD_SAMPLE; >> header.pad = 0; >>@@ -614,7 +628,19 @@ static int append_oa_sample(struct i915_perf_stream *stream, >> return -EFAULT; >> buf += sizeof(header); >>- if (copy_to_user(buf, report, report_size)) >>+ oa_buf_end = stream->oa_buffer.vaddr + >>+ stream->oa_buffer.vma->size; >>+ report_size_partial = oa_buf_end - report; >>+ >>+ if (report_size_partial < report_size) { >>+ if(copy_to_user(buf, report, report_size_partial)) >>+ return -EFAULT; >>+ buf += report_size_partial; >>+ >>+ if(copy_to_user(buf, stream->oa_buffer.vaddr, >>+ report_size - report_size_partial)) >>+ return -EFAULT; > >I think the coding style requires you to use if () not if() > Will fix. > >Just a suggestion : you could make this code deal with the partial bit >as the main bit of the function : > > >oa_buf_end = stream->oa_buffer.vaddr + > stream->oa_buffer.vma->size; > >report_size_partial = oa_buf_end - report; > >if (copy_to_user(buf, report, report_size_partial)) > return -EFAULT; >buf += report_size_partial; This ^ may not work because append_oa_sample is appending exactly one report to the user buffer, whereas the above may append more than one. Thanks, Umesh > >if (report_size_partial < report_size && > copy_to_user(buf, stream->oa_buffer.vaddr, > report_size - report_size_partial)) > return -EFAULT; >buf += report_size - report_size_partial; > > >>+ } else if (copy_to_user(buf, report, report_size)) >> return -EFAULT; >> (*offset) += header.size; >>@@ -684,8 +710,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, >> * all a power of two). >> */ >> if (drm_WARN_ONCE(&uncore->i915->drm, >>- head > OA_BUFFER_SIZE || head % report_size || >>- tail > OA_BUFFER_SIZE || tail % report_size, >>+ head > stream->oa_buffer.vma->size || >>+ tail > stream->oa_buffer.vma->size, >> "Inconsistent OA buffer pointers: head = %u, tail = %u\n", >> head, tail)) >> return -EIO; >>@@ -699,22 +725,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, >> u32 ctx_id; >> u32 reason; >>- /* >>- * All the report sizes factor neatly into the buffer >>- * size so we never expect to see a report split >>- * between the beginning and end of the buffer. >>- * >>- * Given the initial alignment check a misalignment >>- * here would imply a driver bug that would result >>- * in an overrun. >>- */ >>- if (drm_WARN_ON(&uncore->i915->drm, >>- (OA_BUFFER_SIZE - head) < report_size)) { >>- drm_err(&uncore->i915->drm, >>- "Spurious OA head ptr: non-integral report offset\n"); >>- break; >>- } >>- >> /* >> * The reason field includes flags identifying what >> * triggered this specific report (mostly timer >>@@ -4513,6 +4523,13 @@ static void oa_init_supported_formats(struct i915_perf *perf) >> oa_format_add(perf, I915_OA_FORMAT_C4_B8); >> break; >>+ case INTEL_DG2: >>+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); >>+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); >>+ oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8); >>+ oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8); >>+ break; >>+ >> default: >> MISSING_CASE(platform); >> } >>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >>index 520ad2691a99..d20d723925b5 100644 >>--- a/include/uapi/drm/i915_drm.h >>+++ b/include/uapi/drm/i915_drm.h >>@@ -2650,6 +2650,12 @@ enum drm_i915_oa_format { >> I915_OA_FORMAT_A12_B8_C8, >> I915_OA_FORMAT_A32u40_A4u32_B8_C8, >>+ /* DG2 */ >>+ I915_OAR_FORMAT_A32u40_A4u32_B8_C8, >>+ I915_OA_FORMAT_A24u40_A14u32_B8_C8, >>+ I915_OAR_FORMAT_A36u64_B8_C8, >>+ I915_OA_FORMAT_A38u64_R2u64_B8_C8, >>+ >> I915_OA_FORMAT_MAX /* non-ABI */ >> }; > >
On 06/09/2022 22:46, Umesh Nerlige Ramappa wrote: > On Tue, Sep 06, 2022 at 10:35:16PM +0300, Lionel Landwerlin wrote: >> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: >>> Add new OA formats for DG2. Some of the newer OA formats are not >>> multples of 64 bytes and are not powers of 2. For those formats, adjust >>> hw_tail accordingly when checking for new reports. >>> >>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> >> >> Apart from the coding style issue : >> >> >> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> >> >>> --- >>> drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ >>> include/uapi/drm/i915_drm.h | 6 +++ >>> 2 files changed, 46 insertions(+), 23 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_perf.c >>> b/drivers/gpu/drm/i915/i915_perf.c >>> index 735244a3aedd..c8331b549d31 100644 >>> --- a/drivers/gpu/drm/i915/i915_perf.c >>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>> @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; >>> /* XXX: beware if future OA HW adds new report formats that the >>> current >>> * code assumes all reports have a power-of-two size and ~(size - >>> 1) can >>> - * be used as a mask to align the OA tail pointer. >>> + * be used as a mask to align the OA tail pointer. In some of the >>> + * formats, R is used to denote reserved field. >>> */ >>> static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { >>> [I915_OA_FORMAT_A13] = { 0, 64 }, >>> @@ -320,6 +321,10 @@ static const struct i915_oa_format >>> oa_formats[I915_OA_FORMAT_MAX] = { >>> [I915_OA_FORMAT_A12] = { 0, 64 }, >>> [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, >>> [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >>> + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >>> + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, >>> + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, >>> + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, >>> }; >>> #define SAMPLE_OA_REPORT (1<<0) >>> @@ -467,6 +472,7 @@ static bool oa_buffer_check_unlocked(struct >>> i915_perf_stream *stream) >>> bool pollin; >>> u32 hw_tail; >>> u64 now; >>> + u32 partial_report_size; >>> /* We have to consider the (unlikely) possibility that read() >>> errors >>> * could result in an OA buffer reset which might reset the >>> head and >>> @@ -476,10 +482,16 @@ static bool oa_buffer_check_unlocked(struct >>> i915_perf_stream *stream) >>> hw_tail = stream->perf->ops.oa_hw_tail_read(stream); >>> - /* The tail pointer increases in 64 byte increments, >>> - * not in report_size steps... >>> + /* The tail pointer increases in 64 byte increments, whereas >>> report >>> + * sizes need not be integral multiples or 64 or powers of 2. >>> + * Compute potentially partially landed report in the OA buffer >>> */ >>> - hw_tail &= ~(report_size - 1); >>> + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); >>> + partial_report_size %= report_size; >>> + >>> + /* Subtract partial amount off the tail */ >>> + hw_tail = gtt_offset + ((hw_tail - partial_report_size) & >>> + (stream->oa_buffer.vma->size - 1)); >>> now = ktime_get_mono_fast_ns(); >>> @@ -601,6 +613,8 @@ static int append_oa_sample(struct >>> i915_perf_stream *stream, >>> { >>> int report_size = stream->oa_buffer.format_size; >>> struct drm_i915_perf_record_header header; >>> + int report_size_partial; >>> + u8 *oa_buf_end; >>> header.type = DRM_I915_PERF_RECORD_SAMPLE; >>> header.pad = 0; >>> @@ -614,7 +628,19 @@ static int append_oa_sample(struct >>> i915_perf_stream *stream, >>> return -EFAULT; >>> buf += sizeof(header); >>> - if (copy_to_user(buf, report, report_size)) >>> + oa_buf_end = stream->oa_buffer.vaddr + >>> + stream->oa_buffer.vma->size; >>> + report_size_partial = oa_buf_end - report; >>> + >>> + if (report_size_partial < report_size) { >>> + if(copy_to_user(buf, report, report_size_partial)) >>> + return -EFAULT; >>> + buf += report_size_partial; >>> + >>> + if(copy_to_user(buf, stream->oa_buffer.vaddr, >>> + report_size - report_size_partial)) >>> + return -EFAULT; >> >> I think the coding style requires you to use if () not if() >> > > Will fix. > >> >> Just a suggestion : you could make this code deal with the partial >> bit as the main bit of the function : >> >> >> oa_buf_end = stream->oa_buffer.vaddr + >> stream->oa_buffer.vma->size; >> >> report_size_partial = oa_buf_end - report; >> >> if (copy_to_user(buf, report, report_size_partial)) >> return -EFAULT; >> buf += report_size_partial; > > This ^ may not work because append_oa_sample is appending exactly one > report to the user buffer, whereas the above may append more than one. > > Thanks, > Umesh Ah I see, thanks for pointing this out. -Lionel > >> >> if (report_size_partial < report_size && >> copy_to_user(buf, stream->oa_buffer.vaddr, >> report_size - report_size_partial)) >> return -EFAULT; >> buf += report_size - report_size_partial; >> >> >>> + } else if (copy_to_user(buf, report, report_size)) >>> return -EFAULT; >>> (*offset) += header.size; >>> @@ -684,8 +710,8 @@ static int gen8_append_oa_reports(struct >>> i915_perf_stream *stream, >>> * all a power of two). >>> */ >>> if (drm_WARN_ONCE(&uncore->i915->drm, >>> - head > OA_BUFFER_SIZE || head % report_size || >>> - tail > OA_BUFFER_SIZE || tail % report_size, >>> + head > stream->oa_buffer.vma->size || >>> + tail > stream->oa_buffer.vma->size, >>> "Inconsistent OA buffer pointers: head = %u, tail = >>> %u\n", >>> head, tail)) >>> return -EIO; >>> @@ -699,22 +725,6 @@ static int gen8_append_oa_reports(struct >>> i915_perf_stream *stream, >>> u32 ctx_id; >>> u32 reason; >>> - /* >>> - * All the report sizes factor neatly into the buffer >>> - * size so we never expect to see a report split >>> - * between the beginning and end of the buffer. >>> - * >>> - * Given the initial alignment check a misalignment >>> - * here would imply a driver bug that would result >>> - * in an overrun. >>> - */ >>> - if (drm_WARN_ON(&uncore->i915->drm, >>> - (OA_BUFFER_SIZE - head) < report_size)) { >>> - drm_err(&uncore->i915->drm, >>> - "Spurious OA head ptr: non-integral report offset\n"); >>> - break; >>> - } >>> - >>> /* >>> * The reason field includes flags identifying what >>> * triggered this specific report (mostly timer >>> @@ -4513,6 +4523,13 @@ static void oa_init_supported_formats(struct >>> i915_perf *perf) >>> oa_format_add(perf, I915_OA_FORMAT_C4_B8); >>> break; >>> + case INTEL_DG2: >>> + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); >>> + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); >>> + oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8); >>> + oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8); >>> + break; >>> + >>> default: >>> MISSING_CASE(platform); >>> } >>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >>> index 520ad2691a99..d20d723925b5 100644 >>> --- a/include/uapi/drm/i915_drm.h >>> +++ b/include/uapi/drm/i915_drm.h >>> @@ -2650,6 +2650,12 @@ enum drm_i915_oa_format { >>> I915_OA_FORMAT_A12_B8_C8, >>> I915_OA_FORMAT_A32u40_A4u32_B8_C8, >>> + /* DG2 */ >>> + I915_OAR_FORMAT_A32u40_A4u32_B8_C8, >>> + I915_OA_FORMAT_A24u40_A14u32_B8_C8, >>> + I915_OAR_FORMAT_A36u64_B8_C8, >>> + I915_OA_FORMAT_A38u64_R2u64_B8_C8, >>> + >>> I915_OA_FORMAT_MAX /* non-ABI */ >>> }; >> >>
On Tue, 23 Aug 2022 13:41:38 -0700, Umesh Nerlige Ramappa wrote: > > Add new OA formats for DG2. Should we change the patch title and commit message a bit to 'Add OAR and OAG formats for DG2'? > Some of the newer OA formats are not > multples of 64 bytes and are not powers of 2. For those formats, adjust > hw_tail accordingly when checking for new reports. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> > --- > drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ > include/uapi/drm/i915_drm.h | 6 +++ > 2 files changed, 46 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 735244a3aedd..c8331b549d31 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; > > /* XXX: beware if future OA HW adds new report formats that the current > * code assumes all reports have a power-of-two size and ~(size - 1) can > - * be used as a mask to align the OA tail pointer. > + * be used as a mask to align the OA tail pointer. In some of the > + * formats, R is used to denote reserved field. > */ > static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A13] = { 0, 64 }, > @@ -320,6 +321,10 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A12] = { 0, 64 }, > [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, > [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, > + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, > + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, Isn't the size for this last one 416 (or 400)? Bspec: 52198. Unless the size has to be a multiple of 64? Looks like Lionel's R-b is not showing up on Patchwork, might need to be manually added. For now this is: Acked-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
On Tue, Sep 13, 2022 at 08:40:22AM -0700, Dixit, Ashutosh wrote: >On Tue, 23 Aug 2022 13:41:38 -0700, Umesh Nerlige Ramappa wrote: >> >> Add new OA formats for DG2. > >Should we change the patch title and commit message a bit to 'Add OAR and >OAG formats for DG2'? Hmm, I assumed OAR was also part of TGL, but looks like it's not. I can change the title as suggested. > >> Some of the newer OA formats are not >> multples of 64 bytes and are not powers of 2. For those formats, adjust >> hw_tail accordingly when checking for new reports. >> >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> >> --- >> drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ >> include/uapi/drm/i915_drm.h | 6 +++ >> 2 files changed, 46 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 735244a3aedd..c8331b549d31 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; >> >> /* XXX: beware if future OA HW adds new report formats that the current >> * code assumes all reports have a power-of-two size and ~(size - 1) can >> - * be used as a mask to align the OA tail pointer. >> + * be used as a mask to align the OA tail pointer. In some of the >> + * formats, R is used to denote reserved field. >> */ >> static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { >> [I915_OA_FORMAT_A13] = { 0, 64 }, >> @@ -320,6 +321,10 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { >> [I915_OA_FORMAT_A12] = { 0, 64 }, >> [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, >> [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >> + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, >> + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, >> + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, >> + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, > >Isn't the size for this last one 416 (or 400)? Bspec: 52198. Unless the >size has to be a multiple of 64? Format size is multiple of 64 bytes, so it is rounded up. > >Looks like Lionel's R-b is not showing up on Patchwork, might need to be >manually added. For now this is: > >Acked-by: Ashutosh Dixit <ashutosh.dixit@intel.com> Thanks, Umesh
On Wed, 14 Sep 2022 13:54:34 -0700, Umesh Nerlige Ramappa wrote: > > On Tue, Sep 13, 2022 at 08:40:22AM -0700, Dixit, Ashutosh wrote: > > On Tue, 23 Aug 2022 13:41:38 -0700, Umesh Nerlige Ramappa wrote: > >> > >> Add new OA formats for DG2. > > > > Should we change the patch title and commit message a bit to 'Add OAR and > > OAG formats for DG2'? > > Hmm, I assumed OAR was also part of TGL, but looks like it's not. I can > change the title as suggested. By 'Add OAR and OAG formats for DG2' I meant we are only adding OAR and OAG formats and not including other DG2 formats ;)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 735244a3aedd..c8331b549d31 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 100000; /* XXX: beware if future OA HW adds new report formats that the current * code assumes all reports have a power-of-two size and ~(size - 1) can - * be used as a mask to align the OA tail pointer. + * be used as a mask to align the OA tail pointer. In some of the + * formats, R is used to denote reserved field. */ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_A13] = { 0, 64 }, @@ -320,6 +321,10 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_A12] = { 0, 64 }, [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, + [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, + [I915_OAR_FORMAT_A36u64_B8_C8] = { 1, 384 }, + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 }, }; #define SAMPLE_OA_REPORT (1<<0) @@ -467,6 +472,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) bool pollin; u32 hw_tail; u64 now; + u32 partial_report_size; /* We have to consider the (unlikely) possibility that read() errors * could result in an OA buffer reset which might reset the head and @@ -476,10 +482,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) hw_tail = stream->perf->ops.oa_hw_tail_read(stream); - /* The tail pointer increases in 64 byte increments, - * not in report_size steps... + /* The tail pointer increases in 64 byte increments, whereas report + * sizes need not be integral multiples or 64 or powers of 2. + * Compute potentially partially landed report in the OA buffer */ - hw_tail &= ~(report_size - 1); + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = gtt_offset + ((hw_tail - partial_report_size) & + (stream->oa_buffer.vma->size - 1)); now = ktime_get_mono_fast_ns(); @@ -601,6 +613,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, { int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; + int report_size_partial; + u8 *oa_buf_end; header.type = DRM_I915_PERF_RECORD_SAMPLE; header.pad = 0; @@ -614,7 +628,19 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); - if (copy_to_user(buf, report, report_size)) + oa_buf_end = stream->oa_buffer.vaddr + + stream->oa_buffer.vma->size; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if(copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if(copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) return -EFAULT; (*offset) += header.size; @@ -684,8 +710,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * all a power of two). */ if (drm_WARN_ONCE(&uncore->i915->drm, - head > OA_BUFFER_SIZE || head % report_size || - tail > OA_BUFFER_SIZE || tail % report_size, + head > stream->oa_buffer.vma->size || + tail > stream->oa_buffer.vma->size, "Inconsistent OA buffer pointers: head = %u, tail = %u\n", head, tail)) return -EIO; @@ -699,22 +725,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, u32 ctx_id; u32 reason; - /* - * All the report sizes factor neatly into the buffer - * size so we never expect to see a report split - * between the beginning and end of the buffer. - * - * Given the initial alignment check a misalignment - * here would imply a driver bug that would result - * in an overrun. - */ - if (drm_WARN_ON(&uncore->i915->drm, - (OA_BUFFER_SIZE - head) < report_size)) { - drm_err(&uncore->i915->drm, - "Spurious OA head ptr: non-integral report offset\n"); - break; - } - /* * The reason field includes flags identifying what * triggered this specific report (mostly timer @@ -4513,6 +4523,13 @@ static void oa_init_supported_formats(struct i915_perf *perf) oa_format_add(perf, I915_OA_FORMAT_C4_B8); break; + case INTEL_DG2: + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8); + oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8); + break; + default: MISSING_CASE(platform); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 520ad2691a99..d20d723925b5 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2650,6 +2650,12 @@ enum drm_i915_oa_format { I915_OA_FORMAT_A12_B8_C8, I915_OA_FORMAT_A32u40_A4u32_B8_C8, + /* DG2 */ + I915_OAR_FORMAT_A32u40_A4u32_B8_C8, + I915_OA_FORMAT_A24u40_A14u32_B8_C8, + I915_OAR_FORMAT_A36u64_B8_C8, + I915_OA_FORMAT_A38u64_R2u64_B8_C8, + I915_OA_FORMAT_MAX /* non-ABI */ };
Add new OA formats for DG2. Some of the newer OA formats are not multples of 64 bytes and are not powers of 2. For those formats, adjust hw_tail accordingly when checking for new reports. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramampa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 63 ++++++++++++++++++++------------ include/uapi/drm/i915_drm.h | 6 +++ 2 files changed, 46 insertions(+), 23 deletions(-)