diff mbox series

[v2,7/9] drm/i915/perf: Handle non-power-of-2 reports

Message ID 20230217005850.2511422-8-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series Add OAM support for MTL | expand

Commit Message

Umesh Nerlige Ramappa Feb. 17, 2023, 12:58 a.m. UTC
Some of the newer OA formats are not powers of 2. For those formats,
adjust the hw_tail accordingly when checking for new reports.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 22 deletions(-)

Comments

Dixit, Ashutosh Feb. 17, 2023, 8:58 p.m. UTC | #1
On Thu, 16 Feb 2023 16:58:48 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh, couple of nits below.

> Some of the newer OA formats are not powers of 2. For those formats,
> adjust the hw_tail accordingly when checking for new reports.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
>  1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 9715b964aa1e..d3a1892c93be 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -542,6 +542,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>	bool pollin;
>	u32 hw_tail;
>	u64 now;
> +	u32 partial_report_size;
>
>	/* We have to consider the (unlikely) possibility that read() errors
>	 * could result in an OA buffer reset which might reset the head and
> @@ -551,10 +552,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>
>	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
>
> -	/* The tail pointer increases in 64 byte increments,
> -	 * not in report_size steps...
> +	/* The tail pointer increases in 64 byte increments, whereas report
> +	 * sizes need not be integral multiples or 64 or powers of 2.
s/or/of/ ---------------------------------------^

Also I think report sizes can only be multiples of 64, the ones we have
seen till now definitely are. Also the lower 6 bits of tail pointer are 0.

> +	 * Compute potentially partially landed report in the OA buffer
>	 */
> -	hw_tail &= ~(report_size - 1);
> +	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
> +	partial_report_size %= report_size;
> +
> +	/* Subtract partial amount off the tail */
> +	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
> +				(stream->oa_buffer.vma->size - 1));

Couple of questions here because OA_TAKEN uses OA_BUFFER_SIZE and the above
expression uses stream->oa_buffer.vma->size:

1. Is 'OA_BUFFER_SIZE == stream->oa_buffer.vma->size'? We seem to be using
   the two interchaneably in the code.
2. If yes, can we add an assert about this in alloc_oa_buffer?
3. Can the above expression be changed to:

	hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);

It would be good to use the same construct if possible. Maybe we can even
change OA_TAKEN to something like:

#define OA_TAKEN(tail, head)    ((tail - head) & (stream->oa_buffer.vma->size - 1))

>
>	now = ktime_get_mono_fast_ns();
>
> @@ -677,6 +684,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>  {
>	int report_size = stream->oa_buffer.format->size;
>	struct drm_i915_perf_record_header header;
> +	int report_size_partial;
> +	u8 *oa_buf_end;
>
>	header.type = DRM_I915_PERF_RECORD_SAMPLE;
>	header.pad = 0;
> @@ -690,8 +699,21 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>		return -EFAULT;
>	buf += sizeof(header);
>
> -	if (copy_to_user(buf, report, report_size))
> +	oa_buf_end = stream->oa_buffer.vaddr +
> +		     stream->oa_buffer.vma->size;
> +	report_size_partial = oa_buf_end - report;
> +
> +	if (report_size_partial < report_size) {
> +		if (copy_to_user(buf, report, report_size_partial))
> +			return -EFAULT;
> +		buf += report_size_partial;
> +
> +		if (copy_to_user(buf, stream->oa_buffer.vaddr,
> +				 report_size - report_size_partial))
> +			return -EFAULT;
> +	} else if (copy_to_user(buf, report, report_size)) {
>		return -EFAULT;
> +	}
>
>	(*offset) += header.size;
>
> @@ -759,8 +781,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>	 * all a power of two).
>	 */
>	if (drm_WARN_ONCE(&uncore->i915->drm,
> -			  head > OA_BUFFER_SIZE || head % report_size ||
> -			  tail > OA_BUFFER_SIZE || tail % report_size,
> +			  head > OA_BUFFER_SIZE ||
> +			  tail > OA_BUFFER_SIZE,

The comment above the if () also needs to be fixed.

Also, does it make sense to have 'head % 64 || tail % 64' checks above? As
I was saying above head and tail will be 64 byte aligned.

>			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>			  head, tail))
>		return -EIO;
> @@ -774,22 +796,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>		u32 ctx_id;
>		u64 reason;
>
> -		/*
> -		 * All the report sizes factor neatly into the buffer
> -		 * size so we never expect to see a report split
> -		 * between the beginning and end of the buffer.
> -		 *
> -		 * Given the initial alignment check a misalignment
> -		 * here would imply a driver bug that would result
> -		 * in an overrun.
> -		 */
> -		if (drm_WARN_ON(&uncore->i915->drm,
> -				(OA_BUFFER_SIZE - head) < report_size)) {
> -			drm_err(&uncore->i915->drm,
> -				"Spurious OA head ptr: non-integral report offset\n");
> -			break;
> -		}
> -
>		/*
>		 * The reason field includes flags identifying what
>		 * triggered this specific report (mostly timer
> --
> 2.36.1
>
Umesh Nerlige Ramappa Feb. 18, 2023, 12:05 a.m. UTC | #2
On Fri, Feb 17, 2023 at 12:58:18PM -0800, Dixit, Ashutosh wrote:
>On Thu, 16 Feb 2023 16:58:48 -0800, Umesh Nerlige Ramappa wrote:
>>
>
>Hi Umesh, couple of nits below.
>
>> Some of the newer OA formats are not powers of 2. For those formats,
>> adjust the hw_tail accordingly when checking for new reports.
>>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
>>  1 file changed, 28 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 9715b964aa1e..d3a1892c93be 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -542,6 +542,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>>	bool pollin;
>>	u32 hw_tail;
>>	u64 now;
>> +	u32 partial_report_size;
>>
>>	/* We have to consider the (unlikely) possibility that read() errors
>>	 * could result in an OA buffer reset which might reset the head and
>> @@ -551,10 +552,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>>
>>	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
>>
>> -	/* The tail pointer increases in 64 byte increments,
>> -	 * not in report_size steps...
>> +	/* The tail pointer increases in 64 byte increments, whereas report
>> +	 * sizes need not be integral multiples or 64 or powers of 2.
>s/or/of/ ---------------------------------------^
>
>Also I think report sizes can only be multiples of 64, the ones we have
>seen till now definitely are. Also the lower 6 bits of tail pointer are 0.

Agree, the only addition to the old comment should be that the new 
reports may not be powers of 2.

>
>> +	 * Compute potentially partially landed report in the OA buffer
>>	 */
>> -	hw_tail &= ~(report_size - 1);
>> +	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
>> +	partial_report_size %= report_size;
>> +
>> +	/* Subtract partial amount off the tail */
>> +	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
>> +				(stream->oa_buffer.vma->size - 1));
>
>Couple of questions here because OA_TAKEN uses OA_BUFFER_SIZE and the above
>expression uses stream->oa_buffer.vma->size:
>
>1. Is 'OA_BUFFER_SIZE == stream->oa_buffer.vma->size'? We seem to be using
>   the two interchaneably in the code.

Yes. I think the code was updated to use vma->size when support for 
selecting OA buffer size along with large OA buffers was added, but we 
haven't pushed that upstream yet. Since I have cherry-picked patches 
here, there is some inconsistency. I would just change this patch to use 
OA_BUFFER_SIZE for now.

>2. If yes, can we add an assert about this in alloc_oa_buffer?

If I change to OA_BUFFER_SIZE, then okay to skip assert? Do you suspect 
that the vma size may actually differ from what we requested?

>3. Can the above expression be changed to:
>
>	hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);

Not if hw_tail has rolled over, but stream->oa_buffer.tail hasn't.

>
>It would be good to use the same construct if possible. Maybe we can even
>change OA_TAKEN to something like:
>
>#define OA_TAKEN(tail, head)    ((tail - head) & (stream->oa_buffer.vma->size - 1))

I am thinking of changing to OA_BUFFER_SIZE at other places in this 
patch. Thoughts?

>
>>
>>	now = ktime_get_mono_fast_ns();
>>
>> @@ -677,6 +684,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>>  {
>>	int report_size = stream->oa_buffer.format->size;
>>	struct drm_i915_perf_record_header header;
>> +	int report_size_partial;
>> +	u8 *oa_buf_end;
>>
>>	header.type = DRM_I915_PERF_RECORD_SAMPLE;
>>	header.pad = 0;
>> @@ -690,8 +699,21 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>>		return -EFAULT;
>>	buf += sizeof(header);
>>
>> -	if (copy_to_user(buf, report, report_size))
>> +	oa_buf_end = stream->oa_buffer.vaddr +
>> +		     stream->oa_buffer.vma->size;
>> +	report_size_partial = oa_buf_end - report;
>> +
>> +	if (report_size_partial < report_size) {
>> +		if (copy_to_user(buf, report, report_size_partial))
>> +			return -EFAULT;
>> +		buf += report_size_partial;
>> +
>> +		if (copy_to_user(buf, stream->oa_buffer.vaddr,
>> +				 report_size - report_size_partial))
>> +			return -EFAULT;
>> +	} else if (copy_to_user(buf, report, report_size)) {
>>		return -EFAULT;
>> +	}
>>
>>	(*offset) += header.size;
>>
>> @@ -759,8 +781,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>>	 * all a power of two).
>>	 */
>>	if (drm_WARN_ONCE(&uncore->i915->drm,
>> -			  head > OA_BUFFER_SIZE || head % report_size ||
>> -			  tail > OA_BUFFER_SIZE || tail % report_size,
>> +			  head > OA_BUFFER_SIZE ||
>> +			  tail > OA_BUFFER_SIZE,
>
>The comment above the if () also needs to be fixed.

Will fix

>
>Also, does it make sense to have 'head % 64 || tail % 64' checks above? As
>I was saying above head and tail will be 64 byte aligned.

Since head and tail are derived from HW registers and the HW only 
increments them by 64, we should be good even without the %64.

Thanks,
Umesh
Dixit, Ashutosh Feb. 18, 2023, 1:57 a.m. UTC | #3
On Fri, 17 Feb 2023 16:05:50 -0800, Umesh Nerlige Ramappa wrote:
> On Fri, Feb 17, 2023 at 12:58:18PM -0800, Dixit, Ashutosh wrote:
> > On Thu, 16 Feb 2023 16:58:48 -0800, Umesh Nerlige Ramappa wrote:
> >>
> >
> > Hi Umesh, couple of nits below.
> >
> >> Some of the newer OA formats are not powers of 2. For those formats,
> >> adjust the hw_tail accordingly when checking for new reports.
> >>
> >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
> >>  1 file changed, 28 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> >> index 9715b964aa1e..d3a1892c93be 100644
> >> --- a/drivers/gpu/drm/i915/i915_perf.c
> >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >> @@ -542,6 +542,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> >>	bool pollin;
> >>	u32 hw_tail;
> >>	u64 now;
> >> +	u32 partial_report_size;
> >>
> >>	/* We have to consider the (unlikely) possibility that read() errors
> >>	 * could result in an OA buffer reset which might reset the head and
> >> @@ -551,10 +552,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> >>
> >>	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
> >>
> >> -	/* The tail pointer increases in 64 byte increments,
> >> -	 * not in report_size steps...
> >> +	/* The tail pointer increases in 64 byte increments, whereas report
> >> +	 * sizes need not be integral multiples or 64 or powers of 2.
> > s/or/of/ ---------------------------------------^
> >
> > Also I think report sizes can only be multiples of 64, the ones we have
> > seen till now definitely are. Also the lower 6 bits of tail pointer are 0.
>
> Agree, the only addition to the old comment should be that the new reports
> may not be powers of 2.
>
> >
> >> +	 * Compute potentially partially landed report in the OA buffer
> >>	 */
> >> -	hw_tail &= ~(report_size - 1);
> >> +	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
> >> +	partial_report_size %= report_size;
> >> +
> >> +	/* Subtract partial amount off the tail */
> >> +	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
> >> +				(stream->oa_buffer.vma->size - 1));
> >
> > Couple of questions here because OA_TAKEN uses OA_BUFFER_SIZE and the above
> > expression uses stream->oa_buffer.vma->size:
> >
> > 1. Is 'OA_BUFFER_SIZE == stream->oa_buffer.vma->size'? We seem to be using
> >   the two interchaneably in the code.
>
> Yes. I think the code was updated to use vma->size when support for
> selecting OA buffer size along with large OA buffers was added, but we
> haven't pushed that upstream yet. Since I have cherry-picked patches here,
> there is some inconsistency. I would just change this patch to use
> OA_BUFFER_SIZE for now.
>
> > 2. If yes, can we add an assert about this in alloc_oa_buffer?
>
> If I change to OA_BUFFER_SIZE, then okay to skip assert?

Yes.

> Do you suspect that the vma size may actually differ from what we
> requested?

Not sure how shmem objects are allocated but my guess would be that for a
nice whole size like 16 M they the vma size will be the same. So ok to just
use OA_BUFFER_SIZE in a couple of places in this patch and skip the
assert. As long as vma_size >= OA_BUFFER_SIZE we are ok.

>
> > 3. Can the above expression be changed to:
> >
> >	hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
>
> Not if hw_tail has rolled over, but stream->oa_buffer.tail hasn't.

Why not, the two expressions are exactly the same? And anyway
stream->oa_buffer.tail is already handled in the first OA_TAKEN expression.

>
> >
> > It would be good to use the same construct if possible. Maybe we can even
> > change OA_TAKEN to something like:
> >
> > #define OA_TAKEN(tail, head)    ((tail - head) & (stream->oa_buffer.vma->size - 1))
>
> I am thinking of changing to OA_BUFFER_SIZE at other places in this
> patch. Thoughts?

Sure, let's do that, there are just 2 places.

> >
> >>
> >>	now = ktime_get_mono_fast_ns();
> >>
> >> @@ -677,6 +684,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
> >>  {
> >>	int report_size = stream->oa_buffer.format->size;
> >>	struct drm_i915_perf_record_header header;
> >> +	int report_size_partial;
> >> +	u8 *oa_buf_end;
> >>
> >>	header.type = DRM_I915_PERF_RECORD_SAMPLE;
> >>	header.pad = 0;
> >> @@ -690,8 +699,21 @@ static int append_oa_sample(struct i915_perf_stream *stream,
> >>		return -EFAULT;
> >>	buf += sizeof(header);
> >>
> >> -	if (copy_to_user(buf, report, report_size))
> >> +	oa_buf_end = stream->oa_buffer.vaddr +
> >> +		     stream->oa_buffer.vma->size;
> >> +	report_size_partial = oa_buf_end - report;
> >> +
> >> +	if (report_size_partial < report_size) {
> >> +		if (copy_to_user(buf, report, report_size_partial))
> >> +			return -EFAULT;
> >> +		buf += report_size_partial;
> >> +
> >> +		if (copy_to_user(buf, stream->oa_buffer.vaddr,
> >> +				 report_size - report_size_partial))
> >> +			return -EFAULT;
> >> +	} else if (copy_to_user(buf, report, report_size)) {
> >>		return -EFAULT;
> >> +	}
> >>
> >>	(*offset) += header.size;
> >>
> >> @@ -759,8 +781,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> >>	 * all a power of two).
> >>	 */
> >>	if (drm_WARN_ONCE(&uncore->i915->drm,
> >> -			  head > OA_BUFFER_SIZE || head % report_size ||
> >> -			  tail > OA_BUFFER_SIZE || tail % report_size,
> >> +			  head > OA_BUFFER_SIZE ||
> >> +			  tail > OA_BUFFER_SIZE,
> >
> > The comment above the if () also needs to be fixed.
>
> Will fix
>
> >
> > Also, does it make sense to have 'head % 64 || tail % 64' checks above? As
> > I was saying above head and tail will be 64 byte aligned.
>
> Since head and tail are derived from HW registers and the HW only
> increments them by 64, we should be good even without the %64.

OK.

Thanks.
--
Ashutosh
Dixit, Ashutosh Feb. 21, 2023, 6:51 p.m. UTC | #4
On Fri, 17 Feb 2023 17:57:02 -0800, Dixit, Ashutosh wrote:
>
> On Fri, 17 Feb 2023 16:05:50 -0800, Umesh Nerlige Ramappa wrote:
> > On Fri, Feb 17, 2023 at 12:58:18PM -0800, Dixit, Ashutosh wrote:
> > > On Thu, 16 Feb 2023 16:58:48 -0800, Umesh Nerlige Ramappa wrote:
> > >>
> > >
> > > Hi Umesh, couple of nits below.
> > >
> > >> Some of the newer OA formats are not powers of 2. For those formats,
> > >> adjust the hw_tail accordingly when checking for new reports.
> > >>
> > >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> > >> ---
> > >>  drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
> > >>  1 file changed, 28 insertions(+), 22 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> > >> index 9715b964aa1e..d3a1892c93be 100644
> > >> --- a/drivers/gpu/drm/i915/i915_perf.c
> > >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> > >> @@ -542,6 +542,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> > >>	bool pollin;
> > >>	u32 hw_tail;
> > >>	u64 now;
> > >> +	u32 partial_report_size;
> > >>
> > >>	/* We have to consider the (unlikely) possibility that read() errors
> > >>	 * could result in an OA buffer reset which might reset the head and
> > >> @@ -551,10 +552,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> > >>
> > >>	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
> > >>
> > >> -	/* The tail pointer increases in 64 byte increments,
> > >> -	 * not in report_size steps...
> > >> +	/* The tail pointer increases in 64 byte increments, whereas report
> > >> +	 * sizes need not be integral multiples or 64 or powers of 2.
> > > s/or/of/ ---------------------------------------^
> > >
> > > Also I think report sizes can only be multiples of 64, the ones we have
> > > seen till now definitely are. Also the lower 6 bits of tail pointer are 0.
> >
> > Agree, the only addition to the old comment should be that the new reports
> > may not be powers of 2.
> >
> > >
> > >> +	 * Compute potentially partially landed report in the OA buffer
> > >>	 */
> > >> -	hw_tail &= ~(report_size - 1);
> > >> +	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
> > >> +	partial_report_size %= report_size;
> > >> +
> > >> +	/* Subtract partial amount off the tail */
> > >> +	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
> > >> +				(stream->oa_buffer.vma->size - 1));
> > >
> > > Couple of questions here because OA_TAKEN uses OA_BUFFER_SIZE and the above
> > > expression uses stream->oa_buffer.vma->size:
> > >
> > > 1. Is 'OA_BUFFER_SIZE == stream->oa_buffer.vma->size'? We seem to be using
> > >   the two interchaneably in the code.
> >
> > Yes. I think the code was updated to use vma->size when support for
> > selecting OA buffer size along with large OA buffers was added, but we
> > haven't pushed that upstream yet. Since I have cherry-picked patches here,
> > there is some inconsistency. I would just change this patch to use
> > OA_BUFFER_SIZE for now.
> >
> > > 2. If yes, can we add an assert about this in alloc_oa_buffer?
> >
> > If I change to OA_BUFFER_SIZE, then okay to skip assert?
>
> Yes.
>
> > Do you suspect that the vma size may actually differ from what we
> > requested?
>
> Not sure how shmem objects are allocated but my guess would be that for a
> nice whole size like 16 M they the vma size will be the same. So ok to just
> use OA_BUFFER_SIZE in a couple of places in this patch and skip the
> assert. As long as vma_size >= OA_BUFFER_SIZE we are ok.
>
> >
> > > 3. Can the above expression be changed to:
> > >
> > >	hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
> >
> > Not if hw_tail has rolled over, but stream->oa_buffer.tail hasn't.
>
> Why not, the two expressions are exactly the same? And anyway
> stream->oa_buffer.tail is already handled in the first OA_TAKEN expression.

Basically, for me OA_TAKEN is a "circular diff" (for a power-of-2 sized
circular buffer), so anywhere we have these "circular diff" opereations we
should be able to replace them by OA_TAKEN.

> > >
> > > It would be good to use the same construct if possible. Maybe we can even
> > > change OA_TAKEN to something like:
> > >
> > > #define OA_TAKEN(tail, head)    ((tail - head) & (stream->oa_buffer.vma->size - 1))
> >
> > I am thinking of changing to OA_BUFFER_SIZE at other places in this
> > patch. Thoughts?
>
> Sure, let's do that, there are just 2 places.
>
> > >
> > >>
> > >>	now = ktime_get_mono_fast_ns();
> > >>
> > >> @@ -677,6 +684,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
> > >>  {
> > >>	int report_size = stream->oa_buffer.format->size;
> > >>	struct drm_i915_perf_record_header header;
> > >> +	int report_size_partial;
> > >> +	u8 *oa_buf_end;
> > >>
> > >>	header.type = DRM_I915_PERF_RECORD_SAMPLE;
> > >>	header.pad = 0;
> > >> @@ -690,8 +699,21 @@ static int append_oa_sample(struct i915_perf_stream *stream,
> > >>		return -EFAULT;
> > >>	buf += sizeof(header);
> > >>
> > >> -	if (copy_to_user(buf, report, report_size))
> > >> +	oa_buf_end = stream->oa_buffer.vaddr +
> > >> +		     stream->oa_buffer.vma->size;
> > >> +	report_size_partial = oa_buf_end - report;
> > >> +
> > >> +	if (report_size_partial < report_size) {
> > >> +		if (copy_to_user(buf, report, report_size_partial))
> > >> +			return -EFAULT;
> > >> +		buf += report_size_partial;
> > >> +
> > >> +		if (copy_to_user(buf, stream->oa_buffer.vaddr,
> > >> +				 report_size - report_size_partial))
> > >> +			return -EFAULT;
> > >> +	} else if (copy_to_user(buf, report, report_size)) {
> > >>		return -EFAULT;
> > >> +	}
> > >>
> > >>	(*offset) += header.size;
> > >>
> > >> @@ -759,8 +781,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> > >>	 * all a power of two).
> > >>	 */
> > >>	if (drm_WARN_ONCE(&uncore->i915->drm,
> > >> -			  head > OA_BUFFER_SIZE || head % report_size ||
> > >> -			  tail > OA_BUFFER_SIZE || tail % report_size,
> > >> +			  head > OA_BUFFER_SIZE ||
> > >> +			  tail > OA_BUFFER_SIZE,
> > >
> > > The comment above the if () also needs to be fixed.
> >
> > Will fix
> >
> > >
> > > Also, does it make sense to have 'head % 64 || tail % 64' checks above? As
> > > I was saying above head and tail will be 64 byte aligned.
> >
> > Since head and tail are derived from HW registers and the HW only
> > increments them by 64, we should be good even without the %64.
>
> OK.
>
> Thanks.
> --
> Ashutosh
Umesh Nerlige Ramappa Feb. 24, 2023, 7:12 p.m. UTC | #5
On Tue, Feb 21, 2023 at 10:51:57AM -0800, Dixit, Ashutosh wrote:
>On Fri, 17 Feb 2023 17:57:02 -0800, Dixit, Ashutosh wrote:
>>
>> On Fri, 17 Feb 2023 16:05:50 -0800, Umesh Nerlige Ramappa wrote:
>> > On Fri, Feb 17, 2023 at 12:58:18PM -0800, Dixit, Ashutosh wrote:
>> > > On Thu, 16 Feb 2023 16:58:48 -0800, Umesh Nerlige Ramappa wrote:
>> > >>
>> > >
>> > > Hi Umesh, couple of nits below.
>> > >
>> > >> Some of the newer OA formats are not powers of 2. For those formats,
>> > >> adjust the hw_tail accordingly when checking for new reports.
>> > >>
>> > >> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> > >> ---
>> > >>  drivers/gpu/drm/i915/i915_perf.c | 50 ++++++++++++++++++--------------
>> > >>  1 file changed, 28 insertions(+), 22 deletions(-)
>> > >>
>> > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> > >> index 9715b964aa1e..d3a1892c93be 100644
>> > >> --- a/drivers/gpu/drm/i915/i915_perf.c
>> > >> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> > >> @@ -542,6 +542,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>> > >>	bool pollin;
>> > >>	u32 hw_tail;
>> > >>	u64 now;
>> > >> +	u32 partial_report_size;
>> > >>
>> > >>	/* We have to consider the (unlikely) possibility that read() errors
>> > >>	 * could result in an OA buffer reset which might reset the head and
>> > >> @@ -551,10 +552,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>> > >>
>> > >>	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
>> > >>
>> > >> -	/* The tail pointer increases in 64 byte increments,
>> > >> -	 * not in report_size steps...
>> > >> +	/* The tail pointer increases in 64 byte increments, whereas report
>> > >> +	 * sizes need not be integral multiples or 64 or powers of 2.
>> > > s/or/of/ ---------------------------------------^
>> > >
>> > > Also I think report sizes can only be multiples of 64, the ones we have
>> > > seen till now definitely are. Also the lower 6 bits of tail pointer are 0.
>> >
>> > Agree, the only addition to the old comment should be that the new reports
>> > may not be powers of 2.
>> >
>> > >
>> > >> +	 * Compute potentially partially landed report in the OA buffer
>> > >>	 */
>> > >> -	hw_tail &= ~(report_size - 1);
>> > >> +	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
>> > >> +	partial_report_size %= report_size;
>> > >> +
>> > >> +	/* Subtract partial amount off the tail */
>> > >> +	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
>> > >> +				(stream->oa_buffer.vma->size - 1));
>> > >
>> > > Couple of questions here because OA_TAKEN uses OA_BUFFER_SIZE and the above
>> > > expression uses stream->oa_buffer.vma->size:
>> > >
>> > > 1. Is 'OA_BUFFER_SIZE == stream->oa_buffer.vma->size'? We seem to be using
>> > >   the two interchaneably in the code.
>> >
>> > Yes. I think the code was updated to use vma->size when support for
>> > selecting OA buffer size along with large OA buffers was added, but we
>> > haven't pushed that upstream yet. Since I have cherry-picked patches here,
>> > there is some inconsistency. I would just change this patch to use
>> > OA_BUFFER_SIZE for now.
>> >
>> > > 2. If yes, can we add an assert about this in alloc_oa_buffer?
>> >
>> > If I change to OA_BUFFER_SIZE, then okay to skip assert?
>>
>> Yes.
>>
>> > Do you suspect that the vma size may actually differ from what we
>> > requested?
>>
>> Not sure how shmem objects are allocated but my guess would be that for a
>> nice whole size like 16 M they the vma size will be the same. So ok to just
>> use OA_BUFFER_SIZE in a couple of places in this patch and skip the
>> assert. As long as vma_size >= OA_BUFFER_SIZE we are ok.
>>
>> >
>> > > 3. Can the above expression be changed to:
>> > >
>> > >	hw_tail = gtt_offset + OA_TAKEN(hw_tail, partial_report_size);
>> >
>> > Not if hw_tail has rolled over, but stream->oa_buffer.tail hasn't.
>>
>> Why not, the two expressions are exactly the same? And anyway
>> stream->oa_buffer.tail is already handled in the first OA_TAKEN expression.
>
>Basically, for me OA_TAKEN is a "circular diff" (for a power-of-2 sized
>circular buffer), so anywhere we have these "circular diff" opereations we
>should be able to replace them by OA_TAKEN.

I guess I misread your comment. They are indeed identical. I can change 
that.

Thanks,
Umesh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9715b964aa1e..d3a1892c93be 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -542,6 +542,7 @@  static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 	bool pollin;
 	u32 hw_tail;
 	u64 now;
+	u32 partial_report_size;
 
 	/* We have to consider the (unlikely) possibility that read() errors
 	 * could result in an OA buffer reset which might reset the head and
@@ -551,10 +552,16 @@  static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 
 	hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
 
-	/* The tail pointer increases in 64 byte increments,
-	 * not in report_size steps...
+	/* The tail pointer increases in 64 byte increments, whereas report
+	 * sizes need not be integral multiples or 64 or powers of 2.
+	 * Compute potentially partially landed report in the OA buffer
 	 */
-	hw_tail &= ~(report_size - 1);
+	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
+	partial_report_size %= report_size;
+
+	/* Subtract partial amount off the tail */
+	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
+				(stream->oa_buffer.vma->size - 1));
 
 	now = ktime_get_mono_fast_ns();
 
@@ -677,6 +684,8 @@  static int append_oa_sample(struct i915_perf_stream *stream,
 {
 	int report_size = stream->oa_buffer.format->size;
 	struct drm_i915_perf_record_header header;
+	int report_size_partial;
+	u8 *oa_buf_end;
 
 	header.type = DRM_I915_PERF_RECORD_SAMPLE;
 	header.pad = 0;
@@ -690,8 +699,21 @@  static int append_oa_sample(struct i915_perf_stream *stream,
 		return -EFAULT;
 	buf += sizeof(header);
 
-	if (copy_to_user(buf, report, report_size))
+	oa_buf_end = stream->oa_buffer.vaddr +
+		     stream->oa_buffer.vma->size;
+	report_size_partial = oa_buf_end - report;
+
+	if (report_size_partial < report_size) {
+		if (copy_to_user(buf, report, report_size_partial))
+			return -EFAULT;
+		buf += report_size_partial;
+
+		if (copy_to_user(buf, stream->oa_buffer.vaddr,
+				 report_size - report_size_partial))
+			return -EFAULT;
+	} else if (copy_to_user(buf, report, report_size)) {
 		return -EFAULT;
+	}
 
 	(*offset) += header.size;
 
@@ -759,8 +781,8 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	 * all a power of two).
 	 */
 	if (drm_WARN_ONCE(&uncore->i915->drm,
-			  head > OA_BUFFER_SIZE || head % report_size ||
-			  tail > OA_BUFFER_SIZE || tail % report_size,
+			  head > OA_BUFFER_SIZE ||
+			  tail > OA_BUFFER_SIZE,
 			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
 			  head, tail))
 		return -EIO;
@@ -774,22 +796,6 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		u32 ctx_id;
 		u64 reason;
 
-		/*
-		 * All the report sizes factor neatly into the buffer
-		 * size so we never expect to see a report split
-		 * between the beginning and end of the buffer.
-		 *
-		 * Given the initial alignment check a misalignment
-		 * here would imply a driver bug that would result
-		 * in an overrun.
-		 */
-		if (drm_WARN_ON(&uncore->i915->drm,
-				(OA_BUFFER_SIZE - head) < report_size)) {
-			drm_err(&uncore->i915->drm,
-				"Spurious OA head ptr: non-integral report offset\n");
-			break;
-		}
-
 		/*
 		 * The reason field includes flags identifying what
 		 * triggered this specific report (mostly timer