Message ID | 1510748034-14034-4-git-send-email-sagar.a.kamble@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/11/17 12:13, Sagar Arun Kamble wrote: > From: Sourab Gupta <sourab.gupta@intel.com> > > The OA reports contain the least significant 32 bits of the gpu timestamp. > This patch enables retrieval of the timestamp field from OA reports, to > forward as 64 bit raw gpu timestamps in the perf samples. > > v2: Rebase w.r.t new timecounter support. > > Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> > Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Sourab Gupta <sourab.gupta@intel.com> > Cc: Matthew Auld <matthew.auld@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_perf.c | 26 +++++++++++++++++++++++++- > 2 files changed, 27 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index e08bc85..5534cd2 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2151,6 +2151,8 @@ struct i915_perf_stream { > */ > struct i915_oa_config *oa_config; > > + u64 last_gpu_ts; > + > /** > * System time correlation variables. > */ > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index f7e748c..3b721d7 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -575,6 +575,26 @@ static int append_oa_status(struct i915_perf_stream *stream, > } > > /** > + * get_gpu_ts_from_oa_report - Retrieve absolute gpu timestamp from OA report > + * > + * Note: We are assuming that we're updating last_gpu_ts frequently enough so > + * that it's never possible to see multiple overflows before we compare > + * sample_ts to last_gpu_ts. Since this is significantly large duration > + * (~6min for 80ns ts base), we can safely assume so. > + */ > +static u64 get_gpu_ts_from_oa_report(struct i915_perf_stream *stream, > + const u8 *report) > +{ > + u32 sample_ts = *(u32 *)(report + 4); > + u32 delta; > + > + delta = sample_ts - (u32)stream->last_gpu_ts; > + stream->last_gpu_ts += delta; > + > + return stream->last_gpu_ts; > +} > + > +/** > * append_oa_sample - Copies single OA report into userspace read() buffer. > * @stream: An i915-perf stream opened for OA metrics > * @buf: destination buffer given by userspace > @@ -622,7 +642,9 @@ static int append_oa_sample(struct i915_perf_stream *stream, > } > > if (sample_flags & SAMPLE_GPU_TS) { > - /* Timestamp to be populated from OA report */ > + /* Timestamp populated from OA report */ > + gpu_ts = get_gpu_ts_from_oa_report(stream, report); > + > if (copy_to_user(buf, &gpu_ts, I915_PERF_TS_SAMPLE_SIZE)) > return -EFAULT; > } I think everything above this line should be merged int patch 2. It's better to have a single functional patch. > @@ -2421,6 +2443,8 @@ static u64 i915_cyclecounter_read(const struct cyclecounter *cc) > GEN7_TIMESTAMP_UDW); > intel_runtime_pm_put(dev_priv); > > + stream->last_gpu_ts = ts_count; This doesn't look right. You're already adding a delta in get_gpu_ts_from_oa_report(). This will produce incorrect timestamps. Since at the moment we won't allow opening without PROP_SAMPLE_OA, I would just drop this line. > + > return ts_count; > } >
On 12/7/2017 1:25 AM, Lionel Landwerlin wrote: > On 15/11/17 12:13, Sagar Arun Kamble wrote: >> From: Sourab Gupta <sourab.gupta@intel.com> >> >> The OA reports contain the least significant 32 bits of the gpu >> timestamp. >> This patch enables retrieval of the timestamp field from OA reports, to >> forward as 64 bit raw gpu timestamps in the perf samples. >> >> v2: Rebase w.r.t new timecounter support. >> >> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com> >> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Sourab Gupta <sourab.gupta@intel.com> >> Cc: Matthew Auld <matthew.auld@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 2 ++ >> drivers/gpu/drm/i915/i915_perf.c | 26 +++++++++++++++++++++++++- >> 2 files changed, 27 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index e08bc85..5534cd2 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2151,6 +2151,8 @@ struct i915_perf_stream { >> */ >> struct i915_oa_config *oa_config; >> + u64 last_gpu_ts; >> + >> /** >> * System time correlation variables. >> */ >> diff --git a/drivers/gpu/drm/i915/i915_perf.c >> b/drivers/gpu/drm/i915/i915_perf.c >> index f7e748c..3b721d7 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -575,6 +575,26 @@ static int append_oa_status(struct >> i915_perf_stream *stream, >> } >> /** >> + * get_gpu_ts_from_oa_report - Retrieve absolute gpu timestamp from >> OA report >> + * >> + * Note: We are assuming that we're updating last_gpu_ts frequently >> enough so >> + * that it's never possible to see multiple overflows before we compare >> + * sample_ts to last_gpu_ts. Since this is significantly large duration >> + * (~6min for 80ns ts base), we can safely assume so. >> + */ >> +static u64 get_gpu_ts_from_oa_report(struct i915_perf_stream *stream, >> + const u8 *report) >> +{ >> + u32 sample_ts = *(u32 *)(report + 4); >> + u32 delta; >> + >> + delta = sample_ts - (u32)stream->last_gpu_ts; >> + stream->last_gpu_ts += delta; >> + >> + return stream->last_gpu_ts; >> +} >> + >> +/** >> * append_oa_sample - Copies single OA report into userspace read() >> buffer. >> * @stream: An i915-perf stream opened for OA metrics >> * @buf: destination buffer given by userspace >> @@ -622,7 +642,9 @@ static int append_oa_sample(struct >> i915_perf_stream *stream, >> } >> if (sample_flags & SAMPLE_GPU_TS) { >> - /* Timestamp to be populated from OA report */ >> + /* Timestamp populated from OA report */ >> + gpu_ts = get_gpu_ts_from_oa_report(stream, report); >> + >> if (copy_to_user(buf, &gpu_ts, I915_PERF_TS_SAMPLE_SIZE)) >> return -EFAULT; >> } > > I think everything above this line should be merged int patch 2. > It's better to have a single functional patch. Yes. Will merge. > >> @@ -2421,6 +2443,8 @@ static u64 i915_cyclecounter_read(const struct >> cyclecounter *cc) >> GEN7_TIMESTAMP_UDW); >> intel_runtime_pm_put(dev_priv); >> + stream->last_gpu_ts = ts_count; > > This doesn't look right. You're already adding a delta in > get_gpu_ts_from_oa_report(). > This will produce incorrect timestamps. Since at the moment we won't > allow opening without PROP_SAMPLE_OA, I would just drop this line. > Yes. makes sense. will remove. >> + >> return ts_count; >> } > >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e08bc85..5534cd2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2151,6 +2151,8 @@ struct i915_perf_stream { */ struct i915_oa_config *oa_config; + u64 last_gpu_ts; + /** * System time correlation variables. */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f7e748c..3b721d7 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -575,6 +575,26 @@ static int append_oa_status(struct i915_perf_stream *stream, } /** + * get_gpu_ts_from_oa_report - Retrieve absolute gpu timestamp from OA report + * + * Note: We are assuming that we're updating last_gpu_ts frequently enough so + * that it's never possible to see multiple overflows before we compare + * sample_ts to last_gpu_ts. Since this is significantly large duration + * (~6min for 80ns ts base), we can safely assume so. + */ +static u64 get_gpu_ts_from_oa_report(struct i915_perf_stream *stream, + const u8 *report) +{ + u32 sample_ts = *(u32 *)(report + 4); + u32 delta; + + delta = sample_ts - (u32)stream->last_gpu_ts; + stream->last_gpu_ts += delta; + + return stream->last_gpu_ts; +} + +/** * append_oa_sample - Copies single OA report into userspace read() buffer. * @stream: An i915-perf stream opened for OA metrics * @buf: destination buffer given by userspace @@ -622,7 +642,9 @@ static int append_oa_sample(struct i915_perf_stream *stream, } if (sample_flags & SAMPLE_GPU_TS) { - /* Timestamp to be populated from OA report */ + /* Timestamp populated from OA report */ + gpu_ts = get_gpu_ts_from_oa_report(stream, report); + if (copy_to_user(buf, &gpu_ts, I915_PERF_TS_SAMPLE_SIZE)) return -EFAULT; } @@ -2421,6 +2443,8 @@ static u64 i915_cyclecounter_read(const struct cyclecounter *cc) GEN7_TIMESTAMP_UDW); intel_runtime_pm_put(dev_priv); + stream->last_gpu_ts = ts_count; + return ts_count; }