Message ID | 1510748034-14034-2-git-send-email-sagar.a.kamble@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Sagar Arun Kamble (2017-11-15 12:13:51) > #include <drm/drmP.h> > #include <drm/intel-gtt.h> > @@ -2149,6 +2150,14 @@ struct i915_perf_stream { > * @oa_config: The OA configuration used by the stream. > */ > struct i915_oa_config *oa_config; > + > + /** > + * System time correlation variables. > + */ > + struct cyclecounter cc; > + spinlock_t systime_lock; > + struct timespec64 start_systime; > + struct timecounter tc; This pattern is repeated a lot by struct timecounter users. (I'm still trying to understand why the common case is not catered for by a convenience timecounter api.) > }; > > /** > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 00be015..72ddc34 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -192,6 +192,7 @@ > */ > > #include <linux/anon_inodes.h> > +#include <linux/clocksource.h> > #include <linux/sizes.h> > #include <linux/uuid.h> > > @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait) > } > > /** > + * i915_cyclecounter_read - read raw cycle/timestamp counter > + * @cc: cyclecounter structure > + */ > +static u64 i915_cyclecounter_read(const struct cyclecounter *cc) > +{ > + struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc); > + struct drm_i915_private *dev_priv = stream->dev_priv; > + u64 ts_count; > + > + intel_runtime_pm_get(dev_priv); > + ts_count = I915_READ64_2x32(GEN4_TIMESTAMP, > + GEN7_TIMESTAMP_UDW); > + intel_runtime_pm_put(dev_priv); > + > + return ts_count; > +} > + > +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream) > +{ > + struct drm_i915_private *dev_priv = stream->dev_priv; > + int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency; > + struct cyclecounter *cc = &stream->cc; > + u32 maxsec; > + > + cc->read = i915_cyclecounter_read; > + cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv)); > + maxsec = cc->mask / cs_ts_freq; > + > + clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq, > + NSEC_PER_SEC, maxsec); > +} > + > +static void i915_perf_init_timecounter(struct i915_perf_stream *stream) > +{ > +#define SYSTIME_START_OFFSET 350000 /* Counter read takes about 350us */ > + unsigned long flags; > + u64 ns; > + > + i915_perf_init_cyclecounter(stream); > + spin_lock_init(&stream->systime_lock); > + > + getnstimeofday64(&stream->start_systime); > + ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET; Use ktime directly. Or else Arnd will be back with a patch to fix it. (All non-ktime interfaces are effectively deprecated; obsolete for drivers.) > + spin_lock_irqsave(&stream->systime_lock, flags); > + timecounter_init(&stream->tc, &stream->cc, ns); > + spin_unlock_irqrestore(&stream->systime_lock, flags); > +} > + > +/** > * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl > * @stream: A disabled i915 perf stream > * > @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) > /* Allow stream->ops->enable() to refer to this */ > stream->enabled = true; > > + i915_perf_init_timecounter(stream); > + > if (stream->ops->enable) > stream->ops->enable(stream); > } > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index cfdf4f8..e7e6966 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -8882,6 +8882,12 @@ enum skl_power_gate { > > /* Gen4+ Timestamp and Pipe Frame time stamp registers */ > #define GEN4_TIMESTAMP _MMIO(0x2358) > +#define GEN7_TIMESTAMP_UDW _MMIO(0x235C) > +#define PRE_GEN7_TIMESTAMP_WIDTH 32 > +#define GEN7_TIMESTAMP_WIDTH 36 > +#define CS_TIMESTAMP_WIDTH(dev_priv) \ > + (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \ > + GEN7_TIMESTAMP_WIDTH) s/PRE_GEN7/GEN4/ would be consistent. If you really want to add support for earlier, I9XX_. Ok. I can accept the justification, and we are not the only ones who do the cyclecounter -> timecounter correction like this. -Chris
On 11/15/2017 5:55 PM, Chris Wilson wrote: > Quoting Sagar Arun Kamble (2017-11-15 12:13:51) >> #include <drm/drmP.h> >> #include <drm/intel-gtt.h> >> @@ -2149,6 +2150,14 @@ struct i915_perf_stream { >> * @oa_config: The OA configuration used by the stream. >> */ >> struct i915_oa_config *oa_config; >> + >> + /** >> + * System time correlation variables. >> + */ >> + struct cyclecounter cc; >> + spinlock_t systime_lock; >> + struct timespec64 start_systime; >> + struct timecounter tc; > This pattern is repeated a lot by struct timecounter users. (I'm still > trying to understand why the common case is not catered for by a > convenience timecounter api.) Yes. Looks good case to change the cyclecounter* member in timecounter to cyclecounter. And then we can avoid separate cyclecounter. > >> }; >> >> /** >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 00be015..72ddc34 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -192,6 +192,7 @@ >> */ >> >> #include <linux/anon_inodes.h> >> +#include <linux/clocksource.h> >> #include <linux/sizes.h> >> #include <linux/uuid.h> >> >> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait) >> } >> >> /** >> + * i915_cyclecounter_read - read raw cycle/timestamp counter >> + * @cc: cyclecounter structure >> + */ >> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc) >> +{ >> + struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc); >> + struct drm_i915_private *dev_priv = stream->dev_priv; >> + u64 ts_count; >> + >> + intel_runtime_pm_get(dev_priv); >> + ts_count = I915_READ64_2x32(GEN4_TIMESTAMP, >> + GEN7_TIMESTAMP_UDW); >> + intel_runtime_pm_put(dev_priv); >> + >> + return ts_count; >> +} >> + >> +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream) >> +{ >> + struct drm_i915_private *dev_priv = stream->dev_priv; >> + int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency; >> + struct cyclecounter *cc = &stream->cc; >> + u32 maxsec; >> + >> + cc->read = i915_cyclecounter_read; >> + cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv)); >> + maxsec = cc->mask / cs_ts_freq; >> + >> + clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq, >> + NSEC_PER_SEC, maxsec); >> +} >> + >> +static void i915_perf_init_timecounter(struct i915_perf_stream *stream) >> +{ >> +#define SYSTIME_START_OFFSET 350000 /* Counter read takes about 350us */ >> + unsigned long flags; >> + u64 ns; >> + >> + i915_perf_init_cyclecounter(stream); >> + spin_lock_init(&stream->systime_lock); >> + >> + getnstimeofday64(&stream->start_systime); >> + ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET; > Use ktime directly. Or else Arnd will be back with a patch to fix it. > (All non-ktime interfaces are effectively deprecated; obsolete for > drivers.) Ok. Will update. > >> + spin_lock_irqsave(&stream->systime_lock, flags); >> + timecounter_init(&stream->tc, &stream->cc, ns); >> + spin_unlock_irqrestore(&stream->systime_lock, flags); >> +} >> + >> +/** >> * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl >> * @stream: A disabled i915 perf stream >> * >> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) >> /* Allow stream->ops->enable() to refer to this */ >> stream->enabled = true; >> >> + i915_perf_init_timecounter(stream); >> + >> if (stream->ops->enable) >> stream->ops->enable(stream); >> } >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index cfdf4f8..e7e6966 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -8882,6 +8882,12 @@ enum skl_power_gate { >> >> /* Gen4+ Timestamp and Pipe Frame time stamp registers */ >> #define GEN4_TIMESTAMP _MMIO(0x2358) >> +#define GEN7_TIMESTAMP_UDW _MMIO(0x235C) >> +#define PRE_GEN7_TIMESTAMP_WIDTH 32 >> +#define GEN7_TIMESTAMP_WIDTH 36 >> +#define CS_TIMESTAMP_WIDTH(dev_priv) \ >> + (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \ >> + GEN7_TIMESTAMP_WIDTH) > s/PRE_GEN7/GEN4/ would be consistent. > If you really want to add support for earlier, I9XX_. Yes. > > Ok. I can accept the justification, and we are not the only ones who do > the cyclecounter -> timecounter correction like this. > -Chris
On 15/11/17 12:25, Chris Wilson wrote: > Quoting Sagar Arun Kamble (2017-11-15 12:13:51) >> #include <drm/drmP.h> >> #include <drm/intel-gtt.h> >> @@ -2149,6 +2150,14 @@ struct i915_perf_stream { >> * @oa_config: The OA configuration used by the stream. >> */ >> struct i915_oa_config *oa_config; >> + >> + /** >> + * System time correlation variables. >> + */ >> + struct cyclecounter cc; >> + spinlock_t systime_lock; >> + struct timespec64 start_systime; >> + struct timecounter tc; > This pattern is repeated a lot by struct timecounter users. (I'm still > trying to understand why the common case is not catered for by a > convenience timecounter api.) > >> }; >> >> /** >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 00be015..72ddc34 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -192,6 +192,7 @@ >> */ >> >> #include <linux/anon_inodes.h> >> +#include <linux/clocksource.h> >> #include <linux/sizes.h> >> #include <linux/uuid.h> >> >> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait) >> } >> >> /** >> + * i915_cyclecounter_read - read raw cycle/timestamp counter >> + * @cc: cyclecounter structure >> + */ >> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc) >> +{ >> + struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc); >> + struct drm_i915_private *dev_priv = stream->dev_priv; >> + u64 ts_count; >> + >> + intel_runtime_pm_get(dev_priv); >> + ts_count = I915_READ64_2x32(GEN4_TIMESTAMP, >> + GEN7_TIMESTAMP_UDW); >> + intel_runtime_pm_put(dev_priv); >> + >> + return ts_count; >> +} >> + >> +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream) >> +{ >> + struct drm_i915_private *dev_priv = stream->dev_priv; >> + int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency; >> + struct cyclecounter *cc = &stream->cc; >> + u32 maxsec; >> + >> + cc->read = i915_cyclecounter_read; >> + cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv)); >> + maxsec = cc->mask / cs_ts_freq; >> + >> + clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq, >> + NSEC_PER_SEC, maxsec); >> +} >> + >> +static void i915_perf_init_timecounter(struct i915_perf_stream *stream) >> +{ >> +#define SYSTIME_START_OFFSET 350000 /* Counter read takes about 350us */ >> + unsigned long flags; >> + u64 ns; >> + >> + i915_perf_init_cyclecounter(stream); >> + spin_lock_init(&stream->systime_lock); >> + >> + getnstimeofday64(&stream->start_systime); >> + ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET; > Use ktime directly. Or else Arnd will be back with a patch to fix it. > (All non-ktime interfaces are effectively deprecated; obsolete for > drivers.) > >> + spin_lock_irqsave(&stream->systime_lock, flags); >> + timecounter_init(&stream->tc, &stream->cc, ns); >> + spin_unlock_irqrestore(&stream->systime_lock, flags); >> +} >> + >> +/** >> * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl >> * @stream: A disabled i915 perf stream >> * >> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) >> /* Allow stream->ops->enable() to refer to this */ >> stream->enabled = true; >> >> + i915_perf_init_timecounter(stream); >> + >> if (stream->ops->enable) >> stream->ops->enable(stream); >> } >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index cfdf4f8..e7e6966 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -8882,6 +8882,12 @@ enum skl_power_gate { >> >> /* Gen4+ Timestamp and Pipe Frame time stamp registers */ >> #define GEN4_TIMESTAMP _MMIO(0x2358) >> +#define GEN7_TIMESTAMP_UDW _MMIO(0x235C) >> +#define PRE_GEN7_TIMESTAMP_WIDTH 32 >> +#define GEN7_TIMESTAMP_WIDTH 36 >> +#define CS_TIMESTAMP_WIDTH(dev_priv) \ >> + (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \ >> + GEN7_TIMESTAMP_WIDTH) > s/PRE_GEN7/GEN4/ would be consistent. > If you really want to add support for earlier, I9XX_. Why not use the RING_TIMESTAMP() RING_TIMESTAMP_UDW() ? There's already used for the reg_read ioctl. > > Ok. I can accept the justification, and we are not the only ones who do > the cyclecounter -> timecounter correction like this. > -Chris >
On 12/5/2017 7:28 PM, Lionel Landwerlin wrote: > On 15/11/17 12:25, Chris Wilson wrote: >> Quoting Sagar Arun Kamble (2017-11-15 12:13:51) >>> #include <drm/drmP.h> >>> #include <drm/intel-gtt.h> >>> @@ -2149,6 +2150,14 @@ struct i915_perf_stream { >>> * @oa_config: The OA configuration used by the stream. >>> */ >>> struct i915_oa_config *oa_config; >>> + >>> + /** >>> + * System time correlation variables. >>> + */ >>> + struct cyclecounter cc; >>> + spinlock_t systime_lock; >>> + struct timespec64 start_systime; >>> + struct timecounter tc; >> This pattern is repeated a lot by struct timecounter users. (I'm still >> trying to understand why the common case is not catered for by a >> convenience timecounter api.) >> >>> }; >>> /** >>> diff --git a/drivers/gpu/drm/i915/i915_perf.c >>> b/drivers/gpu/drm/i915/i915_perf.c >>> index 00be015..72ddc34 100644 >>> --- a/drivers/gpu/drm/i915/i915_perf.c >>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>> @@ -192,6 +192,7 @@ >>> */ >>> #include <linux/anon_inodes.h> >>> +#include <linux/clocksource.h> >>> #include <linux/sizes.h> >>> #include <linux/uuid.h> >>> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct >>> file *file, poll_table *wait) >>> } >>> /** >>> + * i915_cyclecounter_read - read raw cycle/timestamp counter >>> + * @cc: cyclecounter structure >>> + */ >>> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc) >>> +{ >>> + struct i915_perf_stream *stream = container_of(cc, >>> typeof(*stream), cc); >>> + struct drm_i915_private *dev_priv = stream->dev_priv; >>> + u64 ts_count; >>> + >>> + intel_runtime_pm_get(dev_priv); >>> + ts_count = I915_READ64_2x32(GEN4_TIMESTAMP, >>> + GEN7_TIMESTAMP_UDW); >>> + intel_runtime_pm_put(dev_priv); >>> + >>> + return ts_count; >>> +} >>> + >>> +static void i915_perf_init_cyclecounter(struct i915_perf_stream >>> *stream) >>> +{ >>> + struct drm_i915_private *dev_priv = stream->dev_priv; >>> + int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency; >>> + struct cyclecounter *cc = &stream->cc; >>> + u32 maxsec; >>> + >>> + cc->read = i915_cyclecounter_read; >>> + cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv)); >>> + maxsec = cc->mask / cs_ts_freq; >>> + >>> + clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq, >>> + NSEC_PER_SEC, maxsec); >>> +} >>> + >>> +static void i915_perf_init_timecounter(struct i915_perf_stream >>> *stream) >>> +{ >>> +#define SYSTIME_START_OFFSET 350000 /* Counter read takes about >>> 350us */ >>> + unsigned long flags; >>> + u64 ns; >>> + >>> + i915_perf_init_cyclecounter(stream); >>> + spin_lock_init(&stream->systime_lock); >>> + >>> + getnstimeofday64(&stream->start_systime); >>> + ns = timespec64_to_ns(&stream->start_systime) + >>> SYSTIME_START_OFFSET; >> Use ktime directly. Or else Arnd will be back with a patch to fix it. >> (All non-ktime interfaces are effectively deprecated; obsolete for >> drivers.) >> >>> + spin_lock_irqsave(&stream->systime_lock, flags); >>> + timecounter_init(&stream->tc, &stream->cc, ns); >>> + spin_unlock_irqrestore(&stream->systime_lock, flags); >>> +} >>> + >>> +/** >>> * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl >>> * @stream: A disabled i915 perf stream >>> * >>> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct >>> i915_perf_stream *stream) >>> /* Allow stream->ops->enable() to refer to this */ >>> stream->enabled = true; >>> + i915_perf_init_timecounter(stream); >>> + >>> if (stream->ops->enable) >>> stream->ops->enable(stream); >>> } >>> diff --git a/drivers/gpu/drm/i915/i915_reg.h >>> b/drivers/gpu/drm/i915/i915_reg.h >>> index cfdf4f8..e7e6966 100644 >>> --- a/drivers/gpu/drm/i915/i915_reg.h >>> +++ b/drivers/gpu/drm/i915/i915_reg.h >>> @@ -8882,6 +8882,12 @@ enum skl_power_gate { >>> /* Gen4+ Timestamp and Pipe Frame time stamp registers */ >>> #define GEN4_TIMESTAMP _MMIO(0x2358) >>> +#define GEN7_TIMESTAMP_UDW _MMIO(0x235C) >>> +#define PRE_GEN7_TIMESTAMP_WIDTH 32 >>> +#define GEN7_TIMESTAMP_WIDTH 36 >>> +#define CS_TIMESTAMP_WIDTH(dev_priv) \ >>> + (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \ >>> + GEN7_TIMESTAMP_WIDTH) >> s/PRE_GEN7/GEN4/ would be consistent. >> If you really want to add support for earlier, I9XX_. > > Why not use the RING_TIMESTAMP() RING_TIMESTAMP_UDW() ? > There's already used for the reg_read ioctl. Yes. We can use these. > >> >> Ok. I can accept the justification, and we are not the only ones who do >> the cyclecounter -> timecounter correction like this. >> -Chris >> >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2158a75..e08bc85 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -43,6 +43,7 @@ #include <linux/pm_qos.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> +#include <linux/timecounter.h> #include <drm/drmP.h> #include <drm/intel-gtt.h> @@ -2149,6 +2150,14 @@ struct i915_perf_stream { * @oa_config: The OA configuration used by the stream. */ struct i915_oa_config *oa_config; + + /** + * System time correlation variables. + */ + struct cyclecounter cc; + spinlock_t systime_lock; + struct timespec64 start_systime; + struct timecounter tc; }; /** diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 00be015..72ddc34 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -192,6 +192,7 @@ */ #include <linux/anon_inodes.h> +#include <linux/clocksource.h> #include <linux/sizes.h> #include <linux/uuid.h> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait) } /** + * i915_cyclecounter_read - read raw cycle/timestamp counter + * @cc: cyclecounter structure + */ +static u64 i915_cyclecounter_read(const struct cyclecounter *cc) +{ + struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc); + struct drm_i915_private *dev_priv = stream->dev_priv; + u64 ts_count; + + intel_runtime_pm_get(dev_priv); + ts_count = I915_READ64_2x32(GEN4_TIMESTAMP, + GEN7_TIMESTAMP_UDW); + intel_runtime_pm_put(dev_priv); + + return ts_count; +} + +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency; + struct cyclecounter *cc = &stream->cc; + u32 maxsec; + + cc->read = i915_cyclecounter_read; + cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv)); + maxsec = cc->mask / cs_ts_freq; + + clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq, + NSEC_PER_SEC, maxsec); +} + +static void i915_perf_init_timecounter(struct i915_perf_stream *stream) +{ +#define SYSTIME_START_OFFSET 350000 /* Counter read takes about 350us */ + unsigned long flags; + u64 ns; + + i915_perf_init_cyclecounter(stream); + spin_lock_init(&stream->systime_lock); + + getnstimeofday64(&stream->start_systime); + ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET; + + spin_lock_irqsave(&stream->systime_lock, flags); + timecounter_init(&stream->tc, &stream->cc, ns); + spin_unlock_irqrestore(&stream->systime_lock, flags); +} + +/** * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl * @stream: A disabled i915 perf stream * @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream) /* Allow stream->ops->enable() to refer to this */ stream->enabled = true; + i915_perf_init_timecounter(stream); + if (stream->ops->enable) stream->ops->enable(stream); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index cfdf4f8..e7e6966 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8882,6 +8882,12 @@ enum skl_power_gate { /* Gen4+ Timestamp and Pipe Frame time stamp registers */ #define GEN4_TIMESTAMP _MMIO(0x2358) +#define GEN7_TIMESTAMP_UDW _MMIO(0x235C) +#define PRE_GEN7_TIMESTAMP_WIDTH 32 +#define GEN7_TIMESTAMP_WIDTH 36 +#define CS_TIMESTAMP_WIDTH(dev_priv) \ + (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \ + GEN7_TIMESTAMP_WIDTH) #define ILK_TIMESTAMP_HI _MMIO(0x70070) #define IVB_TIMESTAMP_CTR _MMIO(0x44070)
We can compute system time corresponding to GPU timestamp by taking a reference point and then adding delta time computed using timecounter and cyclecounter support in kernel. We have to configure cyclecounter with the GPU timestamp frequency. In further patches we can leverage timecounter_cyc2time function to compute the system time corresponding to GPU timestamp cycles derived from OA report. Important thing to note is timecounter_cyc2time considers time backwards if delta timestamp is more than half the max ns time covered by counter. (It will be ~35min for 36 bit counter. If this much sampling duration is needed we will have to update tc->nsec by explicitly reading the timecounter after duration less than 35min during sampling) On enabling perf stream we start the timecounter/cyclecounter and while collecting OA samples we translate GPU timestamp to System timestamp. Earlier approach that was based on cross-timestamp is not needed. It was being used to approximate the frequency based on invalid assumptions (possibly drift was being seen in the time due to precision issue). The precision of time from GPU clocks is already in ns and timecounter takes care of it as verified over variable durations. Cross-timestamp might be valuable to get the precise reference point w.r.t device and system time but it needs a support for reading ART counter from i915 which I find not available for i915. Hence, we are compensating the offset to setup the reference point ourselves. With this approach we have very fine precision of device and system time only differing by 5-10us. Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Sourab Gupta <sourab.gupta@intel.com> Cc: Matthew Auld <matthew.auld@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 9 +++++++ drivers/gpu/drm/i915/i915_perf.c | 53 ++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 6 +++++ 3 files changed, 68 insertions(+)