Message ID | 20170405162320.30094-7-robert@sixbynine.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hey Rob, Thanks for sending this, it looks good to me. I think we also need to update the oa_sample_rate_hard_limit & i915_oa_max_sample_rate variables. This patch is : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> On 05/04/17 17:23, Robert Bragg wrote: > An oa_exponent_to_ns() utility and per-gen timebase constants where > recently removed when updating the tail pointer race condition WA, and > this restores those so we can update the _PROP_OA_EXPONENT validation > done in read_properties_unlocked() to not assume we have a 12.5KHz > timebase as we did for Haswell. > > Signed-off-by: Robert Bragg <robert@sixbynine.org> > Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ > 2 files changed, 16 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3a22b6fd0ee6..48b07d706f06 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2463,6 +2463,7 @@ struct drm_i915_private { > > bool periodic; > int period_exponent; > + int timestamp_frequency; > > int metrics_set; > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 98eb6415b63a..87c0d1ce1b9f 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, > return ret; > } > > +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) > +{ > + return div_u64(1000000000ULL * (2ULL << exponent), > + dev_priv->perf.oa.timestamp_frequency); > +} > + > /** > * read_properties_unlocked - validate + copy userspace stream open properties > * @dev_priv: i915 device instance > @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, > /* Theoretically we can program the OA unit to sample > * every 160ns but don't allow that by default unless > * root. > - * > - * On Haswell the period is derived from the exponent > - * as: > - * > - * period = 80ns * 2^(exponent + 1) > */ > BUILD_BUG_ON(sizeof(oa_period) != 8); > - oa_period = 80ull * (2ull << value); > + oa_period = oa_exponent_to_ns(dev_priv, value); > > /* This check is primarily to ensure that oa_period <= > * UINT32_MAX (before passing to do_div which only > @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.ops.oa_hw_tail_read = > gen7_oa_hw_tail_read; > > + dev_priv->perf.oa.timestamp_frequency = 12500000; > + > dev_priv->perf.oa.oa_formats = hsw_oa_formats; > > dev_priv->perf.oa.n_builtin_sets = > @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > */ > > if (IS_GEN8(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 12500000; > + > dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; > dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; > dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); > @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > i915_oa_select_metric_set_chv; > } > } else if (IS_GEN9(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 12000000; > + > dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; > dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; > dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); > @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.ops.select_metric_set = > i915_oa_select_metric_set_sklgt4; > } else if (IS_BROXTON(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 19200123; > + > dev_priv->perf.oa.n_builtin_sets = > i915_oa_n_builtin_metric_sets_bxt; > dev_priv->perf.oa.ops.select_metric_set =
On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote: > An oa_exponent_to_ns() utility and per-gen timebase constants where > recently removed when updating the tail pointer race condition WA, and > this restores those so we can update the _PROP_OA_EXPONENT validation > done in read_properties_unlocked() to not assume we have a 12.5KHz > timebase as we did for Haswell. > > Signed-off-by: Robert Bragg <robert@sixbynine.org> > Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ > 2 files changed, 16 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3a22b6fd0ee6..48b07d706f06 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2463,6 +2463,7 @@ struct drm_i915_private { > > bool periodic; > int period_exponent; > + int timestamp_frequency; > > int metrics_set; > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 98eb6415b63a..87c0d1ce1b9f 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, > return ret; > } > > +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) > +{ > + return div_u64(1000000000ULL * (2ULL << exponent), > + dev_priv->perf.oa.timestamp_frequency); > +} > + > /** > * read_properties_unlocked - validate + copy userspace stream open properties > * @dev_priv: i915 device instance > @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, > /* Theoretically we can program the OA unit to sample > * every 160ns but don't allow that by default unless > * root. > - * > - * On Haswell the period is derived from the exponent > - * as: > - * > - * period = 80ns * 2^(exponent + 1) > */ > BUILD_BUG_ON(sizeof(oa_period) != 8); > - oa_period = 80ull * (2ull << value); > + oa_period = oa_exponent_to_ns(dev_priv, value); > > /* This check is primarily to ensure that oa_period <= > * UINT32_MAX (before passing to do_div which only > @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.ops.oa_hw_tail_read = > gen7_oa_hw_tail_read; > > + dev_priv->perf.oa.timestamp_frequency = 12500000; > + > dev_priv->perf.oa.oa_formats = hsw_oa_formats; > > dev_priv->perf.oa.n_builtin_sets = > @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > */ > > if (IS_GEN8(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 12500000; > + > dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; > dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; > dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); > @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > i915_oa_select_metric_set_chv; > } > } else if (IS_GEN9(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 12000000; > + > dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; > dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; > dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); > @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.ops.select_metric_set = > i915_oa_select_metric_set_sklgt4; > } else if (IS_BROXTON(dev_priv)) { > + dev_priv->perf.oa.timestamp_frequency = 19200123; > + Why isn't this exactly 19.2MHz? > dev_priv->perf.oa.n_builtin_sets = > i915_oa_n_builtin_metric_sets_bxt; > dev_priv->perf.oa.ops.select_metric_set = > -- > 2.12.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 05/04/17 18:06, Ville Syrjälä wrote: > On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote: >> An oa_exponent_to_ns() utility and per-gen timebase constants where >> recently removed when updating the tail pointer race condition WA, and >> this restores those so we can update the _PROP_OA_EXPONENT validation >> done in read_properties_unlocked() to not assume we have a 12.5KHz >> timebase as we did for Haswell. >> >> Signed-off-by: Robert Bragg <robert@sixbynine.org> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 1 + >> drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ >> 2 files changed, 16 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 3a22b6fd0ee6..48b07d706f06 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2463,6 +2463,7 @@ struct drm_i915_private { >> >> bool periodic; >> int period_exponent; >> + int timestamp_frequency; >> >> int metrics_set; >> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >> index 98eb6415b63a..87c0d1ce1b9f 100644 >> --- a/drivers/gpu/drm/i915/i915_perf.c >> +++ b/drivers/gpu/drm/i915/i915_perf.c >> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, >> return ret; >> } >> >> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) >> +{ >> + return div_u64(1000000000ULL * (2ULL << exponent), >> + dev_priv->perf.oa.timestamp_frequency); >> +} >> + >> /** >> * read_properties_unlocked - validate + copy userspace stream open properties >> * @dev_priv: i915 device instance >> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, >> /* Theoretically we can program the OA unit to sample >> * every 160ns but don't allow that by default unless >> * root. >> - * >> - * On Haswell the period is derived from the exponent >> - * as: >> - * >> - * period = 80ns * 2^(exponent + 1) >> */ >> BUILD_BUG_ON(sizeof(oa_period) != 8); >> - oa_period = 80ull * (2ull << value); >> + oa_period = oa_exponent_to_ns(dev_priv, value); >> >> /* This check is primarily to ensure that oa_period <= >> * UINT32_MAX (before passing to do_div which only >> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) >> dev_priv->perf.oa.ops.oa_hw_tail_read = >> gen7_oa_hw_tail_read; >> >> + dev_priv->perf.oa.timestamp_frequency = 12500000; >> + >> dev_priv->perf.oa.oa_formats = hsw_oa_formats; >> >> dev_priv->perf.oa.n_builtin_sets = >> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) >> */ >> >> if (IS_GEN8(dev_priv)) { >> + dev_priv->perf.oa.timestamp_frequency = 12500000; >> + >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); >> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) >> i915_oa_select_metric_set_chv; >> } >> } else if (IS_GEN9(dev_priv)) { >> + dev_priv->perf.oa.timestamp_frequency = 12000000; >> + >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); >> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) >> dev_priv->perf.oa.ops.select_metric_set = >> i915_oa_select_metric_set_sklgt4; >> } else if (IS_BROXTON(dev_priv)) { >> + dev_priv->perf.oa.timestamp_frequency = 19200123; >> + > Why isn't this exactly 19.2MHz? It's based off the timestamp base (from documentation) : BDW: 80ns SKL: 83.333ns BXT: 52.083ns 1000000000 / 19200123 is the closest we can get. > >> dev_priv->perf.oa.n_builtin_sets = >> i915_oa_n_builtin_metric_sets_bxt; >> dev_priv->perf.oa.ops.select_metric_set = >> -- >> 2.12.0 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Wed, Apr 05, 2017 at 06:17:36PM +0100, Lionel Landwerlin wrote: > On 05/04/17 18:06, Ville Syrjälä wrote: > > On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote: > >> An oa_exponent_to_ns() utility and per-gen timebase constants where > >> recently removed when updating the tail pointer race condition WA, and > >> this restores those so we can update the _PROP_OA_EXPONENT validation > >> done in read_properties_unlocked() to not assume we have a 12.5KHz > >> timebase as we did for Haswell. > >> > >> Signed-off-by: Robert Bragg <robert@sixbynine.org> > >> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_drv.h | 1 + > >> drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ > >> 2 files changed, 16 insertions(+), 6 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > >> index 3a22b6fd0ee6..48b07d706f06 100644 > >> --- a/drivers/gpu/drm/i915/i915_drv.h > >> +++ b/drivers/gpu/drm/i915/i915_drv.h > >> @@ -2463,6 +2463,7 @@ struct drm_i915_private { > >> > >> bool periodic; > >> int period_exponent; > >> + int timestamp_frequency; > >> > >> int metrics_set; > >> > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > >> index 98eb6415b63a..87c0d1ce1b9f 100644 > >> --- a/drivers/gpu/drm/i915/i915_perf.c > >> +++ b/drivers/gpu/drm/i915/i915_perf.c > >> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, > >> return ret; > >> } > >> > >> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) > >> +{ > >> + return div_u64(1000000000ULL * (2ULL << exponent), > >> + dev_priv->perf.oa.timestamp_frequency); > >> +} > >> + > >> /** > >> * read_properties_unlocked - validate + copy userspace stream open properties > >> * @dev_priv: i915 device instance > >> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, > >> /* Theoretically we can program the OA unit to sample > >> * every 160ns but don't allow that by default unless > >> * root. > >> - * > >> - * On Haswell the period is derived from the exponent > >> - * as: > >> - * > >> - * period = 80ns * 2^(exponent + 1) > >> */ > >> BUILD_BUG_ON(sizeof(oa_period) != 8); > >> - oa_period = 80ull * (2ull << value); > >> + oa_period = oa_exponent_to_ns(dev_priv, value); > >> > >> /* This check is primarily to ensure that oa_period <= > >> * UINT32_MAX (before passing to do_div which only > >> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > >> dev_priv->perf.oa.ops.oa_hw_tail_read = > >> gen7_oa_hw_tail_read; > >> > >> + dev_priv->perf.oa.timestamp_frequency = 12500000; > >> + > >> dev_priv->perf.oa.oa_formats = hsw_oa_formats; > >> > >> dev_priv->perf.oa.n_builtin_sets = > >> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > >> */ > >> > >> if (IS_GEN8(dev_priv)) { > >> + dev_priv->perf.oa.timestamp_frequency = 12500000; > >> + > >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; > >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; > >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); > >> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > >> i915_oa_select_metric_set_chv; > >> } > >> } else if (IS_GEN9(dev_priv)) { > >> + dev_priv->perf.oa.timestamp_frequency = 12000000; > >> + > >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; > >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; > >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); > >> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > >> dev_priv->perf.oa.ops.select_metric_set = > >> i915_oa_select_metric_set_sklgt4; > >> } else if (IS_BROXTON(dev_priv)) { > >> + dev_priv->perf.oa.timestamp_frequency = 19200123; > >> + > > Why isn't this exactly 19.2MHz? > > It's based off the timestamp base (from documentation) : > > BDW: 80ns > SKL: 83.333ns > BXT: 52.083ns > > 1000000000 / 19200123 is the closest we can get. I'm pretty sure the clock is actually 19.2 and the 52.083 is just a truncated value. Same for the 83.333 where the code does specify 120MHz exactly. > > > > >> dev_priv->perf.oa.n_builtin_sets = > >> i915_oa_n_builtin_metric_sets_bxt; > >> dev_priv->perf.oa.ops.select_metric_set = > >> -- > >> 2.12.0 > >> > >> _______________________________________________ > >> Intel-gfx mailing list > >> Intel-gfx@lists.freedesktop.org > >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx >
On Wed, Apr 5, 2017 at 6:26 PM, Ville Syrjälä <ville.syrjala@linux.intel.com > wrote: > On Wed, Apr 05, 2017 at 06:17:36PM +0100, Lionel Landwerlin wrote: > > On 05/04/17 18:06, Ville Syrjälä wrote: > > > On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote: > > >> An oa_exponent_to_ns() utility and per-gen timebase constants where > > >> recently removed when updating the tail pointer race condition WA, and > > >> this restores those so we can update the _PROP_OA_EXPONENT validation > > >> done in read_properties_unlocked() to not assume we have a 12.5KHz > > >> timebase as we did for Haswell. > > >> > > >> Signed-off-by: Robert Bragg <robert@sixbynine.org> > > >> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> > > >> --- > > >> drivers/gpu/drm/i915/i915_drv.h | 1 + > > >> drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ > > >> 2 files changed, 16 insertions(+), 6 deletions(-) > > >> > > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > > >> index 3a22b6fd0ee6..48b07d706f06 100644 > > >> --- a/drivers/gpu/drm/i915/i915_drv.h > > >> +++ b/drivers/gpu/drm/i915/i915_drv.h > > >> @@ -2463,6 +2463,7 @@ struct drm_i915_private { > > >> > > >> bool periodic; > > >> int period_exponent; > > >> + int timestamp_frequency; > > >> > > >> int metrics_set; > > >> > > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c > b/drivers/gpu/drm/i915/i915_perf.c > > >> index 98eb6415b63a..87c0d1ce1b9f 100644 > > >> --- a/drivers/gpu/drm/i915/i915_perf.c > > >> +++ b/drivers/gpu/drm/i915/i915_perf.c > > >> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct > drm_i915_private *dev_priv, > > >> return ret; > > >> } > > >> > > >> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int > exponent) > > >> +{ > > >> + return div_u64(1000000000ULL * (2ULL << exponent), > > >> + dev_priv->perf.oa.timestamp_frequency); > > >> +} > > >> + > > >> /** > > >> * read_properties_unlocked - validate + copy userspace stream open > properties > > >> * @dev_priv: i915 device instance > > >> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct > drm_i915_private *dev_priv, > > >> /* Theoretically we can program the OA unit to > sample > > >> * every 160ns but don't allow that by default > unless > > >> * root. > > >> - * > > >> - * On Haswell the period is derived from the > exponent > > >> - * as: > > >> - * > > >> - * period = 80ns * 2^(exponent + 1) > > >> */ > > >> BUILD_BUG_ON(sizeof(oa_period) != 8); > > >> - oa_period = 80ull * (2ull << value); > > >> + oa_period = oa_exponent_to_ns(dev_priv, value); > > >> > > >> /* This check is primarily to ensure that > oa_period <= > > >> * UINT32_MAX (before passing to do_div which only > > >> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private > *dev_priv) > > >> dev_priv->perf.oa.ops.oa_hw_tail_read = > > >> gen7_oa_hw_tail_read; > > >> > > >> + dev_priv->perf.oa.timestamp_frequency = 12500000; > > >> + > > >> dev_priv->perf.oa.oa_formats = hsw_oa_formats; > > >> > > >> dev_priv->perf.oa.n_builtin_sets = > > >> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private > *dev_priv) > > >> */ > > >> > > >> if (IS_GEN8(dev_priv)) { > > >> + dev_priv->perf.oa.timestamp_frequency = 12500000; > > >> + > > >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; > > >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; > > >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); > > >> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private > *dev_priv) > > >> i915_oa_select_metric_set_chv; > > >> } > > >> } else if (IS_GEN9(dev_priv)) { > > >> + dev_priv->perf.oa.timestamp_frequency = 12000000; > > >> + > > >> dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; > > >> dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; > > >> dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); > > >> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private > *dev_priv) > > >> dev_priv->perf.oa.ops.select_metric_set = > > >> i915_oa_select_metric_set_sklgt4; > > >> } else if (IS_BROXTON(dev_priv)) { > > >> + dev_priv->perf.oa.timestamp_frequency = > 19200123; > > >> + > > > Why isn't this exactly 19.2MHz? > > > > It's based off the timestamp base (from documentation) : > > > > BDW: 80ns > > SKL: 83.333ns > > BXT: 52.083ns > > > > 1000000000 / 19200123 is the closest we can get. > > I'm pretty sure the clock is actually 19.2 and the 52.083 is just a > truncated value. Same for the 83.333 where the code does specify > 120MHz exactly. > Ah, right, that sounds most likely. I'll update. Thanks, - Robert > > > > > > > > >> dev_priv->perf.oa.n_builtin_sets = > > >> i915_oa_n_builtin_metric_sets_bxt; > > >> dev_priv->perf.oa.ops.select_metric_set = > > >> -- > > >> 2.12.0 > > >> > > >> _______________________________________________ > > >> Intel-gfx mailing list > > >> Intel-gfx@lists.freedesktop.org > > >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > -- > Ville Syrjälä > Intel OTC >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3a22b6fd0ee6..48b07d706f06 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2463,6 +2463,7 @@ struct drm_i915_private { bool periodic; int period_exponent; + int timestamp_frequency; int metrics_set; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 98eb6415b63a..87c0d1ce1b9f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, return ret; } +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) +{ + return div_u64(1000000000ULL * (2ULL << exponent), + dev_priv->perf.oa.timestamp_frequency); +} + /** * read_properties_unlocked - validate + copy userspace stream open properties * @dev_priv: i915 device instance @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, /* Theoretically we can program the OA unit to sample * every 160ns but don't allow that by default unless * root. - * - * On Haswell the period is derived from the exponent - * as: - * - * period = 80ns * 2^(exponent + 1) */ BUILD_BUG_ON(sizeof(oa_period) != 8); - oa_period = 80ull * (2ull << value); + oa_period = oa_exponent_to_ns(dev_priv, value); /* This check is primarily to ensure that oa_period <= * UINT32_MAX (before passing to do_div which only @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.oa_hw_tail_read = gen7_oa_hw_tail_read; + dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.oa_formats = hsw_oa_formats; dev_priv->perf.oa.n_builtin_sets = @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) */ if (IS_GEN8(dev_priv)) { + dev_priv->perf.oa.timestamp_frequency = 12500000; + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120; dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25); @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) i915_oa_select_metric_set_chv; } } else if (IS_GEN9(dev_priv)) { + dev_priv->perf.oa.timestamp_frequency = 12000000; + dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128; dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de; dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.select_metric_set = i915_oa_select_metric_set_sklgt4; } else if (IS_BROXTON(dev_priv)) { + dev_priv->perf.oa.timestamp_frequency = 19200123; + dev_priv->perf.oa.n_builtin_sets = i915_oa_n_builtin_metric_sets_bxt; dev_priv->perf.oa.ops.select_metric_set =
An oa_exponent_to_ns() utility and per-gen timebase constants where recently removed when updating the tail pointer race condition WA, and this restores those so we can update the _PROP_OA_EXPONENT validation done in read_properties_unlocked() to not assume we have a 12.5KHz timebase as we did for Haswell. Signed-off-by: Robert Bragg <robert@sixbynine.org> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-)