Message ID | 20190501153450.30494-6-stuart.summers@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Refactor to expand subslice mask | expand |
Can you elaborate a bit more on what's the rationale for this? do you just want to avoid having too many inlines since the paths they're used in are not critical, or do you have some more functional reason? This is not a critic to the patch, I just want to understand where you're coming from ;) BTW, looking at this patch I realized there are a few more DIV_ROUND_UP(..., BITS_PER_BYTE) that could be converted to GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new variable in the next patch, but for consistency it might still be worth updating them all in patch 2 or at least mention in the commit message of patch 2 that the remaining cases are updated by a follow-up patch in the series. Patch 2 is quite small, so you could also just squash it into patch 6 to avoid the split. Daniele On 5/1/19 8:34 AM, Stuart Summers wrote: > Additionally, ensure these are all prefixed with intel_sseu_* > to match the convention of other functions in i915. > > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_sseu.c | 54 +++++++++++++++++++ > drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++----------------- > drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- > drivers/gpu/drm/i915/i915_drv.c | 2 +- > drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++------------ > 5 files changed, 102 insertions(+), 86 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c > index 7f448f3bea0b..4a0b82fc108c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.c > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c > @@ -8,6 +8,60 @@ > #include "intel_lrc_reg.h" > #include "intel_sseu.h" > > +unsigned int > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu) > +{ > + unsigned int i, total = 0; > + > + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > + total += hweight8(sseu->subslice_mask[i]); > + > + return total; > +} > + > +unsigned int > +intel_sseu_subslices_per_slice(const struct 
sseu_dev_info *sseu, u8 slice) > +{ > + return hweight8(sseu->subslice_mask[slice]); > +} > + > +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, > + int subslice) > +{ > + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, > + BITS_PER_BYTE); > + int slice_stride = sseu->max_subslices * subslice_stride; > + > + return slice * slice_stride + subslice * subslice_stride; > +} > + > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice, > + int subslice) > +{ > + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); > + u16 eu_mask = 0; > + > + for (i = 0; > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { > + eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > + (i * BITS_PER_BYTE); > + } > + > + return eu_mask; > +} > + > +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, > + u16 eu_mask) > +{ > + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); > + > + for (i = 0; > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { > + sseu->eu_mask[offset + i] = > + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; > + } > +} > + > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > const struct intel_sseu *req_sseu) > { > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h > index 029e71d8f140..56e3721ae83f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > @@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu) > return value; > } > > -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) > -{ > - unsigned int i, total = 0; > - > - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > - total += hweight8(sseu->subslice_mask[i]); > +unsigned int > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu); > > - return total; > -} > +unsigned int > +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 
slice); > > -static inline unsigned int > -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) > -{ > - return hweight8(sseu->subslice_mask[slice]); > -} > - > -static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, > - int slice, int subslice) > -{ > - int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, > - BITS_PER_BYTE); > - int slice_stride = sseu->max_subslices * subslice_stride; > - > - return slice * slice_stride + subslice * subslice_stride; > -} > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice, > + int subslice); > > -static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, > - int slice, int subslice) > -{ > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > - u16 eu_mask = 0; > - > - for (i = 0; > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { > - eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > - (i * BITS_PER_BYTE); > - } > - > - return eu_mask; > -} > - > -static inline void sseu_set_eus(struct sseu_dev_info *sseu, > - int slice, int subslice, u16 eu_mask) > -{ > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > - > - for (i = 0; > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { > - sseu->eu_mask[offset + i] = > - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; > - } > -} > +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, > + u16 eu_mask); > > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > const struct intel_sseu *req_sseu); > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index fe854c629a32..3f3ee83ac315 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -4158,7 +4158,7 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, > RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; > } > sseu->eu_total = sseu->eu_per_subslice * > - sseu_subslice_total(sseu); > + intel_sseu_subslice_total(sseu); 
> > /* subtract fused off EU(s) from enabled slice(s) */ > for (s = 0; s < fls(sseu->slice_mask); s++) { > @@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, > seq_printf(m, " %s Slice Total: %u\n", type, > hweight8(sseu->slice_mask)); > seq_printf(m, " %s Subslice Total: %u\n", type, > - sseu_subslice_total(sseu)); > + intel_sseu_subslice_total(sseu)); > for (s = 0; s < fls(sseu->slice_mask); s++) { > seq_printf(m, " %s Slice%i subslices: %u\n", type, > - s, sseu_subslices_per_slice(sseu, s)); > + s, intel_sseu_subslices_per_slice(sseu, s)); > } > seq_printf(m, " %s EU Total: %u\n", type, > sseu->eu_total); > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index c376244c19c4..130c5140db0d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, > value = i915_cmd_parser_get_version(dev_priv); > break; > case I915_PARAM_SUBSLICE_TOTAL: > - value = sseu_subslice_total(sseu); > + value = intel_sseu_subslice_total(sseu); > if (!value) > return -ENODEV; > break; > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index 559cf0d0628e..e1dbccf04cd9 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -90,10 +90,10 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) > > drm_printf(p, "slice total: %u, mask=%04x\n", > hweight8(sseu->slice_mask), sseu->slice_mask); > - drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); > + drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu)); > for (s = 0; s < sseu->max_slices; s++) { > drm_printf(p, "slice%d: %u subslices, mask=%04x\n", > - s, sseu_subslices_per_slice(sseu, s), > + s, intel_sseu_subslices_per_slice(sseu, s), > sseu->subslice_mask[s]); > } > drm_printf(p, "EU 
total: %u\n", sseu->eu_total); > @@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, > > for (s = 0; s < sseu->max_slices; s++) { > drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", > - s, sseu_subslices_per_slice(sseu, s), > + s, intel_sseu_subslices_per_slice(sseu, s), > sseu->subslice_mask[s]); > > for (ss = 0; ss < sseu->max_subslices; ss++) { > - u16 enabled_eus = sseu_get_eus(sseu, s, ss); > + u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss); > > drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", > ss, hweight16(enabled_eus), enabled_eus); > @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) > sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; > for (ss = 0; ss < sseu->max_subslices; ss++) { > if (sseu->subslice_mask[s] & BIT(ss)) > - sseu_set_eus(sseu, s, ss, eu_en); > + intel_sseu_set_eus(sseu, s, ss, eu_en); > } > } > } > @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) > /* Slice0 */ > eu_en = ~I915_READ(GEN8_EU_DISABLE0); > for (ss = 0; ss < sseu->max_subslices; ss++) > - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); > + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); > /* Slice1 */ > - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > eu_en = ~I915_READ(GEN8_EU_DISABLE1); > - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > /* Slice2 */ > - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > /* Slice3 */ > - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > eu_en = ~I915_READ(GEN8_EU_DISABLE2); > - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > + 
intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > /* Slice4 */ > - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); > + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); > /* Slice5 */ > - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > eu_en = ~I915_READ(GEN10_EU_DISABLE3); > - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > > /* Do a second pass where we mark the subslices disabled if all their > * eus are off. > */ > for (s = 0; s < sseu->max_slices; s++) { > for (ss = 0; ss < sseu->max_subslices; ss++) { > - if (sseu_get_eus(sseu, s, ss) == 0) > + if (intel_sseu_get_eus(sseu, s, ss) == 0) > sseu->subslice_mask[s] &= ~BIT(ss); > } > } > @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) > * EU in any one subslice may be fused off for die > * recovery. > */ > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? 
> DIV_ROUND_UP(sseu->eu_total, > - sseu_subslice_total(sseu)) : 0; > + intel_sseu_subslice_total(sseu)) : > + 0; > > /* No restrictions on Power Gating */ > sseu->has_slice_pg = 1; > @@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) > CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); > > sseu->subslice_mask[0] |= BIT(0); > - sseu_set_eus(sseu, 0, 0, ~disabled_mask); > + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask); > } > > if (!(fuse & CHV_FGT_DISABLE_SS1)) { > @@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) > CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); > > sseu->subslice_mask[0] |= BIT(1); > - sseu_set_eus(sseu, 0, 1, ~disabled_mask); > + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask); > } > > sseu->eu_total = compute_eu_total(sseu); > @@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) > * CHV expected to always have a uniform distribution of EU > * across subslices. > */ > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > - sseu->eu_total / sseu_subslice_total(sseu) : > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > + sseu->eu_total / intel_sseu_subslice_total(sseu) : > 0; > /* > * CHV supports subslice power gating on devices with more than > @@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) > * more than one EU pair per subslice. 
> */ > sseu->has_slice_pg = 0; > - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; > + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1; > sseu->has_eu_pg = (sseu->eu_per_subslice > 2); > } > > @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > > eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; > > - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > > eu_per_ss = sseu->max_eus_per_subslice - > hweight8(eu_disabled_mask); > @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > * recovery. BXT is expected to be perfectly uniform in EU > * distribution. > */ > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > DIV_ROUND_UP(sseu->eu_total, > - sseu_subslice_total(sseu)) : 0; > + intel_sseu_subslice_total(sseu)) : > + 0; > /* > * SKL+ supports slice power gating on devices with more than > * one slice, and supports EU power gating on devices with > @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) > sseu->has_slice_pg = > !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1; > sseu->has_subslice_pg = > - IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; > + IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > 1; > sseu->has_eu_pg = sseu->eu_per_subslice > 2; > > if (IS_GEN9_LP(dev_priv)) { > @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) > eu_disabled_mask = > eu_disable[s] >> (ss * sseu->max_eus_per_subslice); > > - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > > n_disabled = hweight8(eu_disabled_mask); > > @@ -496,9 +498,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) > * subslices with the exception that any one EU in any one subslice may > * be fused off for die recovery. 
> */ > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > DIV_ROUND_UP(sseu->eu_total, > - sseu_subslice_total(sseu)) : 0; > + intel_sseu_subslice_total(sseu)) : > + 0; > > /* > * BDW supports slice power gating on devices with more than > @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) > > for (s = 0; s < sseu->max_slices; s++) { > for (ss = 0; ss < sseu->max_subslices; ss++) { > - sseu_set_eus(sseu, s, ss, > - (1UL << sseu->eu_per_subslice) - 1); > + intel_sseu_set_eus(sseu, s, ss, > + (1UL << sseu->eu_per_subslice) - 1); > } > } > >
On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio wrote: > Can you elaborate a bit more on what's the rationale for this? do > you > just want to avoid having too many inlines since the paths they're > used > in are not critical, or do you have some more functional reason? This > is > not a critic to the patch, I just want to understand where you're > coming > from ;) This was a request from Jani Nikula in a previous series update. I don't have a strong preference either way personally. If you don't have any major concerns, I'd prefer to keep the series as-is to prevent too much thrash here, but let me know. > > BTW, looking at this patch I realized there are a few more > DIV_ROUND_UP(..., BITS_PER_BYTE) that could be converted to > GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new > variable in the next patch, but for consistency it might still be > worth > updating them all in patch 2 or at least mention in the commit > message > of patch 2 that the remaining cases are updated by a follow-up patch > in > the series. Patch 2 is quite small, so you could also just squash it > into patch 6 to avoid the split. I'm happy to squash them. I did try to isolate this a bit, but you're right that I ended up pushing some of these DIV_ROUND_UP... stride calculations to the last patch in the series. If you don't have any objection, to keep the final patch a bit simpler, I'd rather pull those changes into the earlier patch. I realize you already have a RB on that patch. Any issues doing this? Thanks, Stuart > > Daniele > > On 5/1/19 8:34 AM, Stuart Summers wrote: > > Additionally, ensure these are all prefixed with intel_sseu_* > > to match the convention of other functions in i915. 
> > > > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > > --- > > drivers/gpu/drm/i915/gt/intel_sseu.c | 54 +++++++++++++++++++ > > drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++-------------- > > --- > > drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- > > drivers/gpu/drm/i915/i915_drv.c | 2 +- > > drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++------- > > ----- > > 5 files changed, 102 insertions(+), 86 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c > > b/drivers/gpu/drm/i915/gt/intel_sseu.c > > index 7f448f3bea0b..4a0b82fc108c 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.c > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c > > @@ -8,6 +8,60 @@ > > #include "intel_lrc_reg.h" > > #include "intel_sseu.h" > > > > +unsigned int > > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu) > > +{ > > + unsigned int i, total = 0; > > + > > + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > > + total += hweight8(sseu->subslice_mask[i]); > > + > > + return total; > > +} > > + > > +unsigned int > > +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, > > u8 slice) > > +{ > > + return hweight8(sseu->subslice_mask[slice]); > > +} > > + > > +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int > > slice, > > + int subslice) > > +{ > > + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, > > + BITS_PER_BYTE); > > + int slice_stride = sseu->max_subslices * subslice_stride; > > + > > + return slice * slice_stride + subslice * subslice_stride; > > +} > > + > > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int > > slice, > > + int subslice) > > +{ > > + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); > > + u16 eu_mask = 0; > > + > > + for (i = 0; > > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > BITS_PER_BYTE); i++) { > > + eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > > + (i * BITS_PER_BYTE); > > + } > > + > > + return eu_mask; > > +} > > + > > +void 
intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int > > subslice, > > + u16 eu_mask) > > +{ > > + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); > > + > > + for (i = 0; > > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > BITS_PER_BYTE); i++) { > > + sseu->eu_mask[offset + i] = > > + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; > > + } > > +} > > + > > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > > const struct intel_sseu *req_sseu) > > { > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h > > b/drivers/gpu/drm/i915/gt/intel_sseu.h > > index 029e71d8f140..56e3721ae83f 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > > @@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct > > sseu_dev_info *sseu) > > return value; > > } > > > > -static inline unsigned int sseu_subslice_total(const struct > > sseu_dev_info *sseu) > > -{ > > - unsigned int i, total = 0; > > - > > - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > > - total += hweight8(sseu->subslice_mask[i]); > > +unsigned int > > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu); > > > > - return total; > > -} > > +unsigned int > > +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, > > u8 slice); > > > > -static inline unsigned int > > -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 > > slice) > > -{ > > - return hweight8(sseu->subslice_mask[slice]); > > -} > > - > > -static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, > > - int slice, int subslice) > > -{ > > - int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, > > - BITS_PER_BYTE); > > - int slice_stride = sseu->max_subslices * subslice_stride; > > - > > - return slice * slice_stride + subslice * subslice_stride; > > -} > > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int > > slice, > > + int subslice); > > > > -static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, > > - int 
slice, int subslice) > > -{ > > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > > - u16 eu_mask = 0; > > - > > - for (i = 0; > > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > BITS_PER_BYTE); i++) { > > - eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > > - (i * BITS_PER_BYTE); > > - } > > - > > - return eu_mask; > > -} > > - > > -static inline void sseu_set_eus(struct sseu_dev_info *sseu, > > - int slice, int subslice, u16 eu_mask) > > -{ > > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > > - > > - for (i = 0; > > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > BITS_PER_BYTE); i++) { > > - sseu->eu_mask[offset + i] = > > - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; > > - } > > -} > > +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int > > subslice, > > + u16 eu_mask); > > > > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > > const struct intel_sseu *req_sseu); > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > > b/drivers/gpu/drm/i915/i915_debugfs.c > > index fe854c629a32..3f3ee83ac315 100644 > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > @@ -4158,7 +4158,7 @@ static void > > broadwell_sseu_device_status(struct drm_i915_private *dev_priv, > > RUNTIME_INFO(dev_priv)- > > >sseu.subslice_mask[s]; > > } > > sseu->eu_total = sseu->eu_per_subslice * > > - sseu_subslice_total(sseu); > > + intel_sseu_subslice_total(sseu); > > > > /* subtract fused off EU(s) from enabled slice(s) */ > > for (s = 0; s < fls(sseu->slice_mask); s++) { > > @@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct > > seq_file *m, bool is_available_info, > > seq_printf(m, " %s Slice Total: %u\n", type, > > hweight8(sseu->slice_mask)); > > seq_printf(m, " %s Subslice Total: %u\n", type, > > - sseu_subslice_total(sseu)); > > + intel_sseu_subslice_total(sseu)); > > for (s = 0; s < fls(sseu->slice_mask); s++) { > > seq_printf(m, " %s Slice%i subslices: %u\n", type, > > - s, 
sseu_subslices_per_slice(sseu, s)); > > + s, intel_sseu_subslices_per_slice(sseu, s)); > > } > > seq_printf(m, " %s EU Total: %u\n", type, > > sseu->eu_total); > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > > b/drivers/gpu/drm/i915/i915_drv.c > > index c376244c19c4..130c5140db0d 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct > > drm_device *dev, void *data, > > value = i915_cmd_parser_get_version(dev_priv); > > break; > > case I915_PARAM_SUBSLICE_TOTAL: > > - value = sseu_subslice_total(sseu); > > + value = intel_sseu_subslice_total(sseu); > > if (!value) > > return -ENODEV; > > break; > > diff --git a/drivers/gpu/drm/i915/intel_device_info.c > > b/drivers/gpu/drm/i915/intel_device_info.c > > index 559cf0d0628e..e1dbccf04cd9 100644 > > --- a/drivers/gpu/drm/i915/intel_device_info.c > > +++ b/drivers/gpu/drm/i915/intel_device_info.c > > @@ -90,10 +90,10 @@ static void sseu_dump(const struct > > sseu_dev_info *sseu, struct drm_printer *p) > > > > drm_printf(p, "slice total: %u, mask=%04x\n", > > hweight8(sseu->slice_mask), sseu->slice_mask); > > - drm_printf(p, "subslice total: %u\n", > > sseu_subslice_total(sseu)); > > + drm_printf(p, "subslice total: %u\n", > > intel_sseu_subslice_total(sseu)); > > for (s = 0; s < sseu->max_slices; s++) { > > drm_printf(p, "slice%d: %u subslices, mask=%04x\n", > > - s, sseu_subslices_per_slice(sseu, s), > > + s, intel_sseu_subslices_per_slice(sseu, s), > > sseu->subslice_mask[s]); > > } > > drm_printf(p, "EU total: %u\n", sseu->eu_total); > > @@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const > > struct sseu_dev_info *sseu, > > > > for (s = 0; s < sseu->max_slices; s++) { > > drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", > > - s, sseu_subslices_per_slice(sseu, s), > > + s, intel_sseu_subslices_per_slice(sseu, s), > > sseu->subslice_mask[s]); > > > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > - 
u16 enabled_eus = sseu_get_eus(sseu, s, ss); > > + u16 enabled_eus = intel_sseu_get_eus(sseu, s, > > ss); > > > > drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", > > ss, hweight16(enabled_eus), > > enabled_eus); > > @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct > > drm_i915_private *dev_priv) > > sseu->subslice_mask[s] = (ss_en >> ss_idx) & > > ss_en_mask; > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > if (sseu->subslice_mask[s] & BIT(ss)) > > - sseu_set_eus(sseu, s, ss, > > eu_en); > > + intel_sseu_set_eus(sseu, s, ss, > > eu_en); > > } > > } > > } > > @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct > > drm_i915_private *dev_priv) > > /* Slice0 */ > > eu_en = ~I915_READ(GEN8_EU_DISABLE0); > > for (ss = 0; ss < sseu->max_subslices; ss++) > > - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & > > eu_mask); > > + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & > > eu_mask); > > /* Slice1 */ > > - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > > + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > > eu_en = ~I915_READ(GEN8_EU_DISABLE1); > > - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > > + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > > /* Slice2 */ > > - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > > - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > > + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > > + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > > /* Slice3 */ > > - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > > + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > > eu_en = ~I915_READ(GEN8_EU_DISABLE2); > > - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > > + intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > > /* Slice4 */ > > - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > > - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); > > + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > > + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & 
eu_mask); > > /* Slice5 */ > > - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > > + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > > eu_en = ~I915_READ(GEN10_EU_DISABLE3); > > - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > > + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > > > > /* Do a second pass where we mark the subslices disabled if all > > their > > * eus are off. > > */ > > for (s = 0; s < sseu->max_slices; s++) { > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > - if (sseu_get_eus(sseu, s, ss) == 0) > > + if (intel_sseu_get_eus(sseu, s, ss) == 0) > > sseu->subslice_mask[s] &= ~BIT(ss); > > } > > } > > @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct > > drm_i915_private *dev_priv) > > * EU in any one subslice may be fused off for die > > * recovery. > > */ > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > > DIV_ROUND_UP(sseu->eu_total, > > - sseu_subslice_total(sseu)) > > : 0; > > + intel_sseu_subslice_total( > > sseu)) : > > + 0; > > > > /* No restrictions on Power Gating */ > > sseu->has_slice_pg = 1; > > @@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct > > drm_i915_private *dev_priv) > > CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); > > > > sseu->subslice_mask[0] |= BIT(0); > > - sseu_set_eus(sseu, 0, 0, ~disabled_mask); > > + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask); > > } > > > > if (!(fuse & CHV_FGT_DISABLE_SS1)) { > > @@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct > > drm_i915_private *dev_priv) > > CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); > > > > sseu->subslice_mask[0] |= BIT(1); > > - sseu_set_eus(sseu, 0, 1, ~disabled_mask); > > + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask); > > } > > > > sseu->eu_total = compute_eu_total(sseu); > > @@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct > > drm_i915_private *dev_priv) > > * CHV expected to always have a uniform distribution of EU > > * across 
subslices. > > */ > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > - sseu->eu_total / > > sseu_subslice_total(sseu) : > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > > + sseu->eu_total / > > intel_sseu_subslice_total(sseu) : > > 0; > > /* > > * CHV supports subslice power gating on devices with more than > > @@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct > > drm_i915_private *dev_priv) > > * more than one EU pair per subslice. > > */ > > sseu->has_slice_pg = 0; > > - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; > > + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1; > > sseu->has_eu_pg = (sseu->eu_per_subslice > 2); > > } > > > > @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct > > drm_i915_private *dev_priv) > > > > eu_disabled_mask = (eu_disable >> (ss * 8)) & > > eu_mask; > > > > - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > > + intel_sseu_set_eus(sseu, s, ss, > > ~eu_disabled_mask); > > > > eu_per_ss = sseu->max_eus_per_subslice - > > hweight8(eu_disabled_mask); > > @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct > > drm_i915_private *dev_priv) > > * recovery. BXT is expected to be perfectly uniform in EU > > * distribution. > > */ > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? 
> > DIV_ROUND_UP(sseu->eu_total, > > - sseu_subslice_total(sseu)) > > : 0; > > + intel_sseu_subslice_total( > > sseu)) : > > + 0; > > /* > > * SKL+ supports slice power gating on devices with more than > > * one slice, and supports EU power gating on devices with > > @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct > > drm_i915_private *dev_priv) > > sseu->has_slice_pg = > > !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > > > 1; > > sseu->has_subslice_pg = > > - IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; > > + IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > > > 1; > > sseu->has_eu_pg = sseu->eu_per_subslice > 2; > > > > if (IS_GEN9_LP(dev_priv)) { > > @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct > > drm_i915_private *dev_priv) > > eu_disabled_mask = > > eu_disable[s] >> (ss * sseu- > > >max_eus_per_subslice); > > > > - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); > > + intel_sseu_set_eus(sseu, s, ss, > > ~eu_disabled_mask); > > > > n_disabled = hweight8(eu_disabled_mask); > > > > @@ -496,9 +498,10 @@ static void broadwell_sseu_info_init(struct > > drm_i915_private *dev_priv) > > * subslices with the exception that any one EU in any one > > subslice may > > * be fused off for die recovery. > > */ > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? > > DIV_ROUND_UP(sseu->eu_total, > > - sseu_subslice_total(sseu)) > > : 0; > > + intel_sseu_subslice_total( > > sseu)) : > > + 0; > > > > /* > > * BDW supports slice power gating on devices with more than > > @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct > > drm_i915_private *dev_priv) > > > > for (s = 0; s < sseu->max_slices; s++) { > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > - sseu_set_eus(sseu, s, ss, > > - (1UL << sseu->eu_per_subslice) - > > 1); > > + intel_sseu_set_eus(sseu, s, ss, > > + (1UL << sseu- > > >eu_per_subslice) - 1); > > } > > } > > > >
On 5/1/19 2:04 PM, Summers, Stuart wrote: > On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio wrote: >> Can you elaborate a bit more on what's the rationale for this? do >> you >> just want to avoid having too many inlines since the paths they're >> used >> in are not critical, or do you have some more functional reason? This >> is >> not a critic to the patch, I just want to understand where you're >> coming >> from ;) > > This was a request from Jani Nikula in a previous series update. I > don't have a strong preference either way personally. If you don't have > any major concerns, I'd prefer to keep the series as-is to prevent too > much thrash here, but let me know. > No concerns, just please update the commit message to explain that we're moving them because there is no need for them to be inline since they're not on a critical path where we need preformance. >> >> BTW, looking at this patch I realized there are a few more >> DIV_ROUND_UP(..., BITS_PER_BYTE) that could be converted to >> GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new >> variable in the next patch, but for consistency it might still be >> worth >> updating them all in patch 2 or at least mention in the commit >> message >> of patch 2 that the remaining cases are updated by a follow-up patch >> in >> the series. Patch 2 is quite small, so you could also just squash it >> into patch 6 to avoid the split. > > I'm happy to squash them. I did try to isolate this a bit, but you're > right that I ended up pushing some of these DIV_ROUND_UP... stride > calculations to the last patch in the series. If you don't have any > objection, to keep the finaly patch a bit simpler, I'd rather pull > those changes into the earlier patch. I realize you already have a RB > on that patch. Any issues doing this? > If you're changing all of them from DIV_ROUND_UP to GEN_SSEU_STRIDE in patch 2 I'm ok for you to keep the r-b. 
If you want to port the other logic for saving sseu->ss_stride to that patch then I'll have another quick look at it after you re-send as that is a more complex change. Daniele > Thanks, > Stuart > >> >> Daniele >> >> On 5/1/19 8:34 AM, Stuart Summers wrote: >>> Additionally, ensure these are all prefixed with intel_sseu_* >>> to match the convention of other functions in i915. >>> >>> Signed-off-by: Stuart Summers <stuart.summers@intel.com> >>> --- >>> drivers/gpu/drm/i915/gt/intel_sseu.c | 54 +++++++++++++++++++ >>> drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++-------------- >>> --- >>> drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- >>> drivers/gpu/drm/i915/i915_drv.c | 2 +- >>> drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++------- >>> ----- >>> 5 files changed, 102 insertions(+), 86 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c >>> b/drivers/gpu/drm/i915/gt/intel_sseu.c >>> index 7f448f3bea0b..4a0b82fc108c 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c >>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c >>> @@ -8,6 +8,60 @@ >>> #include "intel_lrc_reg.h" >>> #include "intel_sseu.h" >>> >>> +unsigned int >>> +intel_sseu_subslice_total(const struct sseu_dev_info *sseu) >>> +{ >>> + unsigned int i, total = 0; >>> + >>> + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) >>> + total += hweight8(sseu->subslice_mask[i]); >>> + >>> + return total; >>> +} >>> + >>> +unsigned int >>> +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, >>> u8 slice) >>> +{ >>> + return hweight8(sseu->subslice_mask[slice]); >>> +} >>> + >>> +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int >>> slice, >>> + int subslice) >>> +{ >>> + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> + BITS_PER_BYTE); >>> + int slice_stride = sseu->max_subslices * subslice_stride; >>> + >>> + return slice * slice_stride + subslice * subslice_stride; >>> +} >>> + >>> +u16 intel_sseu_get_eus(const struct sseu_dev_info 
*sseu, int >>> slice, >>> + int subslice) >>> +{ >>> + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); >>> + u16 eu_mask = 0; >>> + >>> + for (i = 0; >>> + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> BITS_PER_BYTE); i++) { >>> + eu_mask |= ((u16)sseu->eu_mask[offset + i]) << >>> + (i * BITS_PER_BYTE); >>> + } >>> + >>> + return eu_mask; >>> +} >>> + >>> +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int >>> subslice, >>> + u16 eu_mask) >>> +{ >>> + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); >>> + >>> + for (i = 0; >>> + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> BITS_PER_BYTE); i++) { >>> + sseu->eu_mask[offset + i] = >>> + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; >>> + } >>> +} >>> + >>> u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, >>> const struct intel_sseu *req_sseu) >>> { >>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h >>> b/drivers/gpu/drm/i915/gt/intel_sseu.h >>> index 029e71d8f140..56e3721ae83f 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h >>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h >>> @@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct >>> sseu_dev_info *sseu) >>> return value; >>> } >>> >>> -static inline unsigned int sseu_subslice_total(const struct >>> sseu_dev_info *sseu) >>> -{ >>> - unsigned int i, total = 0; >>> - >>> - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) >>> - total += hweight8(sseu->subslice_mask[i]); >>> +unsigned int >>> +intel_sseu_subslice_total(const struct sseu_dev_info *sseu); >>> >>> - return total; >>> -} >>> +unsigned int >>> +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, >>> u8 slice); >>> >>> -static inline unsigned int >>> -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 >>> slice) >>> -{ >>> - return hweight8(sseu->subslice_mask[slice]); >>> -} >>> - >>> -static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, >>> - int slice, int subslice) >>> -{ >>> - int subslice_stride = 
DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> - BITS_PER_BYTE); >>> - int slice_stride = sseu->max_subslices * subslice_stride; >>> - >>> - return slice * slice_stride + subslice * subslice_stride; >>> -} >>> +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int >>> slice, >>> + int subslice); >>> >>> -static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, >>> - int slice, int subslice) >>> -{ >>> - int i, offset = sseu_eu_idx(sseu, slice, subslice); >>> - u16 eu_mask = 0; >>> - >>> - for (i = 0; >>> - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> BITS_PER_BYTE); i++) { >>> - eu_mask |= ((u16)sseu->eu_mask[offset + i]) << >>> - (i * BITS_PER_BYTE); >>> - } >>> - >>> - return eu_mask; >>> -} >>> - >>> -static inline void sseu_set_eus(struct sseu_dev_info *sseu, >>> - int slice, int subslice, u16 eu_mask) >>> -{ >>> - int i, offset = sseu_eu_idx(sseu, slice, subslice); >>> - >>> - for (i = 0; >>> - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, >>> BITS_PER_BYTE); i++) { >>> - sseu->eu_mask[offset + i] = >>> - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; >>> - } >>> -} >>> +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int >>> subslice, >>> + u16 eu_mask); >>> >>> u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, >>> const struct intel_sseu *req_sseu); >>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c >>> b/drivers/gpu/drm/i915/i915_debugfs.c >>> index fe854c629a32..3f3ee83ac315 100644 >>> --- a/drivers/gpu/drm/i915/i915_debugfs.c >>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >>> @@ -4158,7 +4158,7 @@ static void >>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv, >>> RUNTIME_INFO(dev_priv)- >>>> sseu.subslice_mask[s]; >>> } >>> sseu->eu_total = sseu->eu_per_subslice * >>> - sseu_subslice_total(sseu); >>> + intel_sseu_subslice_total(sseu); >>> >>> /* subtract fused off EU(s) from enabled slice(s) */ >>> for (s = 0; s < fls(sseu->slice_mask); s++) { >>> @@ -4182,10 +4182,10 @@ static void 
i915_print_sseu_info(struct >>> seq_file *m, bool is_available_info, >>> seq_printf(m, " %s Slice Total: %u\n", type, >>> hweight8(sseu->slice_mask)); >>> seq_printf(m, " %s Subslice Total: %u\n", type, >>> - sseu_subslice_total(sseu)); >>> + intel_sseu_subslice_total(sseu)); >>> for (s = 0; s < fls(sseu->slice_mask); s++) { >>> seq_printf(m, " %s Slice%i subslices: %u\n", type, >>> - s, sseu_subslices_per_slice(sseu, s)); >>> + s, intel_sseu_subslices_per_slice(sseu, s)); >>> } >>> seq_printf(m, " %s EU Total: %u\n", type, >>> sseu->eu_total); >>> diff --git a/drivers/gpu/drm/i915/i915_drv.c >>> b/drivers/gpu/drm/i915/i915_drv.c >>> index c376244c19c4..130c5140db0d 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.c >>> +++ b/drivers/gpu/drm/i915/i915_drv.c >>> @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct >>> drm_device *dev, void *data, >>> value = i915_cmd_parser_get_version(dev_priv); >>> break; >>> case I915_PARAM_SUBSLICE_TOTAL: >>> - value = sseu_subslice_total(sseu); >>> + value = intel_sseu_subslice_total(sseu); >>> if (!value) >>> return -ENODEV; >>> break; >>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c >>> b/drivers/gpu/drm/i915/intel_device_info.c >>> index 559cf0d0628e..e1dbccf04cd9 100644 >>> --- a/drivers/gpu/drm/i915/intel_device_info.c >>> +++ b/drivers/gpu/drm/i915/intel_device_info.c >>> @@ -90,10 +90,10 @@ static void sseu_dump(const struct >>> sseu_dev_info *sseu, struct drm_printer *p) >>> >>> drm_printf(p, "slice total: %u, mask=%04x\n", >>> hweight8(sseu->slice_mask), sseu->slice_mask); >>> - drm_printf(p, "subslice total: %u\n", >>> sseu_subslice_total(sseu)); >>> + drm_printf(p, "subslice total: %u\n", >>> intel_sseu_subslice_total(sseu)); >>> for (s = 0; s < sseu->max_slices; s++) { >>> drm_printf(p, "slice%d: %u subslices, mask=%04x\n", >>> - s, sseu_subslices_per_slice(sseu, s), >>> + s, intel_sseu_subslices_per_slice(sseu, s), >>> sseu->subslice_mask[s]); >>> } >>> drm_printf(p, "EU total: %u\n", sseu->eu_total); 
>>> @@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const >>> struct sseu_dev_info *sseu, >>> >>> for (s = 0; s < sseu->max_slices; s++) { >>> drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", >>> - s, sseu_subslices_per_slice(sseu, s), >>> + s, intel_sseu_subslices_per_slice(sseu, s), >>> sseu->subslice_mask[s]); >>> >>> for (ss = 0; ss < sseu->max_subslices; ss++) { >>> - u16 enabled_eus = sseu_get_eus(sseu, s, ss); >>> + u16 enabled_eus = intel_sseu_get_eus(sseu, s, >>> ss); >>> >>> drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", >>> ss, hweight16(enabled_eus), >>> enabled_eus); >>> @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> sseu->subslice_mask[s] = (ss_en >> ss_idx) & >>> ss_en_mask; >>> for (ss = 0; ss < sseu->max_subslices; ss++) { >>> if (sseu->subslice_mask[s] & BIT(ss)) >>> - sseu_set_eus(sseu, s, ss, >>> eu_en); >>> + intel_sseu_set_eus(sseu, s, ss, >>> eu_en); >>> } >>> } >>> } >>> @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> /* Slice0 */ >>> eu_en = ~I915_READ(GEN8_EU_DISABLE0); >>> for (ss = 0; ss < sseu->max_subslices; ss++) >>> - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & >>> eu_mask); >>> + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & >>> eu_mask); >>> /* Slice1 */ >>> - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); >>> + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); >>> eu_en = ~I915_READ(GEN8_EU_DISABLE1); >>> - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); >>> + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); >>> /* Slice2 */ >>> - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); >>> - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); >>> + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); >>> + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); >>> /* Slice3 */ >>> - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); >>> + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); >>> 
eu_en = ~I915_READ(GEN8_EU_DISABLE2); >>> - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); >>> + intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); >>> /* Slice4 */ >>> - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); >>> - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); >>> + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); >>> + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); >>> /* Slice5 */ >>> - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); >>> + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); >>> eu_en = ~I915_READ(GEN10_EU_DISABLE3); >>> - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); >>> + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); >>> >>> /* Do a second pass where we mark the subslices disabled if all >>> their >>> * eus are off. >>> */ >>> for (s = 0; s < sseu->max_slices; s++) { >>> for (ss = 0; ss < sseu->max_subslices; ss++) { >>> - if (sseu_get_eus(sseu, s, ss) == 0) >>> + if (intel_sseu_get_eus(sseu, s, ss) == 0) >>> sseu->subslice_mask[s] &= ~BIT(ss); >>> } >>> } >>> @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> * EU in any one subslice may be fused off for die >>> * recovery. >>> */ >>> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? >>> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? 
>>> DIV_ROUND_UP(sseu->eu_total, >>> - sseu_subslice_total(sseu)) >>> : 0; >>> + intel_sseu_subslice_total( >>> sseu)) : >>> + 0; >>> >>> /* No restrictions on Power Gating */ >>> sseu->has_slice_pg = 1; >>> @@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); >>> >>> sseu->subslice_mask[0] |= BIT(0); >>> - sseu_set_eus(sseu, 0, 0, ~disabled_mask); >>> + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask); >>> } >>> >>> if (!(fuse & CHV_FGT_DISABLE_SS1)) { >>> @@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); >>> >>> sseu->subslice_mask[0] |= BIT(1); >>> - sseu_set_eus(sseu, 0, 1, ~disabled_mask); >>> + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask); >>> } >>> >>> sseu->eu_total = compute_eu_total(sseu); >>> @@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> * CHV expected to always have a uniform distribution of EU >>> * across subslices. >>> */ >>> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? >>> - sseu->eu_total / >>> sseu_subslice_total(sseu) : >>> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? >>> + sseu->eu_total / >>> intel_sseu_subslice_total(sseu) : >>> 0; >>> /* >>> * CHV supports subslice power gating on devices with more than >>> @@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> * more than one EU pair per subslice. 
>>> */ >>> sseu->has_slice_pg = 0; >>> - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; >>> + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1; >>> sseu->has_eu_pg = (sseu->eu_per_subslice > 2); >>> } >>> >>> @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> >>> eu_disabled_mask = (eu_disable >> (ss * 8)) & >>> eu_mask; >>> >>> - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); >>> + intel_sseu_set_eus(sseu, s, ss, >>> ~eu_disabled_mask); >>> >>> eu_per_ss = sseu->max_eus_per_subslice - >>> hweight8(eu_disabled_mask); >>> @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> * recovery. BXT is expected to be perfectly uniform in EU >>> * distribution. >>> */ >>> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? >>> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? >>> DIV_ROUND_UP(sseu->eu_total, >>> - sseu_subslice_total(sseu)) >>> : 0; >>> + intel_sseu_subslice_total( >>> sseu)) : >>> + 0; >>> /* >>> * SKL+ supports slice power gating on devices with more than >>> * one slice, and supports EU power gating on devices with >>> @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> sseu->has_slice_pg = >>> !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > >>> 1; >>> sseu->has_subslice_pg = >>> - IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; >>> + IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) >>>> 1; >>> sseu->has_eu_pg = sseu->eu_per_subslice > 2; >>> >>> if (IS_GEN9_LP(dev_priv)) { >>> @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> eu_disabled_mask = >>> eu_disable[s] >> (ss * sseu- >>>> max_eus_per_subslice); >>> >>> - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); >>> + intel_sseu_set_eus(sseu, s, ss, >>> ~eu_disabled_mask); >>> >>> n_disabled = hweight8(eu_disabled_mask); >>> >>> @@ -496,9 +498,10 @@ static void 
broadwell_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> * subslices with the exception that any one EU in any one >>> subslice may >>> * be fused off for die recovery. >>> */ >>> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? >>> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? >>> DIV_ROUND_UP(sseu->eu_total, >>> - sseu_subslice_total(sseu)) >>> : 0; >>> + intel_sseu_subslice_total( >>> sseu)) : >>> + 0; >>> >>> /* >>> * BDW supports slice power gating on devices with more than >>> @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct >>> drm_i915_private *dev_priv) >>> >>> for (s = 0; s < sseu->max_slices; s++) { >>> for (ss = 0; ss < sseu->max_subslices; ss++) { >>> - sseu_set_eus(sseu, s, ss, >>> - (1UL << sseu->eu_per_subslice) - >>> 1); >>> + intel_sseu_set_eus(sseu, s, ss, >>> + (1UL << sseu- >>>> eu_per_subslice) - 1); >>> } >>> } >>> >>>
On Wed, 2019-05-01 at 14:19 -0700, Daniele Ceraolo Spurio wrote: > > On 5/1/19 2:04 PM, Summers, Stuart wrote: > > On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio wrote: > > > Can you elaborate a bit more on what's the rationale for this? do > > > you > > > just want to avoid having too many inlines since the paths > > > they're > > > used > > > in are not critical, or do you have some more functional reason? > > > This > > > is > > > not a critic to the patch, I just want to understand where you're > > > coming > > > from ;) > > > > This was a request from Jani Nikula in a previous series update. I > > don't have a strong preference either way personally. If you don't > > have > > any major concerns, I'd prefer to keep the series as-is to prevent > > too > > much thrash here, but let me know. > > > > No concerns, just please update the commit message to explain that > we're > moving them because there is no need for them to be inline since > they're > not on a critical path where we need preformance. Sounds great. > > > > > > > BTW, looking at this patch I realized there are a few more > > > DIV_ROUND_UP(..., BITS_PER_BYTE) that could be converted to > > > GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new > > > variable in the next patch, but for consistency it might still be > > > worth > > > updating them all in patch 2 or at least mention in the commit > > > message > > > of patch 2 that the remaining cases are updated by a follow-up > > > patch > > > in > > > the series. Patch 2 is quite small, so you could also just squash > > > it > > > into patch 6 to avoid the split. > > > > I'm happy to squash them. I did try to isolate this a bit, but > > you're > > right that I ended up pushing some of these DIV_ROUND_UP... stride > > calculations to the last patch in the series. If you don't have any > > objection, to keep the finaly patch a bit simpler, I'd rather pull > > those changes into the earlier patch. 
I realize you already have a > > RB > > on that patch. Any issues doing this? > > > > If you're changing all of them from DIV_ROUND_UP to GEN_SSEU_STRIDE > in > patch 2 I'm ok for you to keep the r-b. If you want to port the > other > logic for saving sseu->ss_stride to that patch then I'll have > another > quick look at it after you re-send as that is a more complex change. I'll do the former, then convert those to the new structure layout in the subsequent patches. Thanks, Stuart > > Daniele > > > Thanks, > > Stuart > > > > > > > > Daniele > > > > > > On 5/1/19 8:34 AM, Stuart Summers wrote: > > > > Additionally, ensure these are all prefixed with intel_sseu_* > > > > to match the convention of other functions in i915. > > > > > > > > Signed-off-by: Stuart Summers <stuart.summers@intel.com> > > > > --- > > > > drivers/gpu/drm/i915/gt/intel_sseu.c | 54 > > > > +++++++++++++++++++ > > > > drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++----------- > > > > --- > > > > --- > > > > drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- > > > > drivers/gpu/drm/i915/i915_drv.c | 2 +- > > > > drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++ > > > > ------- > > > > ----- > > > > 5 files changed, 102 insertions(+), 86 deletions(-) > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c > > > > b/drivers/gpu/drm/i915/gt/intel_sseu.c > > > > index 7f448f3bea0b..4a0b82fc108c 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.c > > > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c > > > > @@ -8,6 +8,60 @@ > > > > #include "intel_lrc_reg.h" > > > > #include "intel_sseu.h" > > > > > > > > +unsigned int > > > > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu) > > > > +{ > > > > + unsigned int i, total = 0; > > > > + > > > > + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > > > > + total += hweight8(sseu->subslice_mask[i]); > > > > + > > > > + return total; > > > > +} > > > > + > > > > +unsigned int > > > > +intel_sseu_subslices_per_slice(const struct 
sseu_dev_info > > > > *sseu, > > > > u8 slice) > > > > +{ > > > > + return hweight8(sseu->subslice_mask[slice]); > > > > +} > > > > + > > > > +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, > > > > int > > > > slice, > > > > + int subslice) > > > > +{ > > > > + int subslice_stride = DIV_ROUND_UP(sseu- > > > > >max_eus_per_subslice, > > > > + BITS_PER_BYTE); > > > > + int slice_stride = sseu->max_subslices * > > > > subslice_stride; > > > > + > > > > + return slice * slice_stride + subslice * > > > > subslice_stride; > > > > +} > > > > + > > > > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int > > > > slice, > > > > + int subslice) > > > > +{ > > > > + int i, offset = intel_sseu_eu_idx(sseu, slice, > > > > subslice); > > > > + u16 eu_mask = 0; > > > > + > > > > + for (i = 0; > > > > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > > > BITS_PER_BYTE); i++) { > > > > + eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > > > > + (i * BITS_PER_BYTE); > > > > + } > > > > + > > > > + return eu_mask; > > > > +} > > > > + > > > > +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, > > > > int > > > > subslice, > > > > + u16 eu_mask) > > > > +{ > > > > + int i, offset = intel_sseu_eu_idx(sseu, slice, > > > > subslice); > > > > + > > > > + for (i = 0; > > > > + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > > > BITS_PER_BYTE); i++) { > > > > + sseu->eu_mask[offset + i] = > > > > + (eu_mask >> (BITS_PER_BYTE * i)) & > > > > 0xff; > > > > + } > > > > +} > > > > + > > > > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > > > > const struct intel_sseu *req_sseu) > > > > { > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > > b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > > index 029e71d8f140..56e3721ae83f 100644 > > > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > > @@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct > > > > sseu_dev_info *sseu) > > > > 
return value; > > > > } > > > > > > > > -static inline unsigned int sseu_subslice_total(const struct > > > > sseu_dev_info *sseu) > > > > -{ > > > > - unsigned int i, total = 0; > > > > - > > > > - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) > > > > - total += hweight8(sseu->subslice_mask[i]); > > > > +unsigned int > > > > +intel_sseu_subslice_total(const struct sseu_dev_info *sseu); > > > > > > > > - return total; > > > > -} > > > > +unsigned int > > > > +intel_sseu_subslices_per_slice(const struct sseu_dev_info > > > > *sseu, > > > > u8 slice); > > > > > > > > -static inline unsigned int > > > > -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 > > > > slice) > > > > -{ > > > > - return hweight8(sseu->subslice_mask[slice]); > > > > -} > > > > - > > > > -static inline int sseu_eu_idx(const struct sseu_dev_info > > > > *sseu, > > > > - int slice, int subslice) > > > > -{ > > > > - int subslice_stride = DIV_ROUND_UP(sseu- > > > > >max_eus_per_subslice, > > > > - BITS_PER_BYTE); > > > > - int slice_stride = sseu->max_subslices * > > > > subslice_stride; > > > > - > > > > - return slice * slice_stride + subslice * > > > > subslice_stride; > > > > -} > > > > +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int > > > > slice, > > > > + int subslice); > > > > > > > > -static inline u16 sseu_get_eus(const struct sseu_dev_info > > > > *sseu, > > > > - int slice, int subslice) > > > > -{ > > > > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > > > > - u16 eu_mask = 0; > > > > - > > > > - for (i = 0; > > > > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > > > BITS_PER_BYTE); i++) { > > > > - eu_mask |= ((u16)sseu->eu_mask[offset + i]) << > > > > - (i * BITS_PER_BYTE); > > > > - } > > > > - > > > > - return eu_mask; > > > > -} > > > > - > > > > -static inline void sseu_set_eus(struct sseu_dev_info *sseu, > > > > - int slice, int subslice, u16 > > > > eu_mask) > > > > -{ > > > > - int i, offset = sseu_eu_idx(sseu, slice, subslice); > > 
> > - > > > > - for (i = 0; > > > > - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, > > > > BITS_PER_BYTE); i++) { > > > > - sseu->eu_mask[offset + i] = > > > > - (eu_mask >> (BITS_PER_BYTE * i)) & > > > > 0xff; > > > > - } > > > > -} > > > > +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, > > > > int > > > > subslice, > > > > + u16 eu_mask); > > > > > > > > u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, > > > > const struct intel_sseu *req_sseu); > > > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > > > > b/drivers/gpu/drm/i915/i915_debugfs.c > > > > index fe854c629a32..3f3ee83ac315 100644 > > > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > > > @@ -4158,7 +4158,7 @@ static void > > > > broadwell_sseu_device_status(struct drm_i915_private *dev_priv, > > > > RUNTIME_INFO(dev_priv)- > > > > > sseu.subslice_mask[s]; > > > > > > > > } > > > > sseu->eu_total = sseu->eu_per_subslice * > > > > - sseu_subslice_total(sseu); > > > > + intel_sseu_subslice_total(sseu > > > > ); > > > > > > > > /* subtract fused off EU(s) from enabled > > > > slice(s) */ > > > > for (s = 0; s < fls(sseu->slice_mask); s++) { > > > > @@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct > > > > seq_file *m, bool is_available_info, > > > > seq_printf(m, " %s Slice Total: %u\n", type, > > > > hweight8(sseu->slice_mask)); > > > > seq_printf(m, " %s Subslice Total: %u\n", type, > > > > - sseu_subslice_total(sseu)); > > > > + intel_sseu_subslice_total(sseu)); > > > > for (s = 0; s < fls(sseu->slice_mask); s++) { > > > > seq_printf(m, " %s Slice%i subslices: %u\n", > > > > type, > > > > - s, sseu_subslices_per_slice(sseu, > > > > s)); > > > > + s, > > > > intel_sseu_subslices_per_slice(sseu, s)); > > > > } > > > > seq_printf(m, " %s EU Total: %u\n", type, > > > > sseu->eu_total); > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > > > > b/drivers/gpu/drm/i915/i915_drv.c > > > > index c376244c19c4..130c5140db0d 
100644 > > > > --- a/drivers/gpu/drm/i915/i915_drv.c > > > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > > > @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct > > > > drm_device *dev, void *data, > > > > value = i915_cmd_parser_get_version(dev_priv); > > > > break; > > > > case I915_PARAM_SUBSLICE_TOTAL: > > > > - value = sseu_subslice_total(sseu); > > > > + value = intel_sseu_subslice_total(sseu); > > > > if (!value) > > > > return -ENODEV; > > > > break; > > > > diff --git a/drivers/gpu/drm/i915/intel_device_info.c > > > > b/drivers/gpu/drm/i915/intel_device_info.c > > > > index 559cf0d0628e..e1dbccf04cd9 100644 > > > > --- a/drivers/gpu/drm/i915/intel_device_info.c > > > > +++ b/drivers/gpu/drm/i915/intel_device_info.c > > > > @@ -90,10 +90,10 @@ static void sseu_dump(const struct > > > > sseu_dev_info *sseu, struct drm_printer *p) > > > > > > > > drm_printf(p, "slice total: %u, mask=%04x\n", > > > > hweight8(sseu->slice_mask), sseu- > > > > >slice_mask); > > > > - drm_printf(p, "subslice total: %u\n", > > > > sseu_subslice_total(sseu)); > > > > + drm_printf(p, "subslice total: %u\n", > > > > intel_sseu_subslice_total(sseu)); > > > > for (s = 0; s < sseu->max_slices; s++) { > > > > drm_printf(p, "slice%d: %u subslices, > > > > mask=%04x\n", > > > > - s, sseu_subslices_per_slice(sseu, > > > > s), > > > > + s, > > > > intel_sseu_subslices_per_slice(sseu, s), > > > > sseu->subslice_mask[s]); > > > > } > > > > drm_printf(p, "EU total: %u\n", sseu->eu_total); > > > > @@ -126,11 +126,11 @@ void > > > > intel_device_info_dump_topology(const > > > > struct sseu_dev_info *sseu, > > > > > > > > for (s = 0; s < sseu->max_slices; s++) { > > > > drm_printf(p, "slice%d: %u subslice(s) > > > > (0x%hhx):\n", > > > > - s, sseu_subslices_per_slice(sseu, > > > > s), > > > > + s, > > > > intel_sseu_subslices_per_slice(sseu, s), > > > > sseu->subslice_mask[s]); > > > > > > > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > > > - u16 enabled_eus = sseu_get_eus(sseu, s, > > 
> > ss); > > > > + u16 enabled_eus = > > > > intel_sseu_get_eus(sseu, s, > > > > ss); > > > > > > > > drm_printf(p, "\tsubslice%d: %u EUs > > > > (0x%hx)\n", > > > > ss, hweight16(enabled_eus), > > > > enabled_eus); > > > > @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > sseu->subslice_mask[s] = (ss_en >> > > > > ss_idx) & > > > > ss_en_mask; > > > > for (ss = 0; ss < sseu->max_subslices; > > > > ss++) { > > > > if (sseu->subslice_mask[s] & > > > > BIT(ss)) > > > > - sseu_set_eus(sseu, s, > > > > ss, > > > > eu_en); > > > > + intel_sseu_set_eus(sseu > > > > , s, ss, > > > > eu_en); > > > > } > > > > } > > > > } > > > > @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > /* Slice0 */ > > > > eu_en = ~I915_READ(GEN8_EU_DISABLE0); > > > > for (ss = 0; ss < sseu->max_subslices; ss++) > > > > - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & > > > > eu_mask); > > > > + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * > > > > ss)) & > > > > eu_mask); > > > > /* Slice1 */ > > > > - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & > > > > eu_mask); > > > > eu_en = ~I915_READ(GEN8_EU_DISABLE1); > > > > - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > > > > + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); > > > > /* Slice2 */ > > > > - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > > > > - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & > > > > eu_mask); > > > > /* Slice3 */ > > > > - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & > > > > eu_mask); > > > > eu_en = ~I915_READ(GEN8_EU_DISABLE2); > > > > - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > > > > + intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); > > > > /* Slice4 
*/ > > > > - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > > > > - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & > > > > eu_mask); > > > > /* Slice5 */ > > > > - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); > > > > + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & > > > > eu_mask); > > > > eu_en = ~I915_READ(GEN10_EU_DISABLE3); > > > > - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > > > > + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); > > > > > > > > /* Do a second pass where we mark the subslices > > > > disabled if all > > > > their > > > > * eus are off. > > > > */ > > > > for (s = 0; s < sseu->max_slices; s++) { > > > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > > > - if (sseu_get_eus(sseu, s, ss) == 0) > > > > + if (intel_sseu_get_eus(sseu, s, ss) == > > > > 0) > > > > sseu->subslice_mask[s] &= > > > > ~BIT(ss); > > > > } > > > > } > > > > @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > * EU in any one subslice may be fused off for die > > > > * recovery. > > > > */ > > > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) > > > > ? 
> > > > DIV_ROUND_UP(sseu->eu_total, > > > > - sseu_subslice_tota > > > > l(sseu)) > > > > : 0; > > > > + intel_sseu_subslic > > > > e_total( > > > > sseu)) : > > > > + 0; > > > > > > > > /* No restrictions on Power Gating */ > > > > sseu->has_slice_pg = 1; > > > > @@ -290,7 +291,7 @@ static void > > > > cherryview_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); > > > > > > > > sseu->subslice_mask[0] |= BIT(0); > > > > - sseu_set_eus(sseu, 0, 0, ~disabled_mask); > > > > + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask); > > > > } > > > > > > > > if (!(fuse & CHV_FGT_DISABLE_SS1)) { > > > > @@ -301,7 +302,7 @@ static void > > > > cherryview_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); > > > > > > > > sseu->subslice_mask[0] |= BIT(1); > > > > - sseu_set_eus(sseu, 0, 1, ~disabled_mask); > > > > + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask); > > > > } > > > > > > > > sseu->eu_total = compute_eu_total(sseu); > > > > @@ -310,8 +311,8 @@ static void > > > > cherryview_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > * CHV expected to always have a uniform distribution > > > > of EU > > > > * across subslices. > > > > */ > > > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > > > - sseu->eu_total / > > > > sseu_subslice_total(sseu) : > > > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) > > > > ? > > > > + sseu->eu_total / > > > > intel_sseu_subslice_total(sseu) : > > > > 0; > > > > /* > > > > * CHV supports subslice power gating on devices with > > > > more than > > > > @@ -319,7 +320,7 @@ static void > > > > cherryview_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > * more than one EU pair per subslice. 
> > > > */ > > > > sseu->has_slice_pg = 0; > > > > - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; > > > > + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > > > > > 1; > > > > sseu->has_eu_pg = (sseu->eu_per_subslice > 2); > > > > } > > > > > > > > @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > > > > > eu_disabled_mask = (eu_disable >> (ss * > > > > 8)) & > > > > eu_mask; > > > > > > > > - sseu_set_eus(sseu, s, ss, > > > > ~eu_disabled_mask); > > > > + intel_sseu_set_eus(sseu, s, ss, > > > > ~eu_disabled_mask); > > > > > > > > eu_per_ss = sseu->max_eus_per_subslice > > > > - > > > > hweight8(eu_disabled_mask); > > > > @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > * recovery. BXT is expected to be perfectly uniform in > > > > EU > > > > * distribution. > > > > */ > > > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) > > > > ? 
> > > > DIV_ROUND_UP(sseu->eu_total, > > > > - sseu_subslice_tota > > > > l(sseu)) > > > > : 0; > > > > + intel_sseu_subslic > > > > e_total( > > > > sseu)) : > > > > + 0; > > > > /* > > > > * SKL+ supports slice power gating on devices with > > > > more than > > > > * one slice, and supports EU power gating on devices > > > > with > > > > @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > sseu->has_slice_pg = > > > > !IS_GEN9_LP(dev_priv) && hweight8(sseu- > > > > >slice_mask) > > > > > 1; > > > > sseu->has_subslice_pg = > > > > - IS_GEN9_LP(dev_priv) && > > > > sseu_subslice_total(sseu) > 1; > > > > + IS_GEN9_LP(dev_priv) && > > > > intel_sseu_subslice_total(sseu) > > > > > 1; > > > > > > > > sseu->has_eu_pg = sseu->eu_per_subslice > 2; > > > > > > > > if (IS_GEN9_LP(dev_priv)) { > > > > @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > eu_disabled_mask = > > > > eu_disable[s] >> (ss * sseu- > > > > > max_eus_per_subslice); > > > > > > > > > > > > - sseu_set_eus(sseu, s, ss, > > > > ~eu_disabled_mask); > > > > + intel_sseu_set_eus(sseu, s, ss, > > > > ~eu_disabled_mask); > > > > > > > > n_disabled = > > > > hweight8(eu_disabled_mask); > > > > > > > > @@ -496,9 +498,10 @@ static void > > > > broadwell_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > * subslices with the exception that any one EU in any > > > > one > > > > subslice may > > > > * be fused off for die recovery. > > > > */ > > > > - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? > > > > + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) > > > > ? 
> > > > DIV_ROUND_UP(sseu->eu_total, > > > > - sseu_subslice_tota > > > > l(sseu)) > > > > : 0; > > > > + intel_sseu_subslic > > > > e_total( > > > > sseu)) : > > > > + 0; > > > > > > > > /* > > > > * BDW supports slice power gating on devices with more > > > > than > > > > @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct > > > > drm_i915_private *dev_priv) > > > > > > > > for (s = 0; s < sseu->max_slices; s++) { > > > > for (ss = 0; ss < sseu->max_subslices; ss++) { > > > > - sseu_set_eus(sseu, s, ss, > > > > - (1UL << sseu- > > > > >eu_per_subslice) - > > > > 1); > > > > + intel_sseu_set_eus(sseu, s, ss, > > > > + (1UL << sseu- > > > > > eu_per_subslice) - 1); > > > > > > > > } > > > > } > > > > > > > >
On Wed, 01 May 2019, "Summers, Stuart" <stuart.summers@intel.com> wrote: > On Wed, 2019-05-01 at 14:19 -0700, Daniele Ceraolo Spurio wrote: >> >> On 5/1/19 2:04 PM, Summers, Stuart wrote: >> > On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio wrote: >> > > Can you elaborate a bit more on what's the rationale for this? do >> > > you just want to avoid having too many inlines since the paths >> > > they're used in are not critical, or do you have some more >> > > functional reason? This is not a critique of the patch, I just >> > > want to understand where you're coming from ;) >> > >> > This was a request from Jani Nikula in a previous series update. I >> > don't have a strong preference either way personally. If you don't >> > have any major concerns, I'd prefer to keep the series as-is to >> > prevent too much thrash here, but let me know. >> > >> >> No concerns, just please update the commit message to explain that >> we're moving them because there is no need for them to be inline >> since they're not on a critical path where we need performance. > > Sounds great. I've become critical of superfluous inlines. They break the abstraction by exposing the internals in the header, and make the interdependencies of headers harder to resolve. As the driver keeps growing and more people contribute to it, I think we need to pay more attention to how we structure the source. To this end we've added new gt/ subdir, are about to add gem/ and likely display/ too before long, and we've significantly split off the monster i915_drv.h and intel_drv.h headers. Obviously inlines have their place and purpose, but I think we sprinkle them a bit too eagerly without paying attention. I like the patch. Acked-by: Jani Nikula <jani.nikula@intel.com>
On Thu, 2019-05-02 at 10:15 +0300, Jani Nikula wrote: > On Wed, 01 May 2019, "Summers, Stuart" <stuart.summers@intel.com> > wrote: > > On Wed, 2019-05-01 at 14:19 -0700, Daniele Ceraolo Spurio wrote: > > > > > > On 5/1/19 2:04 PM, Summers, Stuart wrote: > > > > On Wed, 2019-05-01 at 13:04 -0700, Daniele Ceraolo Spurio > > > > wrote: > > > > > Can you elaborate a bit more on what's the rationale for > > > > > this? do > > > > > you just want to avoid having too many inlines since the > > > > > paths > > > > > they're used in are not critical, or do you have some more > > > > > functional reason? This is not a critique of the patch, I just > > > > > want to understand where you're coming from ;) > > > > > > > > This was a request from Jani Nikula in a previous series > > > > update. I > > > > don't have a strong preference either way personally. If you > > > > don't > > > > have any major concerns, I'd prefer to keep the series as-is to > > > > prevent too much thrash here, but let me know. > > > > > > > > > > No concerns, just please update the commit message to explain > > > that > > > we're moving them because there is no need for them to be inline > > > since they're not on a critical path where we need performance. > > > > Sounds great. > > I've become critical of superfluous inlines. They break the > abstraction > by exposing the internals in the header, and make the > interdependencies > of headers harder to resolve. > > As the driver keeps growing and more people contribute to it, I think > we > need to pay more attention to how we structure the source. To this > end > we've added new gt/ subdir, are about to add gem/ and likely display/ > too before long, and we've significantly split off the monster > i915_drv.h and intel_drv.h headers. > > Obviously inlines have their place and purpose, but I think we > sprinkle > them a bit too eagerly without paying attention. > > I like the patch. 
> > Acked-by: Jani Nikula <jani.nikula@intel.com> Jani, based on Daniele's feedback, I'm planning on squashing this patch with the patch that moves these helper functions to intel_sseu.h. Any issue keeping your Ack here? -Stuart > >
On Thu, 2019-05-02 at 17:58 +0300, Jani Nikula wrote: > On Thu, 02 May 2019, "Summers, Stuart" <stuart.summers@intel.com> > wrote: > > On Thu, 2019-05-02 at 10:15 +0300, Jani Nikula wrote: > > > Acked-by: Jani Nikula <jani.nikula@intel.com> > > > > Jani, based on Daniele's feedback, I'm planning on squashing this > > patch > > with the patch that moves these helper functions to intel_sseu.h. > > Any > > issue keeping your Ack here? > > None. Thanks for the Ack! -Stuart > > BR, > Jani. >
On Thu, 02 May 2019, "Summers, Stuart" <stuart.summers@intel.com> wrote: > On Thu, 2019-05-02 at 10:15 +0300, Jani Nikula wrote: >> Acked-by: Jani Nikula <jani.nikula@intel.com> > > Jani, based on Daniele's feedback, I'm planning on squashing this patch > with the patch that moves these helper functions to intel_sseu.h. Any > issue keeping your Ack here? None. BR, Jani.
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7f448f3bea0b..4a0b82fc108c 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,60 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" +unsigned int +intel_sseu_subslice_total(const struct sseu_dev_info *sseu) +{ + unsigned int i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) + total += hweight8(sseu->subslice_mask[i]); + + return total; +} + +unsigned int +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) +{ + return hweight8(sseu->subslice_mask[slice]); +} + +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, + BITS_PER_BYTE); + int slice_stride = sseu->max_subslices * subslice_stride; + + return slice * slice_stride + subslice * subslice_stride; +} + +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice, + int subslice) +{ + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); + u16 eu_mask = 0; + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + eu_mask |= ((u16)sseu->eu_mask[offset + i]) << + (i * BITS_PER_BYTE); + } + + return eu_mask; +} + +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, + u16 eu_mask) +{ + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice); + + for (i = 0; + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { + sseu->eu_mask[offset + i] = + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; + } +} + u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu) { diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 029e71d8f140..56e3721ae83f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -63,58 +63,17 @@ 
intel_sseu_from_device_info(const struct sseu_dev_info *sseu) return value; } -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) -{ - unsigned int i, total = 0; - - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++) - total += hweight8(sseu->subslice_mask[i]); +unsigned int +intel_sseu_subslice_total(const struct sseu_dev_info *sseu); - return total; -} +unsigned int +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); -static inline unsigned int -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice) -{ - return hweight8(sseu->subslice_mask[slice]); -} - -static inline int sseu_eu_idx(const struct sseu_dev_info *sseu, - int slice, int subslice) -{ - int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, - BITS_PER_BYTE); - int slice_stride = sseu->max_subslices * subslice_stride; - - return slice * slice_stride + subslice * subslice_stride; -} +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice, + int subslice); -static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu, - int slice, int subslice) -{ - int i, offset = sseu_eu_idx(sseu, slice, subslice); - u16 eu_mask = 0; - - for (i = 0; - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { - eu_mask |= ((u16)sseu->eu_mask[offset + i]) << - (i * BITS_PER_BYTE); - } - - return eu_mask; -} - -static inline void sseu_set_eus(struct sseu_dev_info *sseu, - int slice, int subslice, u16 eu_mask) -{ - int i, offset = sseu_eu_idx(sseu, slice, subslice); - - for (i = 0; - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) { - sseu->eu_mask[offset + i] = - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff; - } -} +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice, + u16 eu_mask); u32 intel_sseu_make_rpcs(struct drm_i915_private *i915, const struct intel_sseu *req_sseu); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 
fe854c629a32..3f3ee83ac315 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4158,7 +4158,7 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s]; } sseu->eu_total = sseu->eu_per_subslice * - sseu_subslice_total(sseu); + intel_sseu_subslice_total(sseu); /* subtract fused off EU(s) from enabled slice(s) */ for (s = 0; s < fls(sseu->slice_mask); s++) { @@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, seq_printf(m, " %s Slice Total: %u\n", type, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, - sseu_subslice_total(sseu)); + intel_sseu_subslice_total(sseu)); for (s = 0; s < fls(sseu->slice_mask); s++) { seq_printf(m, " %s Slice%i subslices: %u\n", type, - s, sseu_subslices_per_slice(sseu, s)); + s, intel_sseu_subslices_per_slice(sseu, s)); } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c376244c19c4..130c5140db0d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = i915_cmd_parser_get_version(dev_priv); break; case I915_PARAM_SUBSLICE_TOTAL: - value = sseu_subslice_total(sseu); + value = intel_sseu_subslice_total(sseu); if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 559cf0d0628e..e1dbccf04cd9 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -90,10 +90,10 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) drm_printf(p, "slice total: %u, mask=%04x\n", hweight8(sseu->slice_mask), sseu->slice_mask); - drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); + drm_printf(p, 
"subslice total: %u\n", intel_sseu_subslice_total(sseu)); for (s = 0; s < sseu->max_slices; s++) { drm_printf(p, "slice%d: %u subslices, mask=%04x\n", - s, sseu_subslices_per_slice(sseu, s), + s, intel_sseu_subslices_per_slice(sseu, s), sseu->subslice_mask[s]); } drm_printf(p, "EU total: %u\n", sseu->eu_total); @@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, for (s = 0; s < sseu->max_slices; s++) { drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n", - s, sseu_subslices_per_slice(sseu, s), + s, intel_sseu_subslices_per_slice(sseu, s), sseu->subslice_mask[s]); for (ss = 0; ss < sseu->max_subslices; ss++) { - u16 enabled_eus = sseu_get_eus(sseu, s, ss); + u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss); drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n", ss, hweight16(enabled_eus), enabled_eus); @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; for (ss = 0; ss < sseu->max_subslices; ss++) { if (sseu->subslice_mask[s] & BIT(ss)) - sseu_set_eus(sseu, s, ss, eu_en); + intel_sseu_set_eus(sseu, s, ss, eu_en); } } } @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) /* Slice0 */ eu_en = ~I915_READ(GEN8_EU_DISABLE0); for (ss = 0; ss < sseu->max_subslices; ss++) - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask); /* Slice1 */ - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask); eu_en = ~I915_READ(GEN8_EU_DISABLE1); - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask); /* Slice2 */ - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask); + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask); /* Slice3 */ - 
sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask); eu_en = ~I915_READ(GEN8_EU_DISABLE2); - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); + intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask); /* Slice4 */ - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask); + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask); /* Slice5 */ - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask); eu_en = ~I915_READ(GEN10_EU_DISABLE3); - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask); /* Do a second pass where we mark the subslices disabled if all their * eus are off. */ for (s = 0; s < sseu->max_slices; s++) { for (ss = 0; ss < sseu->max_subslices; ss++) { - if (sseu_get_eus(sseu, s, ss) == 0) + if (intel_sseu_get_eus(sseu, s, ss) == 0) sseu->subslice_mask[s] &= ~BIT(ss); } } @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) * EU in any one subslice may be fused off for die * recovery. */ - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? 
DIV_ROUND_UP(sseu->eu_total, - sseu_subslice_total(sseu)) : 0; + intel_sseu_subslice_total(sseu)) : + 0; /* No restrictions on Power Gating */ sseu->has_slice_pg = 1; @@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4); sseu->subslice_mask[0] |= BIT(0); - sseu_set_eus(sseu, 0, 0, ~disabled_mask); + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask); } if (!(fuse & CHV_FGT_DISABLE_SS1)) { @@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4); sseu->subslice_mask[0] |= BIT(1); - sseu_set_eus(sseu, 0, 1, ~disabled_mask); + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask); } sseu->eu_total = compute_eu_total(sseu); @@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) * CHV expected to always have a uniform distribution of EU * across subslices. */ - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? - sseu->eu_total / sseu_subslice_total(sseu) : + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? + sseu->eu_total / intel_sseu_subslice_total(sseu) : 0; /* * CHV supports subslice power gating on devices with more than @@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) * more than one EU pair per subslice. */ sseu->has_slice_pg = 0; - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1; + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1; sseu->has_eu_pg = (sseu->eu_per_subslice > 2); } @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask; - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); eu_per_ss = sseu->max_eus_per_subslice - hweight8(eu_disabled_mask); @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) * recovery. 
BXT is expected to be perfectly uniform in EU * distribution. */ - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? DIV_ROUND_UP(sseu->eu_total, - sseu_subslice_total(sseu)) : 0; + intel_sseu_subslice_total(sseu)) : + 0; /* * SKL+ supports slice power gating on devices with more than * one slice, and supports EU power gating on devices with @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_slice_pg = !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1; sseu->has_subslice_pg = - IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1; + IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > 1; sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) eu_disabled_mask = eu_disable[s] >> (ss * sseu->max_eus_per_subslice); - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask); n_disabled = hweight8(eu_disabled_mask); @@ -496,9 +498,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * subslices with the exception that any one EU in any one subslice may * be fused off for die recovery. */ - sseu->eu_per_subslice = sseu_subslice_total(sseu) ? + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ? DIV_ROUND_UP(sseu->eu_total, - sseu_subslice_total(sseu)) : 0; + intel_sseu_subslice_total(sseu)) : + 0; /* * BDW supports slice power gating on devices with more than @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv) for (s = 0; s < sseu->max_slices; s++) { for (ss = 0; ss < sseu->max_subslices; ss++) { - sseu_set_eus(sseu, s, ss, - (1UL << sseu->eu_per_subslice) - 1); + intel_sseu_set_eus(sseu, s, ss, + (1UL << sseu->eu_per_subslice) - 1); } }
Additionally, ensure these are all prefixed with intel_sseu_* to match the convention of other functions in i915. Signed-off-by: Stuart Summers <stuart.summers@intel.com> --- drivers/gpu/drm/i915/gt/intel_sseu.c | 54 +++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++----------------- drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++------------ 5 files changed, 102 insertions(+), 86 deletions(-)