Message ID | 20160909080106.17506-5-mahesh1.kumar@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, 2016-09-09 at 13:31 +0530, Kumar, Mahesh wrote: > From: Mahesh Kumar <mahesh1.kumar@intel.com> > > This patch adds support to decode system memory bandwidth > which will be used for arbitrated display memory percentage > calculation in GEN9 based system. > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 96 +++++++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_drv.h | 18 ++++++++ > drivers/gpu/drm/i915/i915_reg.h | 25 +++++++++++ > 3 files changed, 139 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 02c34d6..0a4f18d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -973,6 +973,96 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) > DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); > } > > +static void > +intel_get_memdev_info(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = to_i915(dev); > + uint32_t val = 0; > + uint32_t mem_speed = 0; > + uint8_t dram_type; > + uint32_t dram_channel; > + uint8_t num_channel; > + bool rank_valid = false; > + > + if (!IS_GEN9(dev_priv)) > + goto exit; > + > + val = I915_READ(P_CR_MC_BIOS_REQ_0_0_0); > + mem_speed = div_u64((uint64_t) (val & REQ_DATA_MASK) * > + MEMORY_FREQ_MULTIPLIER, 1000); > + > + if (mem_speed == 0) > + goto exit; > + > + dev_priv->memdev_info.valid = true; > + dev_priv->memdev_info.mem_speed = mem_speed; > + dram_type = (val >> DRAM_TYPE_SHIFT) & DRAM_TYPE_MASK; > + dram_channel = (val >> DRAM_CHANNEL_SHIFT) & DRAM_CHANNEL_MASK; > + num_channel = hweight32(dram_channel); > + > + /* > + * The lpddr3 and lpddr4 technologies can have 1-4 channels and the > + * channels are 32bits wide; while ddr3l technologies can have 1-2 > + * channels and the channels are 64 bits wide. But SV team found that in > + * case of single 64 bit wide DDR3L dimms two bits were set and system > + * with two DDR3L 64bit dimm all four bits were set. What bits are set? It would be good to clarify the register. > + */ > + > + switch (dram_type) { > + case DRAM_TYPE_LPDDR3: > + case DRAM_TYPE_LPDDR4: > + dev_priv->memdev_info.data_width = 4; > + dev_priv->memdev_info.num_channel = num_channel; > + break; > + case DRAM_TYPE_DDR3L: > + dev_priv->memdev_info.data_width = 8; > + dev_priv->memdev_info.num_channel = num_channel / 2; Why is this /2 done here? > + break; > + default: > + dev_priv->memdev_info.data_width = 4; > + dev_priv->memdev_info.num_channel = num_channel; > + } > + > + /* > + * Now read each DUNIT8/9/10/11 to check the rank of each dimms. > + * all the dimms should have same rank as in first valid Dimm > + */ > +#define D_CR_DRP0_DUNIT_INVALID 0xFFFFFFFF > + > + dev_priv->memdev_info.rank_valid = true; > + if (I915_READ(D_CR_DRP0_DUNIT8) != D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT8); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT9) != D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT9); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT10) != D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT10); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT11) != D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT11); > + rank_valid = true; > + } > +#undef D_CR_DRP0_DUNIT_INVALID > + > + if (rank_valid) { > + dev_priv->memdev_info.rank_valid = true; > + dev_priv->memdev_info.rank = (val & DRAM_RANK_MASK); > + } > + > + DRM_DEBUG_DRIVER("valid:%s speed-%d width-%d num_channel-%d\n", > + dev_priv->memdev_info.valid ? "true" : "false", > + dev_priv->memdev_info.mem_speed, > + dev_priv->memdev_info.data_width, > + dev_priv->memdev_info.num_channel); > + DRM_DEBUG_DRIVER("rank_valid:%s rank-%d\n", > + dev_priv->memdev_info.rank_valid ? "true" : "false", > + dev_priv->memdev_info.rank); > + return; > +exit: > + dev_priv->memdev_info.valid = false; > +} > + > /** > * i915_driver_init_hw - setup state requiring device access > * @dev_priv: device private > @@ -1076,6 +1166,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) > DRM_DEBUG_DRIVER("can't enable MSI"); > } > > + /* > + * Fill the memdev structure to get the system raw bandwidth > + * This will be used by WM algorithm, to implement GEN9 based WA > + */ > + intel_get_memdev_info(dev); > + > return 0; > > out_ggtt: > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 4ec23e5..4313992 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2036,6 +2036,24 @@ struct drm_i915_private { > bool distrust_bios_wm; > } wm; > > + struct { > + /* > + * memory device info > + * valid: memory info is valid or not > + * mem_speed: memory freq in KHz > + * channel_width: Channel width in bytes > + * num_channel: total number of channels > + * rank: 0-rank disable, 1-Single rank, 2-dual rank > + */ > + bool valid; > + uint32_t mem_speed; > + uint8_t data_width; > + uint8_t num_channel; > + bool rank_valid; > + uint8_t rank; > + } memdev_info; > + > + > struct i915_runtime_pm pm; > > /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index a29d707..b38445c 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7716,6 +7716,31 @@ enum { > #define DC_STATE_DEBUG_MASK_CORES (1<<0) > #define DC_STATE_DEBUG_MASK_MEMORY_UP (1<<1) > > +#define P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) > +#define REQ_DATA_MASK (0x3F << 0) > +#define DRAM_TYPE_SHIFT 24 > +#define DRAM_TYPE_MASK 0x7 > +#define DRAM_CHANNEL_SHIFT 12 > +#define DRAM_CHANNEL_MASK 0xF > + > +#define DRAM_TYPE_LPDDR3 0x1 > +#define DRAM_TYPE_LPDDR4 0x2 > +#define DRAM_TYPE_DDR3L 0x4 > +/* > + * BIOS programs this field of REQ_DATA [5:0] in integer > + * multiple of 133330 KHz (133.33MHz) > + */ > +#define MEMORY_FREQ_MULTIPLIER 0x208D2 > +#define D_CR_DRP0_DUNIT8 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1000) > +#define D_CR_DRP0_DUNIT9 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1200) > +#define D_CR_DRP0_DUNIT10 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1400) > +#define D_CR_DRP0_DUNIT11 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1600) > +#define D_CR_DRP0_RKEN0 (1 << 0) > +#define D_CR_DRP0_RKEN1 (1 << 1) > +#define DRAM_RANK_MASK 0x3 > +#define DRAM_SINGLE_RANK 0x1 > +#define DRAM_DUAL_RANK 0x3 Should this be Ox2 ? > + > /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register, > * since on HSW we can't write to it using I915_WRITE. */ > #define D_COMP_HSW _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
On Friday 16 September 2016 01:32 PM, Pandiyan, Dhinakaran wrote: > On Fri, 2016-09-09 at 13:31 +0530, Kumar, Mahesh wrote: >> From: Mahesh Kumar <mahesh1.kumar@intel.com> >> >> This patch adds support to decode system memory bandwidth >> which will be used for arbitrated display memory percentage >> calculation in GEN9 based system. >> >> Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.c | 96 +++++++++++++++++++++++++++++++++++++++++ >> drivers/gpu/drm/i915/i915_drv.h | 18 ++++++++ >> drivers/gpu/drm/i915/i915_reg.h | 25 +++++++++++ >> 3 files changed, 139 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 02c34d6..0a4f18d 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -973,6 +973,96 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) >> DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); >> } >> >> +static void >> +intel_get_memdev_info(struct drm_device *dev) >> +{ >> + struct drm_i915_private *dev_priv = to_i915(dev); >> + uint32_t val = 0; >> + uint32_t mem_speed = 0; >> + uint8_t dram_type; >> + uint32_t dram_channel; >> + uint8_t num_channel; >> + bool rank_valid = false; >> + >> + if (!IS_GEN9(dev_priv)) >> + goto exit; >> + >> + val = I915_READ(P_CR_MC_BIOS_REQ_0_0_0); >> + mem_speed = div_u64((uint64_t) (val & REQ_DATA_MASK) * >> + MEMORY_FREQ_MULTIPLIER, 1000); >> + >> + if (mem_speed == 0) >> + goto exit; >> + >> + dev_priv->memdev_info.valid = true; >> + dev_priv->memdev_info.mem_speed = mem_speed; >> + dram_type = (val >> DRAM_TYPE_SHIFT) & DRAM_TYPE_MASK; >> + dram_channel = (val >> DRAM_CHANNEL_SHIFT) & DRAM_CHANNEL_MASK; >> + num_channel = hweight32(dram_channel); >> + >> + /* >> + * The lpddr3 and lpddr4 technologies can have 1-4 channels and the >> + * channels are 32bits wide; while ddr3l technologies can have 1-2 >> + * channels and the channels are 64 bits wide. But SV team found that in >> + * case of single 64 bit wide DDR3L dimms two bits were set and system >> + * with two DDR3L 64bit dimm all four bits were set. > What bits are set? It would be good to clarify the register. number of channel-active bits in P_CR_MC_BIOS_REQ_0_0_0_MCHBAR register, Will update the comment. > >> + */ >> + >> + switch (dram_type) { >> + case DRAM_TYPE_LPDDR3: >> + case DRAM_TYPE_LPDDR4: >> + dev_priv->memdev_info.data_width = 4; >> + dev_priv->memdev_info.num_channel = num_channel; >> + break; >> + case DRAM_TYPE_DDR3L: >> + dev_priv->memdev_info.data_width = 8; >> + dev_priv->memdev_info.num_channel = num_channel / 2; > Why is this /2 done here? This is the same reason as in comment section above, 2 bits per channel are set in P_CR_MC_BIOS_REQ_0_0_0 register in-case of DDR3L DRAM, So we need to divide "number of bit set" by 2 to get actual number of channel for DDR3L. > >> + break; >> + default: >> + dev_priv->memdev_info.data_width = 4; >> + dev_priv->memdev_info.num_channel = num_channel; >> + } >> + >> + /* >> + * Now read each DUNIT8/9/10/11 to check the rank of each dimms. >> + * all the dimms should have same rank as in first valid Dimm >> + */ >> +#define D_CR_DRP0_DUNIT_INVALID 0xFFFFFFFF >> + >> + dev_priv->memdev_info.rank_valid = true; >> + if (I915_READ(D_CR_DRP0_DUNIT8) != D_CR_DRP0_DUNIT_INVALID) { >> + val = I915_READ(D_CR_DRP0_DUNIT8); >> + rank_valid = true; >> + } else if (I915_READ(D_CR_DRP0_DUNIT9) != D_CR_DRP0_DUNIT_INVALID) { >> + val = I915_READ(D_CR_DRP0_DUNIT9); >> + rank_valid = true; >> + } else if (I915_READ(D_CR_DRP0_DUNIT10) != D_CR_DRP0_DUNIT_INVALID) { >> + val = I915_READ(D_CR_DRP0_DUNIT10); >> + rank_valid = true; >> + } else if (I915_READ(D_CR_DRP0_DUNIT11) != D_CR_DRP0_DUNIT_INVALID) { >> + val = I915_READ(D_CR_DRP0_DUNIT11); >> + rank_valid = true; >> + } >> +#undef D_CR_DRP0_DUNIT_INVALID >> + >> + if (rank_valid) { >> + dev_priv->memdev_info.rank_valid = true; >> + dev_priv->memdev_info.rank = (val & DRAM_RANK_MASK); >> + } >> + >> + DRM_DEBUG_DRIVER("valid:%s speed-%d width-%d num_channel-%d\n", >> + dev_priv->memdev_info.valid ? "true" : "false", >> + dev_priv->memdev_info.mem_speed, >> + dev_priv->memdev_info.data_width, >> + dev_priv->memdev_info.num_channel); >> + DRM_DEBUG_DRIVER("rank_valid:%s rank-%d\n", >> + dev_priv->memdev_info.rank_valid ? "true" : "false", >> + dev_priv->memdev_info.rank); >> + return; >> +exit: >> + dev_priv->memdev_info.valid = false; >> +} >> + >> /** >> * i915_driver_init_hw - setup state requiring device access >> * @dev_priv: device private >> @@ -1076,6 +1166,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) >> DRM_DEBUG_DRIVER("can't enable MSI"); >> } >> >> + /* >> + * Fill the memdev structure to get the system raw bandwidth >> + * This will be used by WM algorithm, to implement GEN9 based WA >> + */ >> + intel_get_memdev_info(dev); >> + >> return 0; >> >> out_ggtt: >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 4ec23e5..4313992 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2036,6 +2036,24 @@ struct drm_i915_private { >> bool distrust_bios_wm; >> } wm; >> >> + struct { >> + /* >> + * memory device info >> + * valid: memory info is valid or not >> + * mem_speed: memory freq in KHz >> + * channel_width: Channel width in bytes >> + * num_channel: total number of channels >> + * rank: 0-rank disable, 1-Single rank, 2-dual rank >> + */ >> + bool valid; >> + uint32_t mem_speed; >> + uint8_t data_width; >> + uint8_t num_channel; >> + bool rank_valid; >> + uint8_t rank; >> + } memdev_info; >> + >> + >> struct i915_runtime_pm pm; >> >> /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index a29d707..b38445c 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -7716,6 +7716,31 @@ enum { >> #define DC_STATE_DEBUG_MASK_CORES (1<<0) >> #define DC_STATE_DEBUG_MASK_MEMORY_UP (1<<1) >> >> +#define P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) >> +#define REQ_DATA_MASK (0x3F << 0) >> +#define DRAM_TYPE_SHIFT 24 >> +#define DRAM_TYPE_MASK 0x7 >> +#define DRAM_CHANNEL_SHIFT 12 >> +#define DRAM_CHANNEL_MASK 0xF >> + >> +#define DRAM_TYPE_LPDDR3 0x1 >> +#define DRAM_TYPE_LPDDR4 0x2 >> +#define DRAM_TYPE_DDR3L 0x4 >> +/* >> + * BIOS programs this field of REQ_DATA [5:0] in integer >> + * multiple of 133330 KHz (133.33MHz) >> + */ >> +#define MEMORY_FREQ_MULTIPLIER 0x208D2 >> +#define D_CR_DRP0_DUNIT8 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1000) >> +#define D_CR_DRP0_DUNIT9 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1200) >> +#define D_CR_DRP0_DUNIT10 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1400) >> +#define D_CR_DRP0_DUNIT11 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1600) >> +#define D_CR_DRP0_RKEN0 (1 << 0) >> +#define D_CR_DRP0_RKEN1 (1 << 1) >> +#define DRAM_RANK_MASK 0x3 >> +#define DRAM_SINGLE_RANK 0x1 >> +#define DRAM_DUAL_RANK 0x3 > Should this be Ox2 ? No, In case of dual-rank both the bit (rank-0 & rank-1) will be set. Thanks & Regards, -Mahesh > >> + >> /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register, >> * since on HSW we can't write to it using I915_WRITE. */ >> #define D_COMP_HSW _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
Hi Em Sex, 2016-09-09 às 13:31 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar <mahesh1.kumar@intel.com> > > This patch adds support to decode system memory bandwidth > which will be used for arbitrated display memory percentage > calculation in GEN9 based system. This is not a complete review of this patch since I can't find the documentation for the registers used by the patch, but I'll try to provide some early feedback. Most of it is about styling, so feel free to provide counter arguments if you disagree. > > Signed-off-by: Mahesh Kumar <mahesh1.kumar@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 96 > +++++++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_drv.h | 18 ++++++++ > drivers/gpu/drm/i915/i915_reg.h | 25 +++++++++++ > 3 files changed, 139 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c > index 02c34d6..0a4f18d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -973,6 +973,96 @@ static void intel_sanitize_options(struct > drm_i915_private *dev_priv) > DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", > yesno(i915.semaphores)); > } > > +static void > +intel_get_memdev_info(struct drm_device *dev) In our current standards we prefer that you use drm_i915_private as the parameter here. And the new function doesn't even use drm_device for anything, so that reinforces the argument. > +{ > + struct drm_i915_private *dev_priv = to_i915(dev); > + uint32_t val = 0; > + uint32_t mem_speed = 0; > + uint8_t dram_type; > + uint32_t dram_channel; > + uint8_t num_channel; > + bool rank_valid = false; We generally don't zero-initialize things that don't need to be zero- initialized, like these 3 variables. I know this is an eternal discussion and each side has pros and cons, but following the already used coding style usually wins. Also, please just add this: struct memdev_info *memdev_info = &dev_priv->memdev_info; All those "dev_priv->memdev_info" statements below are already making my fingers hurt, and I didn't even type them :) > + > + if (!IS_GEN9(dev_priv)) > + goto exit; If you just set memdev_info.valid to false right at the beginning of the function then you can just return here and below without using the goto. IMHO it will look better. Or you could even rely on the fact that we used kzalloc anyway. > + > + val = I915_READ(P_CR_MC_BIOS_REQ_0_0_0); > + mem_speed = div_u64((uint64_t) (val & REQ_DATA_MASK) * > + MEMORY_FREQ_MULTIPLIER, 1000); > + > + if (mem_speed == 0) > + goto exit; Perhaps a DRM_DEBUG_KMS("something something memory data is not valid") would be useful here too. > + > + dev_priv->memdev_info.valid = true; > + dev_priv->memdev_info.mem_speed = mem_speed; > + dram_type = (val >> DRAM_TYPE_SHIFT) & DRAM_TYPE_MASK; > + dram_channel = (val >> DRAM_CHANNEL_SHIFT) & > DRAM_CHANNEL_MASK; > + num_channel = hweight32(dram_channel); > + > + /* > + * The lpddr3 and lpddr4 technologies can have 1-4 channels > and the > + * channels are 32bits wide; while ddr3l technologies can > have 1-2 > + * channels and the channels are 64 bits wide. But SV team > found that in > + * case of single 64 bit wide DDR3L dimms two bits were set > and system > + * with two DDR3L 64bit dimm all four bits were set. > + */ I still don't have access to the spec, but this comment suggests the spec doesn't match the SV team findings. So can't the SV team request the specs to reflect their findings? As an outsider, I see this as the SV team's word against the HW team's words, and I don't know who made the mistake: the SV engineer or the HW engineer. So I expect a discussion between the two and the conclusions being reflected on the specification :). Errata knowledge shouldn't go straight to the drivers, that's a recipe for eternal doubt to the future people maintaining this code. > + > + switch (dram_type) { > + case DRAM_TYPE_LPDDR3: > + case DRAM_TYPE_LPDDR4: > + dev_priv->memdev_info.data_width = 4; > + dev_priv->memdev_info.num_channel = num_channel; > + break; > + case DRAM_TYPE_DDR3L: > + dev_priv->memdev_info.data_width = 8; > + dev_priv->memdev_info.num_channel = num_channel / 2; > + break; > + default: Again, no access to the spec here, but shouldn't this case reset memdev_info.valid to false and then return (possibly with a DRM_DEBUG_KMS)? > + dev_priv->memdev_info.data_width = 4; > + dev_priv->memdev_info.num_channel = num_channel; > + } > + > + /* > + * Now read each DUNIT8/9/10/11 to check the rank of each > dimms. > + * all the dimms should have same rank as in first valid > Dimm > + */ > > +#define D_CR_DRP0_DUNIT_INVALID 0xFFFFFFFF I'm not a huge fan of these mid-file defines with later undefines. Can't we just move this to the .h file, or just use a variable, or just go with the plain 0xFFFFFFFF? > + > + dev_priv->memdev_info.rank_valid = true; We never set this to false. Bug? > + if (I915_READ(D_CR_DRP0_DUNIT8) != D_CR_DRP0_DUNIT_INVALID) > { > + val = I915_READ(D_CR_DRP0_DUNIT8); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT9) != > D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT9); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT10) != > D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT10); > + rank_valid = true; > + } else if (I915_READ(D_CR_DRP0_DUNIT11) != > D_CR_DRP0_DUNIT_INVALID) { > + val = I915_READ(D_CR_DRP0_DUNIT11); > + rank_valid = true; > + } You can restructure this to avoid the double I915_READ for the valid case. There are many ways to do this. Also, you could get rid of the rank_valid variable if you reorganize things a little bit and then just add an extra "else" for the "rank_valid is false" case. > +#undef D_CR_DRP0_DUNIT_INVALID > + > + if (rank_valid) { > + dev_priv->memdev_info.rank_valid = true; But we just set dev_priv->memdev_info.rank_valid to true above, so no need to redo it here. > + dev_priv->memdev_info.rank = (val & DRAM_RANK_MASK); > + } > + > + DRM_DEBUG_DRIVER("valid:%s speed-%d width-%d num_channel- > %d\n", Speed, width and num_channel are unsigned, and not all of them are 32 bits. Please replace the %d with the more appropriate modifiers and specifiers. > + dev_priv->memdev_info.valid ? "true" : "false", Perhaps you could use yesno() here and below. Also, you could prettify the text a little bit so people would be able to read dmesg and figure out interesting information about their DRAM without needing to look at the code. > + dev_priv->memdev_info.mem_speed, > + dev_priv->memdev_info.data_width, > + dev_priv->memdev_info.num_channel); > + DRM_DEBUG_DRIVER("rank_valid:%s rank-%d\n", > + dev_priv->memdev_info.rank_valid ? "true" : "false", > + dev_priv->memdev_info.rank); > + return; > +exit: > + dev_priv->memdev_info.valid = false; > +} > + > /** > * i915_driver_init_hw - setup state requiring device access > * @dev_priv: device private > @@ -1076,6 +1166,12 @@ static int i915_driver_init_hw(struct > drm_i915_private *dev_priv) > DRM_DEBUG_DRIVER("can't enable MSI"); > } > > + /* > + * Fill the memdev structure to get the system raw bandwidth > + * This will be used by WM algorithm, to implement GEN9 > based WA > + */ > + intel_get_memdev_info(dev); > + > return 0; > > out_ggtt: > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index 4ec23e5..4313992 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2036,6 +2036,24 @@ struct drm_i915_private { > bool distrust_bios_wm; > } wm; > > + struct { > + /* > + * memory device info > + * valid: memory info is valid or not > + * mem_speed: memory freq in KHz > + * channel_width: Channel width in bytes channel_width or data_width? > + * num_channel: total number of channels > + * rank: 0-rank disable, 1-Single rank, 2-dual rank > + */ > + bool valid; > + uint32_t mem_speed; > + uint8_t data_width; > + uint8_t num_channel; > + bool rank_valid; > + uint8_t rank; You can remove the comments if you do this: struct memdev_info { bool valid; uint32_t mem_speed_khz; uint8_t channel/data_width_bytes; uint8_t num_channels; (note the plural) enum { SOMETHING_RANK_DISABLE = 0, SOMETHING_RANK_SINGLE, SOMETHING_RANK_DUAL } rank; } memdev_info; I *really* like documenting units in the variable names, since it avoids constant doc checking. > + } memdev_info; > + > + > struct i915_runtime_pm pm; > > /* Abstract the submission mechanism (legacy ringbuffer or > execlists) away */ > diff --git a/drivers/gpu/drm/i915/i915_reg.h > b/drivers/gpu/drm/i915/i915_reg.h > index a29d707..b38445c 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7716,6 +7716,31 @@ enum { > #define DC_STATE_DEBUG_MASK_CORES (1<<0) > #define DC_STATE_DEBUG_MASK_MEMORY_UP (1<<1) > > +#define P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BA > SE_SNB + 0x7114) > +#define REQ_DATA_MASK (0x3F << 0) > +#define DRAM_TYPE_SHIFT 24 > +#define DRAM_TYPE_MASK 0x7 > +#define DRAM_CHANNEL_SHIFT 12 > +#define DRAM_CHANNEL_MASK 0xF > + > +#define DRAM_TYPE_LPDDR3 0x1 > +#define DRAM_TYPE_LPDDR4 0x2 > +#define DRAM_TYPE_DDR3L 0x4 > +/* > + * BIOS programs this field of REQ_DATA [5:0] in integer > + * multiple of 133330 KHz (133.33MHz) > + */ > +#define MEMORY_FREQ_MULTIPLIER 0x208D2 > +#define D_CR_DRP0_DUNIT8 _MMIO(MCHBAR_MIRROR_BASE_SNB > + 0x1000) > +#define D_CR_DRP0_DUNIT9 _MMIO(MCHBAR_MIRROR_BASE_SNB > + 0x1200) > +#define D_CR_DRP0_DUNIT10 _MMIO(MCHBAR_MIRROR_BASE_SN > B + 0x1400) > +#define D_CR_DRP0_DUNIT11 _MMIO(MCHBAR_MIRROR_BASE_SN > B + 0x1600) > +#define D_CR_DRP0_RKEN0 (1 << 0) > +#define D_CR_DRP0_RKEN1 (1 << 1) > +#define DRAM_RANK_MASK 0x3 > +#define DRAM_SINGLE_RANK 0x1 > +#define DRAM_DUAL_RANK 0x3 I'd prefix all/most of these definitions with something to group them all. Generic names such as MEMORY_FREQ_MULTIPLIER or DRAM_TYPE_LPDD3 may be confusing and/or cause problems later. Thanks, Paulo > + > /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using > this register, > * since on HSW we can't write to it using I915_WRITE. */ > #define D_COMP_HSW _MMIO(MCHBAR_MIRROR_BASE_S > NB + 0x5F0C)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 02c34d6..0a4f18d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -973,6 +973,96 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); } +static void +intel_get_memdev_info(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + uint32_t val = 0; + uint32_t mem_speed = 0; + uint8_t dram_type; + uint32_t dram_channel; + uint8_t num_channel; + bool rank_valid = false; + + if (!IS_GEN9(dev_priv)) + goto exit; + + val = I915_READ(P_CR_MC_BIOS_REQ_0_0_0); + mem_speed = div_u64((uint64_t) (val & REQ_DATA_MASK) * + MEMORY_FREQ_MULTIPLIER, 1000); + + if (mem_speed == 0) + goto exit; + + dev_priv->memdev_info.valid = true; + dev_priv->memdev_info.mem_speed = mem_speed; + dram_type = (val >> DRAM_TYPE_SHIFT) & DRAM_TYPE_MASK; + dram_channel = (val >> DRAM_CHANNEL_SHIFT) & DRAM_CHANNEL_MASK; + num_channel = hweight32(dram_channel); + + /* + * The lpddr3 and lpddr4 technologies can have 1-4 channels and the + * channels are 32bits wide; while ddr3l technologies can have 1-2 + * channels and the channels are 64 bits wide. But SV team found that in + * case of single 64 bit wide DDR3L dimms two bits were set and system + * with two DDR3L 64bit dimm all four bits were set. + */ + + switch (dram_type) { + case DRAM_TYPE_LPDDR3: + case DRAM_TYPE_LPDDR4: + dev_priv->memdev_info.data_width = 4; + dev_priv->memdev_info.num_channel = num_channel; + break; + case DRAM_TYPE_DDR3L: + dev_priv->memdev_info.data_width = 8; + dev_priv->memdev_info.num_channel = num_channel / 2; + break; + default: + dev_priv->memdev_info.data_width = 4; + dev_priv->memdev_info.num_channel = num_channel; + } + + /* + * Now read each DUNIT8/9/10/11 to check the rank of each dimms. + * all the dimms should have same rank as in first valid Dimm + */ +#define D_CR_DRP0_DUNIT_INVALID 0xFFFFFFFF + + dev_priv->memdev_info.rank_valid = true; + if (I915_READ(D_CR_DRP0_DUNIT8) != D_CR_DRP0_DUNIT_INVALID) { + val = I915_READ(D_CR_DRP0_DUNIT8); + rank_valid = true; + } else if (I915_READ(D_CR_DRP0_DUNIT9) != D_CR_DRP0_DUNIT_INVALID) { + val = I915_READ(D_CR_DRP0_DUNIT9); + rank_valid = true; + } else if (I915_READ(D_CR_DRP0_DUNIT10) != D_CR_DRP0_DUNIT_INVALID) { + val = I915_READ(D_CR_DRP0_DUNIT10); + rank_valid = true; + } else if (I915_READ(D_CR_DRP0_DUNIT11) != D_CR_DRP0_DUNIT_INVALID) { + val = I915_READ(D_CR_DRP0_DUNIT11); + rank_valid = true; + } +#undef D_CR_DRP0_DUNIT_INVALID + + if (rank_valid) { + dev_priv->memdev_info.rank_valid = true; + dev_priv->memdev_info.rank = (val & DRAM_RANK_MASK); + } + + DRM_DEBUG_DRIVER("valid:%s speed-%d width-%d num_channel-%d\n", + dev_priv->memdev_info.valid ? "true" : "false", + dev_priv->memdev_info.mem_speed, + dev_priv->memdev_info.data_width, + dev_priv->memdev_info.num_channel); + DRM_DEBUG_DRIVER("rank_valid:%s rank-%d\n", + dev_priv->memdev_info.rank_valid ? "true" : "false", + dev_priv->memdev_info.rank); + return; +exit: + dev_priv->memdev_info.valid = false; +} + /** * i915_driver_init_hw - setup state requiring device access * @dev_priv: device private @@ -1076,6 +1166,12 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("can't enable MSI"); } + /* + * Fill the memdev structure to get the system raw bandwidth + * This will be used by WM algorithm, to implement GEN9 based WA + */ + intel_get_memdev_info(dev); + return 0; out_ggtt: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4ec23e5..4313992 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2036,6 +2036,24 @@ struct drm_i915_private { bool distrust_bios_wm; } wm; + struct { + /* + * memory device info + * valid: memory info is valid or not + * mem_speed: memory freq in KHz + * channel_width: Channel width in bytes + * num_channel: total number of channels + * rank: 0-rank disable, 1-Single rank, 2-dual rank + */ + bool valid; + uint32_t mem_speed; + uint8_t data_width; + uint8_t num_channel; + bool rank_valid; + uint8_t rank; + } memdev_info; + + struct i915_runtime_pm pm; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a29d707..b38445c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7716,6 +7716,31 @@ enum { #define DC_STATE_DEBUG_MASK_CORES (1<<0) #define DC_STATE_DEBUG_MASK_MEMORY_UP (1<<1) +#define P_CR_MC_BIOS_REQ_0_0_0 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7114) +#define REQ_DATA_MASK (0x3F << 0) +#define DRAM_TYPE_SHIFT 24 +#define DRAM_TYPE_MASK 0x7 +#define DRAM_CHANNEL_SHIFT 12 +#define DRAM_CHANNEL_MASK 0xF + +#define DRAM_TYPE_LPDDR3 0x1 +#define DRAM_TYPE_LPDDR4 0x2 +#define DRAM_TYPE_DDR3L 0x4 +/* + * BIOS programs this field of REQ_DATA [5:0] in integer + * multiple of 133330 KHz (133.33MHz) + */ +#define MEMORY_FREQ_MULTIPLIER 0x208D2 +#define D_CR_DRP0_DUNIT8 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1000) +#define D_CR_DRP0_DUNIT9 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1200) +#define D_CR_DRP0_DUNIT10 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1400) +#define D_CR_DRP0_DUNIT11 _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x1600) +#define D_CR_DRP0_RKEN0 (1 << 0) +#define D_CR_DRP0_RKEN1 (1 << 1) +#define DRAM_RANK_MASK 0x3 +#define DRAM_SINGLE_RANK 0x1 +#define DRAM_DUAL_RANK 0x3 + /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register, * since on HSW we can't write to it using I915_WRITE. */ #define D_COMP_HSW _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)