[2/2] drm/i915/bdw: Export workaround data to debugfs

Message ID	1409060691-4916-3-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Arun Siluvery <arun.siluvery@linux.intel.com> To: intel-gfx@lists.freedesktop.org Date: Tue, 26 Aug 2014 14:44:51 +0100 Message-Id: <1409060691-4916-3-git-send-email-arun.siluvery@linux.intel.com> In-Reply-To: <1409060691-4916-1-git-send-email-arun.siluvery@linux.intel.com> References: <1409060691-4916-1-git-send-email-arun.siluvery@linux.intel.com> Subject: [Intel-gfx] [PATCH 2/2] drm/i915/bdw: Export workaround data to debugfs Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

arun.siluvery@linux.intel.com Aug. 26, 2014, 1:44 p.m. UTC

The workarounds that are applied are exported to a debugfs file;
this is used to verify their state after the test case (reset or
suspend/resume etc). This patch is only required to support i-g-t.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 40 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++
 3 files changed, 77 insertions(+)

Daniel Vetter Aug. 27, 2014, 3:44 p.m. UTC | #1

On Tue, Aug 26, 2014 at 02:44:51PM +0100, Arun Siluvery wrote:
> The workarounds that are applied are exported to a debugfs file;
> this is used to verify their state after the test case (reset or
> suspend/resume etc). This patch is only required to support i-g-t.
> 
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     | 40 +++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++
>  3 files changed, 77 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d42db6b..f0d63f6 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2451,20 +2451,59 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>  		seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md);
>  		seq_printf(m, " fp0:     0x%08x\n", pll->hw_state.fp0);
>  		seq_printf(m, " fp1:     0x%08x\n", pll->hw_state.fp1);
>  		seq_printf(m, " wrpll:   0x%08x\n", pll->hw_state.wrpll);
>  	}
>  	drm_modeset_unlock_all(dev);
>  
>  	return 0;
>  }
>  
> +static int intel_wa_registers(struct seq_file *m, void *unused)
> +{
> +	int i;
> +	int ret;
> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (!IS_BROADWELL(dev)) {
> +		DRM_DEBUG_DRIVER("Workaround table not available !!\n");
> +		return -EINVAL;
> +	}
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	intel_runtime_pm_get(dev_priv);
> +
> +	seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
> +	for (i = 0; i < dev_priv->num_wa_regs; ++i) {
> +		u32 addr, mask;
> +
> +		addr = dev_priv->intel_wa_regs[i].addr;
> +		mask = dev_priv->intel_wa_regs[i].mask;
> +		dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
> +		if (dev_priv->intel_wa_regs[i].addr)
> +			seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> +				   dev_priv->intel_wa_regs[i].addr,
> +				   dev_priv->intel_wa_regs[i].value,
> +				   dev_priv->intel_wa_regs[i].mask);
> +	}
> +
> +	intel_runtime_pm_put(dev_priv);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return 0;
> +}
> +
>  struct pipe_crc_info {
>  	const char *name;
>  	struct drm_device *dev;
>  	enum pipe pipe;
>  };
>  
>  static int i915_dp_mst_info(struct seq_file *m, void *unused)
>  {
>  	struct drm_info_node *node = (struct drm_info_node *) m->private;
>  	struct drm_device *dev = node->minor->dev;
> @@ -3980,20 +4019,21 @@ static const struct drm_info_list i915_debugfs_list[] = {
>  	{"i915_llc", i915_llc, 0},
>  	{"i915_edp_psr_status", i915_edp_psr_status, 0},
>  	{"i915_sink_crc_eDP1", i915_sink_crc, 0},
>  	{"i915_energy_uJ", i915_energy_uJ, 0},
>  	{"i915_pc8_status", i915_pc8_status, 0},
>  	{"i915_power_domain_info", i915_power_domain_info, 0},
>  	{"i915_display_info", i915_display_info, 0},
>  	{"i915_semaphore_status", i915_semaphore_status, 0},
>  	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
>  	{"i915_dp_mst_info", i915_dp_mst_info, 0},
> +	{"intel_wa_registers", intel_wa_registers, 0}
>  };
>  #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>  
>  static const struct i915_debugfs_files {
>  	const char *name;
>  	const struct file_operations *fops;
>  } i915_debugfs_files[] = {
>  	{"i915_wedged", &i915_wedged_fops},
>  	{"i915_max_freq", &i915_max_freq_fops},
>  	{"i915_min_freq", &i915_min_freq_fops},
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bcf79f0..49b7be7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1546,20 +1546,34 @@ struct drm_i915_private {
>  	wait_queue_head_t pending_flip_queue;
>  
>  #ifdef CONFIG_DEBUG_FS
>  	struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
>  #endif
>  
>  	int num_shared_dpll;
>  	struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
>  	int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>  
> +	/*
> +	 * workarounds are currently applied at different places and
> +	 * changes are being done to consolidate them so exact count is
> +	 * not clear at this point, use a max value for now.
> +	 */
> +#define I915_MAX_WA_REGS  16
> +	struct {
> +		u32 addr;
> +		u32 value;
> +		/* bitmask representing WA bits */
> +		u32 mask;
> +	} intel_wa_regs[I915_MAX_WA_REGS];
> +	u32 num_wa_regs;
> +
>  	/* Reclocking support */
>  	bool render_reclock_avail;
>  	bool lvds_downclock_avail;
>  	/* indicates the reduced downclock for LVDS*/
>  	int lvds_downclock;
>  
>  	struct i915_frontbuffer_tracking fb_tracking;
>  
>  	u16 orig_clock;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 4146582..e48e12a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -646,34 +646,54 @@ err_unpin:
>  	i915_gem_object_ggtt_unpin(ring->scratch.obj);
>  err_unref:
>  	drm_gem_object_unreference(&ring->scratch.obj->base);
>  err:
>  	return ret;
>  }
>  
>  static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>  				       u32 addr, u32 value)
>  {
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
> +		return;
> +
>  	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>  	intel_ring_emit(ring, addr);
>  	intel_ring_emit(ring, value);
> +
> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
> +	/* value is updated with the status of remaining bits of this
> +	 * register when it is read from debugfs file
> +	 */
> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> +	dev_priv->num_wa_regs++;
> +
> +	return;
>  }
>  
>  static int gen8_init_workarounds(struct intel_engine_cs *ring)
>  {
>  	int ret;
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	/*
>  	 * workarounds applied in this fn are part of register state context,
>  	 * they need to be re-initialized followed by gpu reset, suspend/resume,
>  	 * module reload.
>  	 */
> +	dev_priv->num_wa_regs = 0;
> +	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));

I've dropped this part - we allocate dev_priv already zeroed, so this is
redundant. And I expect that we'll use this w/a table from other places
(so not just for render w/a, but also other stuff that can get lost e.g.
over runtime pm), and then the clearing here could be harmful.

Both patches applied to dinq, thanks.
-Daniel
>  
>  	/*
>  	 * update the number of dwords required based on the
>  	 * actual number of workarounds applied
>  	 */
>  	ret = intel_ring_begin(ring, 24);
>  	if (ret)
>  		return ret;
>  
>  	/* WaDisablePartialInstShootdown:bdw */
> @@ -718,20 +738,23 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring)
>  	 *
>  	 * Note that PS/WM thread counts depend on the WIZ hashing
>  	 * disable bit, which we don't touch here, but it's good
>  	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>  	 */
>  	intel_ring_emit_wa(ring, GEN7_GT_MODE,
>  			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>  
>  	intel_ring_advance(ring);
>  
> +	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> +			 dev_priv->num_wa_regs);
> +
>  	return 0;
>  }
>  
>  static int init_render_ring(struct intel_engine_cs *ring)
>  {
>  	struct drm_device *dev = ring->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret = init_ring_common(ring);
>  	if (ret)
>  		return ret;
> -- 
> 2.0.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Chris Wilson Aug. 27, 2014, 3:54 p.m. UTC | #2

On Wed, Aug 27, 2014 at 05:44:55PM +0200, Daniel Vetter wrote:
> On Tue, Aug 26, 2014 at 02:44:51PM +0100, Arun Siluvery wrote:
> > The workarounds that are applied are exported to a debugfs file;
> > this is used to verify their state after the test case (reset or
> > suspend/resume etc). This patch is only required to support i-g-t.
> > 
> >  static int gen8_init_workarounds(struct intel_engine_cs *ring)
> >  {
> >  	int ret;
> > +	struct drm_device *dev = ring->dev;
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> >  
> >  	/*
> >  	 * workarounds applied in this fn are part of register state context,
> >  	 * they need to be re-initialized followed by gpu reset, suspend/resume,
> >  	 * module reload.
> >  	 */
> > +	dev_priv->num_wa_regs = 0;
> > +	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> 
> I've dropped this part - we allocate dev_priv already zeroed, so this is
> redundant. And I expect that we'll use this w/a table from other places
> (so not just for render w/a, but also other stuff that can get lost e.g.
> over runtime pm), and then the clearing here could be harmful.

But this is just a debugfs readback, something that userspace can
already do, and does. The w/a table here isn't particularly useful as
is, plus the changes in this patch wrt to writing the registers! still
could really do with some tlc.
-Chris

arun.siluvery@linux.intel.com Aug. 27, 2014, 3:59 p.m. UTC | #3

On 27/08/2014 16:44, Daniel Vetter wrote:
> On Tue, Aug 26, 2014 at 02:44:51PM +0100, Arun Siluvery wrote:
>> The workarounds that are applied are exported to a debugfs file;
>> this is used to verify their state after the test case (reset or
>> suspend/resume etc). This patch is only required to support i-g-t.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c     | 40 +++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_drv.h         | 14 ++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 23 +++++++++++++++++++
>>   3 files changed, 77 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index d42db6b..f0d63f6 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2451,20 +2451,59 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>>   		seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md);
>>   		seq_printf(m, " fp0:     0x%08x\n", pll->hw_state.fp0);
>>   		seq_printf(m, " fp1:     0x%08x\n", pll->hw_state.fp1);
>>   		seq_printf(m, " wrpll:   0x%08x\n", pll->hw_state.wrpll);
>>   	}
>>   	drm_modeset_unlock_all(dev);
>>
>>   	return 0;
>>   }
>>
>> +static int intel_wa_registers(struct seq_file *m, void *unused)
>> +{
>> +	int i;
>> +	int ret;
>> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
>> +	struct drm_device *dev = node->minor->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +	if (!IS_BROADWELL(dev)) {
>> +		DRM_DEBUG_DRIVER("Workaround table not available !!\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
>> +	if (ret)
>> +		return ret;
>> +
>> +	intel_runtime_pm_get(dev_priv);
>> +
>> +	seq_printf(m, "Workarounds applied: %d\n", dev_priv->num_wa_regs);
>> +	for (i = 0; i < dev_priv->num_wa_regs; ++i) {
>> +		u32 addr, mask;
>> +
>> +		addr = dev_priv->intel_wa_regs[i].addr;
>> +		mask = dev_priv->intel_wa_regs[i].mask;
>> +		dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
>> +		if (dev_priv->intel_wa_regs[i].addr)
>> +			seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
>> +				   dev_priv->intel_wa_regs[i].addr,
>> +				   dev_priv->intel_wa_regs[i].value,
>> +				   dev_priv->intel_wa_regs[i].mask);
>> +	}
>> +
>> +	intel_runtime_pm_put(dev_priv);
>> +	mutex_unlock(&dev->struct_mutex);
>> +
>> +	return 0;
>> +}
>> +
>>   struct pipe_crc_info {
>>   	const char *name;
>>   	struct drm_device *dev;
>>   	enum pipe pipe;
>>   };
>>
>>   static int i915_dp_mst_info(struct seq_file *m, void *unused)
>>   {
>>   	struct drm_info_node *node = (struct drm_info_node *) m->private;
>>   	struct drm_device *dev = node->minor->dev;
>> @@ -3980,20 +4019,21 @@ static const struct drm_info_list i915_debugfs_list[] = {
>>   	{"i915_llc", i915_llc, 0},
>>   	{"i915_edp_psr_status", i915_edp_psr_status, 0},
>>   	{"i915_sink_crc_eDP1", i915_sink_crc, 0},
>>   	{"i915_energy_uJ", i915_energy_uJ, 0},
>>   	{"i915_pc8_status", i915_pc8_status, 0},
>>   	{"i915_power_domain_info", i915_power_domain_info, 0},
>>   	{"i915_display_info", i915_display_info, 0},
>>   	{"i915_semaphore_status", i915_semaphore_status, 0},
>>   	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
>>   	{"i915_dp_mst_info", i915_dp_mst_info, 0},
>> +	{"intel_wa_registers", intel_wa_registers, 0}
>>   };
>>   #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>>
>>   static const struct i915_debugfs_files {
>>   	const char *name;
>>   	const struct file_operations *fops;
>>   } i915_debugfs_files[] = {
>>   	{"i915_wedged", &i915_wedged_fops},
>>   	{"i915_max_freq", &i915_max_freq_fops},
>>   	{"i915_min_freq", &i915_min_freq_fops},
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index bcf79f0..49b7be7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1546,20 +1546,34 @@ struct drm_i915_private {
>>   	wait_queue_head_t pending_flip_queue;
>>
>>   #ifdef CONFIG_DEBUG_FS
>>   	struct intel_pipe_crc pipe_crc[I915_MAX_PIPES];
>>   #endif
>>
>>   	int num_shared_dpll;
>>   	struct intel_shared_dpll shared_dplls[I915_NUM_PLLS];
>>   	int dpio_phy_iosf_port[I915_NUM_PHYS_VLV];
>>
>> +	/*
>> +	 * workarounds are currently applied at different places and
>> +	 * changes are being done to consolidate them so exact count is
>> +	 * not clear at this point, use a max value for now.
>> +	 */
>> +#define I915_MAX_WA_REGS  16
>> +	struct {
>> +		u32 addr;
>> +		u32 value;
>> +		/* bitmask representing WA bits */
>> +		u32 mask;
>> +	} intel_wa_regs[I915_MAX_WA_REGS];
>> +	u32 num_wa_regs;
>> +
>>   	/* Reclocking support */
>>   	bool render_reclock_avail;
>>   	bool lvds_downclock_avail;
>>   	/* indicates the reduced downclock for LVDS*/
>>   	int lvds_downclock;
>>
>>   	struct i915_frontbuffer_tracking fb_tracking;
>>
>>   	u16 orig_clock;
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 4146582..e48e12a 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -646,34 +646,54 @@ err_unpin:
>>   	i915_gem_object_ggtt_unpin(ring->scratch.obj);
>>   err_unref:
>>   	drm_gem_object_unreference(&ring->scratch.obj->base);
>>   err:
>>   	return ret;
>>   }
>>
>>   static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>>   				       u32 addr, u32 value)
>>   {
>> +	struct drm_device *dev = ring->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +	if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
>> +		return;
>> +
>>   	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>>   	intel_ring_emit(ring, addr);
>>   	intel_ring_emit(ring, value);
>> +
>> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
>> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
>> +	/* value is updated with the status of remaining bits of this
>> +	 * register when it is read from debugfs file
>> +	 */
>> +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
>> +	dev_priv->num_wa_regs++;
>> +
>> +	return;
>>   }
>>
>>   static int gen8_init_workarounds(struct intel_engine_cs *ring)
>>   {
>>   	int ret;
>> +	struct drm_device *dev = ring->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>
>>   	/*
>>   	 * workarounds applied in this fn are part of register state context,
>>   	 * they need to be re-initialized followed by gpu reset, suspend/resume,
>>   	 * module reload.
>>   	 */
>> +	dev_priv->num_wa_regs = 0;
>> +	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>
> I've dropped this part - we allocate dev_priv already zeroed, so this is
> redundant. And I expect that we'll use this w/a table from other places
> (so not just for render w/a, but also other stuff that can get lost e.g.
> over runtime pm), and then the clearing here could be harmful.
>
> Both patches applied to dinq, thanks.
> -Daniel

Chris gave some comments on function naming and also suggested different 
way of emitting LRIs, should I update and send a new patch?

regards
Arun

>>
>>   	/*
>>   	 * update the number of dwords required based on the
>>   	 * actual number of workarounds applied
>>   	 */
>>   	ret = intel_ring_begin(ring, 24);
>>   	if (ret)
>>   		return ret;
>>
>>   	/* WaDisablePartialInstShootdown:bdw */
>> @@ -718,20 +738,23 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring)
>>   	 *
>>   	 * Note that PS/WM thread counts depend on the WIZ hashing
>>   	 * disable bit, which we don't touch here, but it's good
>>   	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>>   	 */
>>   	intel_ring_emit_wa(ring, GEN7_GT_MODE,
>>   			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>>
>>   	intel_ring_advance(ring);
>>
>> +	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
>> +			 dev_priv->num_wa_regs);
>> +
>>   	return 0;
>>   }
>>
>>   static int init_render_ring(struct intel_engine_cs *ring)
>>   {
>>   	struct drm_device *dev = ring->dev;
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	int ret = init_ring_common(ring);
>>   	if (ret)
>>   		return ret;
>> --
>> 2.0.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

Chris Wilson Aug. 27, 2014, 4:19 p.m. UTC | #4

On Wed, Aug 27, 2014 at 05:44:55PM +0200, Daniel Vetter wrote:
> >  static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> >  				       u32 addr, u32 value)
> >  {
> > +	struct drm_device *dev = ring->dev;
> > +	struct drm_i915_private *dev_priv = dev->dev_private;
> > +
> > +	if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
> > +		return;
> > +
> >  	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> >  	intel_ring_emit(ring, addr);
> >  	intel_ring_emit(ring, value);
> > +
> > +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> > +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
> > +	/* value is updated with the status of remaining bits of this
> > +	 * register when it is read from debugfs file
> > +	 */
> > +	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> > +	dev_priv->num_wa_regs++;

This is bogus. The idea of this function is be called many times during
the system uptime.

The idea of the existing tool is to be able to probe whether the
workaround has taken effect without the aid of a kernel (except for
having to work with the kernel in case of forcewake etc).
-Chris

Lespiau, Damien Aug. 30, 2014, 3:10 p.m. UTC | #5

On Tue, Aug 26, 2014 at 02:44:51PM +0100, Arun Siluvery wrote:
> The workarounds that are applied are exported to a debugfs file;
> this is used to verify their state after the test case (reset or
> suspend/resume etc). This patch is only required to support i-g-t.

I'm really, really confused. Please bear with me.

1. We only deal with masked registers AFAICS. Those registers have the
high 16 bits masking the writes.

2. The values given to intel_ring_emit_wa() are the actual values we're going
to write in the register, so they include those mask bits. say:

    intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
                       _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); 

3. We then record in intel_wa_regs the reg address and two fields named mask
and value.

3. a) mask

  intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;

You're selecting the low 16bits and put it in mask. But the masked bits are the
upper 16bits? This may work when the W/A is about setting bits, but we have a
bug if we ever have a W/A that is about clearing a bit. It would seem better to
me to grab the upper bits which are, after all, the bitmask we're interested in.

3. b) value

   /* value is updated with the status of remaining bits of this
    * register when it is read from debugfs file
    */
  dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;

I don't understand what the comment explains. The *why* we need to do that is
missing and, frankly, having to update the reference values we capture at
intel_ring_emit_wa() time sounds like a bug to me.

I also take a note that, at this, point, intel_wa_regs.value contains both the
value and the mask. Weird.

4. Time to expose that intel_wa_regs array to user space

4. a) mask

   mask = dev_priv->intel_wa_regs[i].mask

Straigthforward enough, except that these are still the lower 16 bits, so the
value really.

4. b) value

  dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;

Hum? This really started my journey to dig futher. So we:

  - override the reference value from intel_ring_emit_wa() with whatever we
    have in the register at that moment

  - Or it with a mask that's not really a mask (but the reference value)

5. igt test

  So you grab those mask and value fields from the debugs file and read the
register through mapped MMIO. and then

   status = (current_wa[i].value & current_wa[i].mask) !=
            (wa_regs[i].value & wa_regs[i].mask);

So that's where I'm starting to put things back together and understand what
the intention is. I still think that's not quite right, especially how we get
the mask and why we read back the register in the debugfs file.

Or am I just missing something? In any case, having to spend that much time
trying to understand what's going on is a maintainability problem, we need code
that a least looks straightforward.

HTH,

arun.siluvery@linux.intel.com Aug. 31, 2014, 7:43 p.m. UTC | #6

On 30/08/2014 16:10, Damien Lespiau wrote:
> On Tue, Aug 26, 2014 at 02:44:51PM +0100, Arun Siluvery wrote:
>> The workarounds that are applied are exported to a debugfs file;
>> this is used to verify their state after the test case (reset or
>> suspend/resume etc). This patch is only required to support i-g-t.
>
> I'm really, really confused. Please bear with me.
I have reworked all the patches hopefully things will be more clear this 
time :)

>
> 1. We only deal with masked registers AFAICS. Those registers have the
> high 16 bits masking the writes.
>
> 2. The values given to intel_ring_emit_wa() are the actual values we're going
> to write in the register, so they include those mask bits. say:
>
>      intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>                         _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>
> 3. We then record in intel_wa_regs the reg address and two fields named mask
> and value.
>
> 3. a) mask
>
>    intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
>
> You're selecting the low 16bits and put it in mask. But the masked bits are the
> upper 16bits? This may work when the W/A is about setting bits, but we have a
> bug if we ever have a W/A that is about clearing a bit. It would seem better to
> me to grab the upper bits which are, after all, the bitmask we're interested in.
>
it is a valid issue, changed to use upper 16-bits as mask.

> 3. b) value
>
>     /* value is updated with the status of remaining bits of this
>      * register when it is read from debugfs file
>      */
>    dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
>
> I don't understand what the comment explains. The *why* we need to do that is
> missing and, frankly, having to update the reference values we capture at
> intel_ring_emit_wa() time sounds like a bug to me.
>
> I also take a note that, at this, point, intel_wa_regs.value contains both the
> value and the mask. Weird.
>
I agree the why part is missing.
The idea is value represents the status of other bits in this register 
besides w/a bit; this is actually redundant here, I guess I added 
because I wanted to initialize all members.
This change is not applicable in the new patches.

> 4. Time to expose that intel_wa_regs array to user space
>
> 4. a) mask
>
>     mask = dev_priv->intel_wa_regs[i].mask
>
> Straigthforward enough, except that these are still the lower 16 bits, so the
> value really.
>
> 4. b) value
>
>    dev_priv->intel_wa_regs[i].value = I915_READ(addr) | mask;
>
> Hum? This really started my journey to dig futher. So we:
>
>    - override the reference value from intel_ring_emit_wa() with whatever we
>      have in the register at that moment
>
>    - Or it with a mask that's not really a mask (but the reference value)
>
> 5. igt test
>
>    So you grab those mask and value fields from the debugs file and read the
> register through mapped MMIO. and then
>
>     status = (current_wa[i].value & current_wa[i].mask) !=
>              (wa_regs[i].value & wa_regs[i].mask);
>
> So that's where I'm starting to put things back together and understand what
> the intention is. I still think that's not quite right, especially how we get
> the mask and why we read back the register in the debugfs file.
>
We read the register value after the test case (eg reset) and compare it 
with a known value that is exported to debugfs file.

regards
Arun

> Or am I just missing something? In any case, having to spend that much time
> trying to understand what's going on is a maintainability problem, we need code
> that a least looks straightforward.
>
> HTH,
>

[2/2] drm/i915/bdw: Export workaround data to debugfs

Commit Message

Comments

Patch