diff mbox

[2/3] drm/i915: reference counted forcewake

Message ID 1302804827-11597-5-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky April 14, 2011, 6:13 p.m. UTC
Provide a reference count to track the forcewake state of the GPU. The
savings is up to 1 UC read if the unit is already awake, but the main
goal is to give userspace some mechanism to wake the GPU.

v2:
Added a spin_lock for synchronizing access to forcewake. The lock
should not be heavily contested because any caller will either have
struct_mutex or config.mutex, and those locks should be much more
serializing than this. In terms of locking order:

1.[config.mutex]
2.	[struct_mutex]
3.		forcewake_lock

By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
_locked version of the read function for callers wishing to prevent the
GT from possibly going to sleep in between reads. (We could also use
these functions to try to get system state if the lock somehow becomes
stuck).

Removed all previous callers of gen6_gt_force_wake_get/put because the
register access should now implicitly be safe.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |    5 -----
 drivers/gpu/drm/i915/i915_dma.c      |    2 ++
 drivers/gpu/drm/i915/i915_drv.c      |   20 ++++++++++++++++++--
 drivers/gpu/drm/i915/i915_drv.h      |   28 ++++++++++++++++++++++++----
 drivers/gpu/drm/i915/intel_display.c |    5 -----
 5 files changed, 44 insertions(+), 16 deletions(-)

Comments

Chris Wilson April 16, 2011, 6:52 a.m. UTC | #1
On Thu, 14 Apr 2011 11:13:46 -0700, Ben Widawsky <ben@bwidawsk.net> wrote:
> Provide a reference count to track the forcewake state of the GPU. The
> savings is up to 1 UC read if the unit is already awake, but the main
> goal is to give userspace some mechanism to wake the GPU.
> 
> v2:
> Added a spin_lock for synchronizing access to forcewake. The lock
> should not be heavily contested because any caller will either have
> struct_mutex or config.mutex, and those locks should be much more
> serializing than this. In terms of locking order:
> 
> 1.[config.mutex]
> 2.	[struct_mutex]
> 3.		forcewake_lock
> 
> By default i915_read[bwlq] functions now acquire forcewake_lock. Added a new
> _locked version of the read function for callers wishing to prevent the
> GT from possibly going to sleep in between reads. (We could also use
> these functions to try to get system state if the lock somehow becomes
> stuck).

This patch needs to be split again. I want the update to the existing
interface and users to be separate from the introduction of a new
interface. (This means that should we ever regret that interface we can
rip it out without undoing the fixes.)

Replacing the get()...long sequence of read/writes...put() did make me cry
a little, but it is indeed safer to move the check into the individual
reads (though there is nothing to prevent the rc6-window from closing
during the middle of that sequence... we need to check that is ok) and too
ugly to special case those rare times when we do modify 20 regs at once
(or maybe we have to?).

Again, let's change the macro without removing the existing get()...put()
users and then remove those as a patch+review on a per-case basis.

> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -2025,6 +2025,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
>  
>  	spin_lock_init(&dev_priv->irq_lock);
>  	spin_lock_init(&dev_priv->error_lock);
> +	if (IS_GEN6(dev))
> +		spin_lock_init(&dev_priv->forcewake_lock);

Just do it.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29..cc3818f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -872,9 +872,6 @@  static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		u32 rpdownei, rpcurdown, rpprevdown;
 		int max_freq;
 
-		/* RPSTAT1 is in the GT power well */
-		__gen6_gt_force_wake_get(dev_priv);
-
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
 		rpcurup = I915_READ(GEN6_RP_CUR_UP);
@@ -917,8 +914,6 @@  static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * 50);
-
-		__gen6_gt_force_wake_put(dev_priv);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7273037..838bbae 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -2025,6 +2025,8 @@  int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->error_lock);
+	if (IS_GEN6(dev))
+		spin_lock_init(&dev_priv->forcewake_lock);
 
 	if (IS_MOBILE(dev) || !IS_GEN2(dev))
 		dev_priv->num_pipe = 2;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c34a8dd..1a3a096 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -263,7 +263,7 @@  void intel_detect_pch (struct drm_device *dev)
 	}
 }
 
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	int count;
 
@@ -279,12 +279,28 @@  void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 		udelay(10);
 }
 
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!spin_is_locked(&dev_priv->forcewake_lock));
+
+	if (dev_priv->forcewake_count++ == 0)
+		__gen6_gt_force_wake_get(dev_priv);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
 	POSTING_READ(FORCEWAKE);
 }
 
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!spin_is_locked(&dev_priv->forcewake_lock));
+
+	if (--dev_priv->forcewake_count == 0)
+		__gen6_gt_force_wake_put(dev_priv);
+}
+
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
 	int loop = 500;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8981173..320daf1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -703,6 +703,9 @@  typedef struct drm_i915_private {
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+
+	spinlock_t forcewake_lock;
+	int forcewake_count;
 } drm_i915_private_t;
 
 struct drm_i915_gem_object {
@@ -1318,8 +1321,8 @@  extern void intel_display_print_error_state(struct seq_file *m,
  * must be set to prevent GT core from power down and stale values being
  * returned.
  */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 /* We give fast paths for the really cool registers */
@@ -1332,15 +1335,32 @@  void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
 	u##x val = 0; \
 	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_force_wake_get(dev_priv); \
+		unsigned long flags; \
+		spin_lock_irqsave(&(dev_priv)->forcewake_lock, flags); \
+		gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		gen6_gt_force_wake_put(dev_priv); \
+		spin_unlock_irqrestore(&(dev_priv)->forcewake_lock, flags); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
+	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+	return val; \
+} \
+\
+static inline u##x i915_read##x##_locked(struct drm_i915_private *dev_priv, u32 reg) { \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		gen6_gt_force_wake_get(dev_priv); \
 		val = read##y(dev_priv->regs + reg); \
-		__gen6_gt_force_wake_put(dev_priv); \
+		gen6_gt_force_wake_put(dev_priv); \
 	} else { \
 		val = read##y(dev_priv->regs + reg); \
 	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
+
 __i915_read(8, b)
 __i915_read(16, w)
 __i915_read(32, l)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 432fc04..bd85272 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1828,7 +1828,6 @@  static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	__gen6_gt_force_wake_get(dev_priv);
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
 		GEN6_BLITTER_LOCK_SHIFT;
@@ -1839,7 +1838,6 @@  static void sandybridge_blit_fbc_update(struct drm_device *dev)
 			 GEN6_BLITTER_LOCK_SHIFT);
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -6853,7 +6851,6 @@  void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	 * userspace...
 	 */
 	I915_WRITE(GEN6_RC_STATE, 0);
-	__gen6_gt_force_wake_get(dev_priv);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6950,8 +6947,6 @@  void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_PMIMR, 0);
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
-
-	__gen6_gt_force_wake_put(dev_priv);
 }
 
 void intel_enable_clock_gating(struct drm_device *dev)