@@ -304,7 +304,10 @@ static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
udelay(10);
I915_WRITE_NOTRACE(FORCEWAKE, 1);
- POSTING_READ(FORCEWAKE);
+ /* This is a posting read, but the POSTING_READ macro calls
+ * this function.
+ */
+ (void)I915_READ_NOTRACE(FORCEWAKE);
count = 0;
while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0)
@@ -332,7 +335,10 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE, 0);
- POSTING_READ(FORCEWAKE);
+ /* Posting read of FORCEWAKE (value discarded). The raw accessor is used
+ * because POSTING_READ() now expands to intel_posting_read(), which
+ * itself takes and drops force-wake and so would lead back here.
+ */
+ (void)I915_READ_NOTRACE(FORCEWAKE);
}
/*
@@ -360,6 +366,37 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
}
}
+/* In PCI, writes are not guaranteed to reach the device until a read
+ * from the device has occurred. The POSTING_READ macro is used to
+ * indicate when we are doing one of those reads to force writes to
+ * the device, so that we can for example test a value in some cached
+ * memory and go to sleep waiting for an interrupt
+ * (i915_wait_request()) or implement some timing requirement.
+ *
+ * However, as of gen6, a mere PCI write posting read doesn't mean
+ * that the writes have reached the device, as they get queued in a
+ * FIFO to hide the wake-from-rc6 latency, even if the chip wasn't in
+ * RC6 at the time. For gen6, we also have to wait for the fifo to
+ * drain, which means force-waking the GPU so that we can read the
+ * fifo count.
+ *
+ * @dev_priv: device private data; dev_priv->info->gen selects the path.
+ * @reg: register to read back. It is only read on gen < 6; on
+ *       gen >= 6 this function polls GT_FIFO_FREE_ENTRIES instead and
+ *       never touches @reg (NOTE(review): confirm that is intended).
+ *
+ * Returns nothing. On gen >= 6 a non-zero result from wait_for()
+ * (presumably a timeout; the 500 is presumably milliseconds -- confirm
+ * against the wait_for() definition) is reported once via WARN_ON_ONCE().
+ */
+void
+intel_posting_read(struct drm_i915_private *dev_priv, uint32_t reg)
+{
+ int ret;
+
+ if (dev_priv->info->gen < 6) { /* pre-gen6: a plain read-back is all that is done */
+ (void)I915_READ_NOTRACE(reg);
+ return;
+ }
+
+ gen6_gt_force_wake_get(dev_priv); /* force-wake is held only around the FIFO poll */
+ ret = wait_for(I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES) == 0x3f, 500); /* 0x3f: presumably "every FIFO entry free" -- confirm */
+ gen6_gt_force_wake_put(dev_priv);
+
+ WARN_ON_ONCE(ret != 0); /* warn once only; the caller gets no error indication */
+}
+
static int i915_drm_freeze(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1366,6 +1366,7 @@ extern void intel_display_print_error_state(struct seq_file *m,
void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+void intel_posting_read(struct drm_i915_private *dev_priv, uint32_t reg);
/* We give fast paths for the really cool registers */
#define NEEDS_FORCE_WAKE(dev_priv, reg) \
@@ -1423,7 +1424,7 @@ __i915_write(64, q)
#define I915_WRITE64(reg, val) i915_write64(dev_priv, (reg), (val))
#define I915_READ64(reg) i915_read64(dev_priv, (reg))
-#define POSTING_READ(reg) (void)I915_READ_NOTRACE(reg)
+#define POSTING_READ(reg) intel_posting_read(dev_priv, reg) /* relies on a dev_priv variable being in scope at the call site */
#define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
Our wait-on-interrupts path goes:

    I915_WRITE(IMR, enabled_val)
    POSTING_READ(IMR);
    if (!i915_gem_seqno_passed())
        sleep();

However, the IMR write can still be queued in the GT FIFO even after
the POSTING_READ, since the GT FIFO isn't implementing PCI semantics.
Yay. So, when we're doing a POSTING_READ, we get to check that the FIFO
is empty, and unfortunately to check that the FIFO is empty, it appears
we have to do the FORCEWAKE dance.

Previously, piglit copypixels-sync test had about a 0.5% chance of
triggering a missed IRQ (meaning I've never seen 500 successful runs
without a failure). With this pair of patches, I'm at 5700 clean runs
as I type this.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/i915/i915_drv.c |   41 +++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.h |    3 +-
 2 files changed, 41 insertions(+), 3 deletions(-)