@@ -290,6 +290,15 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
+ /*
+ * Ensure that any following seqno writes only happen after the render
+ * cache has actually been flushed.
+ * The documentation also mentions that every 4th PIPE_CONTROL command
+ * (except the ones with only read-cache invalidate bits set) must have
+ * the CS_STALL bit set. Setting the bit unconditionally here covers
+ * both requirements.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
@@ -297,11 +306,6 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring,
if (flush_domains) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
- /*
- * Ensure that any following seqno writes only happen
- * when the render cache is indeed flushed.
- */
- flags |= PIPE_CONTROL_CS_STALL;
}
if (invalidate_domains) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;