diff mbox

[3/3] drm/i915: HSW GT3 Slices: Userspace can dynamically forcibly enable all slices

Message ID 1381848067-5269-5-git-send-email-rodrigo.vivi@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Rodrigo Vivi Oct. 15, 2013, 2:41 p.m. UTC
Rendering buffers that need full GT power can request full power through
this I915_EXEC_GT_FULL flag. If the default is the power saving with half
slices off it executes LRIs to immediately enable all slices.

CC: Eric Anholt <eric@anholt.net>
CC: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  8 ++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
 drivers/gpu/drm/i915/i915_sysfs.c          | 11 ++++-
 drivers/gpu/drm/i915/intel_display.c       |  6 +++
 drivers/gpu/drm/i915/intel_drv.h           |  1 +
 drivers/gpu/drm/i915/intel_pm.c            | 41 ++++++++++++++++--
 include/uapi/drm/i915_drm.h                |  5 ++-
 8 files changed, 144 insertions(+), 6 deletions(-)

Comments

Eric Anholt Oct. 15, 2013, 4:14 p.m. UTC | #1
Rodrigo Vivi <rodrigo.vivi@gmail.com> writes:

> Rendering buffers that need full GT power can request full power through
> this I915_EXEC_GT_FULL flag. If the default is the power saving with half
> slices off it executes LRIs to immediately enable all slices.

How is userspace supposed to decide that the GPU needs to be at full
power?  Power management is a system problem which wants awareness of
the total load on the GPU from all clients, not just 1.  The kernel can
get that information, but userspace can't.

Basically, if you give userspace this knob, userspace will just have to
set it on every batchbuffer.  At which point, why give them the knob at
all?
Rodrigo Vivi Oct. 15, 2013, 5:19 p.m. UTC | #2
please ignore this one and other two (libdrm and igt) with exec
flag... will do a v2 and resend soon..

On Tue, Oct 15, 2013 at 1:14 PM, Eric Anholt <eric@anholt.net> wrote:
> Rodrigo Vivi <rodrigo.vivi@gmail.com> writes:
>
>> Rendering buffers that need full GT power can request full power through
>> this I915_EXEC_GT_FULL flag. If the default is the power saving with half
>> slices off it executes LRIs to immediately enable all slices.
>
> How is userspace supposed to decide that the GPU needs to be at full
> power?  Power management is a system problem which wants awareness of
> the total load on the GPU from all clients, not just 1.  The kernel can
> get that information, but userspace can't.
>
> Basically, if you give userspace this knob, userspace will just have to
> set it on every batchbuffer.  At which point, why give them the knob at
> all?
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 685fb1d..bd4774e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1219,6 +1219,12 @@  struct i915_package_c8 {
 	} regsave;
 };
 
+struct i915_gt_slices {
+	int state_default;
+	int forcing_full;
+	struct mutex m_lock;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -1418,6 +1424,8 @@  typedef struct drm_i915_private {
 
 	struct i915_package_c8 pc8;
 
+	struct i915_gt_slices gt_slices;
+
 	/* Old dri1 support infrastructure, beware the dragons ya fools entering
 	 * here! */
 	struct i915_dri1_state dri1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..16dc86e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -923,6 +923,66 @@  i915_reset_gen7_sol_offsets(struct drm_device *dev,
 }
 
 static int
+i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
+		return 0;
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_GT_SLICE_INFO);
+	intel_ring_emit(ring, SLICE_SEL_BOTH);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
+	intel_ring_emit(ring, LOWER_SLICE_ENABLED);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_SLICESHUTDOWN);
+	intel_ring_emit(ring, ~SLICE_SHUTDOWN);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_1US);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
+	intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_5US);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
@@ -999,6 +1059,13 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_GT_FULL &&
+	    dev_priv->gt_slices.state_default == 0 &&
+	    !dev_priv->gt_slices.forcing_full) {
+		dev_priv->gt_slices.forcing_full = true;
+		i915_gt_full_immediate(dev, ring);
+	}
+
 	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	mask = I915_EXEC_CONSTANTS_MASK;
 	switch (mode) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 497c441..0146bef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -277,6 +277,17 @@ 
 #define   SLICE_STATUS_MAIN_ON	(2<<0)
 #define   SLICE_STATUS_BOTH_ON	(3<<0)
 
+#define HSW_SLICESHUTDOWN	0xA190
+#define   SLICE_SHUTDOWN	(1<<0)
+
+#define RC_IDLE_MAX_COUNT	0x2054
+#define   CS_IDLE_COUNT_1US	(1<<1)
+#define   CS_IDLE_COUNT_5US	(1<<3)
+
+#define WAIT_FOR_RC6_EXIT	0x20CC
+#define   WAIT_RC6_EXIT		(1<<0)
+#define   MASK_WAIT_RC6_EXIT	(1<<16)
+
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ccd52..25b0c5b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -124,9 +124,13 @@  static ssize_t gt_slice_config_show(struct device *kdev,
 	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
 	struct drm_device *dev = minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool full;
 
-	return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
-		       LOWER_SLICE_ENABLED ? "full" : "half");
+	mutex_lock(&dev_priv->gt_slices.m_lock);
+	full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+
+	return sprintf(buf, "%s\n", full ? "full" : "half");
 }
 
 static ssize_t gt_slice_config_store(struct device *kdev,
@@ -135,16 +139,19 @@  static ssize_t gt_slice_config_store(struct device *kdev,
 {
 	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
 	struct drm_device *dev = minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
 	if (!strncmp(buf, "full", sizeof("full") - 1)) {
 		ret = intel_set_gt_full(dev);
 		if (ret)
 			return ret;
+		dev_priv->gt_slices.state_default = 1;
 	} else if (!strncmp(buf, "half", sizeof("half") - 1)) {
 		ret = intel_set_gt_half(dev);
 		if (ret)
 			return ret;
+		dev_priv->gt_slices.state_default = 0;
 	} else
 		return -EINVAL;
 	return count;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4f1b636..1a2f8bc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7778,6 +7778,12 @@  void intel_mark_idle(struct drm_device *dev)
 
 	if (dev_priv->info->gen >= 6)
 		gen6_rps_idle(dev->dev_private);
+
+	if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
+	    dev_priv->gt_slices.state_default == 0) {
+		intel_set_gt_half_async(dev);
+		dev_priv->gt_slices.forcing_full = false;
+	}
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a9abbb5..98cd63e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -836,6 +836,7 @@  void intel_disable_gt_powersave(struct drm_device *dev);
 void ironlake_teardown_rc6(struct drm_device *dev);
 int intel_set_gt_full(struct drm_device *dev);
 int intel_set_gt_half(struct drm_device *dev);
+void intel_set_gt_half_async(struct drm_device *dev);
 void intel_init_gt_slices(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 63af075..24a0dc7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3873,6 +3873,7 @@  int intel_set_gt_full(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return -ENODEV;
 
+	mutex_lock(&dev_priv->gt_slices.m_lock);
 	I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 
 	/* Slices are enabled on RC6 exit */
@@ -3881,14 +3882,19 @@  int intel_set_gt_full(struct drm_device *dev)
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_BOTH_ON), 2000)) {
 		DRM_ERROR("Timeout enabling full gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+		mutex_unlock(&dev_priv->gt_slices.m_lock);
+
 		gen6_gt_force_wake_put(dev_priv);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
-	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
 
+	gen6_gt_force_wake_put(dev_priv);
 	return 0;
 }
 
@@ -3899,6 +3905,7 @@  int intel_set_gt_half(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return -ENODEV;
 
+	mutex_lock(&dev_priv->gt_slices.m_lock);
 	I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 
 	/* Slices are disabled on RC6 exit */
@@ -3907,16 +3914,40 @@  int intel_set_gt_half(struct drm_device *dev)
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_MAIN_ON), 2000)) {
 		DRM_ERROR("Timed out disabling half gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+		mutex_unlock(&dev_priv->gt_slices.m_lock);
+
 		gen6_gt_force_wake_put(dev_priv);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+
 	gen6_gt_force_wake_put(dev_priv);
 	return 0;
 }
 
+/**
+ * On Haswell, slices on/off transitions are done via RC6 sequence.
+ * This async function allows you to request slices shutdown without waiting.
+ * Slices will be disabled on next RC6 exit.
+ */
+void intel_set_gt_half_async(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!HAS_SLICE_SHUTDOWN(dev))
+		return;
+
+	mutex_lock(&dev_priv->gt_slices.m_lock);
+	I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
+	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+}
+
 void intel_init_gt_slices(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3927,9 +3958,13 @@  void intel_init_gt_slices(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return;
 
+	dev_priv->gt_slices.state_default = 1;
+	dev_priv->gt_slices.forcing_full = false;
+	mutex_init(&dev_priv->gt_slices.m_lock);
+
 	if (!i915_gt_slice_config) {
-		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
-		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+		intel_set_gt_half_async(dev);
+		dev_priv->gt_slices.state_default = 0;
 	}
 }
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..144054f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,10 @@  struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/* Use all available GT Slisces */
+#define I915_EXEC_GT_FULL		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_GT_FULL<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \