Message ID | 1381870249-30614-1-git-send-email-rodrigo.vivi@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote: > If Userspace isn't using MI_PREDICATE all slices must be enabled for > backward compatibility. > > If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force > all slices on. > > v2: fix the inverted logic for backwards compatibility > USE_PREDICATE unset force gt_full when defaul is half > instead of GT_FULL flag. > > CC: Eric Anholt <eric@anholt.net> > CC: Kenneth Graunke <kenneth@whitecape.org> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 8 ++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_reg.h | 11 +++++ > drivers/gpu/drm/i915/i915_sysfs.c | 11 ++++- > drivers/gpu/drm/i915/intel_display.c | 6 +++ > drivers/gpu/drm/i915/intel_drv.h | 1 + > drivers/gpu/drm/i915/intel_pm.c | 39 ++++++++++++++++- > include/uapi/drm/i915_drm.h | 8 +++- > 8 files changed, 146 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 685fb1d..bd4774e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1219,6 +1219,12 @@ struct i915_package_c8 { > } regsave; > }; > > +struct i915_gt_slices { > + int state_default; Either use it as an integer (a count of slices enabled by default) or make it an enum. > + int forcing_full; bool legacy_userspace_busy; ? > + struct mutex m_lock; Ugh. Just struct mutex lock; /* locks all access to this struct and slice registers */ will suffice > +}; > + > typedef struct drm_i915_private { > struct drm_device *dev; > struct kmem_cache *slab; > @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { > > struct i915_package_c8 pc8; > > + struct i915_gt_slices gt_slices; > + > /* Old dri1 support infrastructure, beware the dragons ya fools entering > * here! 
*/ > struct i915_dri1_state dri1; > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 0ce0d47..eb09a51 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, > } > > static int > +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring) That's a misnomer. > +{ > + drm_i915_private_t *dev_priv = dev->dev_private; > + int ret; > + > + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) > + return 0; > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, HSW_GT_SLICE_INFO); > + intel_ring_emit(ring, SLICE_SEL_BOTH); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); > + intel_ring_emit(ring, LOWER_SLICE_ENABLED); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, HSW_SLICESHUTDOWN); > + intel_ring_emit(ring, ~SLICE_SHUTDOWN); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); > + intel_ring_emit(ring, CS_IDLE_COUNT_1US); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); > + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + 
intel_ring_emit(ring, RC_IDLE_MAX_COUNT); > + intel_ring_emit(ring, CS_IDLE_COUNT_5US); > + intel_ring_advance(ring); > + > + return 0; > +} > + > +static int > i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > return -EINVAL; > } > > + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && > + dev_priv->gt_slices.state_default == 0 && > + !dev_priv->gt_slices.forcing_full) { Always set legacy_userspace_busy here so that a change in state_default cannot possibly break currently active users i.e. they are independent bookkeeping. > + dev_priv->gt_slices.forcing_full = true; > + i915_gt_full_immediate(dev, ring); What happens when this fails? If it only partially emits the commands, etc. > + } > + > mode = args->flags & I915_EXEC_CONSTANTS_MASK; > mask = I915_EXEC_CONSTANTS_MASK; > switch (mode) { > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 497c441..0146bef 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -277,6 +277,17 @@ > #define SLICE_STATUS_MAIN_ON (2<<0) > #define SLICE_STATUS_BOTH_ON (3<<0) > > +#define HSW_SLICESHUTDOWN 0xA190 > +#define SLICE_SHUTDOWN (1<<0) > + > +#define RC_IDLE_MAX_COUNT 0x2054 > +#define CS_IDLE_COUNT_1US (1<<1) > +#define CS_IDLE_COUNT_5US (1<<3) > + > +#define WAIT_FOR_RC6_EXIT 0x20CC > +#define WAIT_RC6_EXIT (1<<0) > +#define MASK_WAIT_RC6_EXIT (1<<16) > + > /* > * 3D instructions used by the kernel > */ > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c > index 86ccd52..25b0c5b 100644 > --- a/drivers/gpu/drm/i915/i915_sysfs.c > +++ b/drivers/gpu/drm/i915/i915_sysfs.c > @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev, > struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); > struct drm_device *dev = minor->dev; > 
struct drm_i915_private *dev_priv = dev->dev_private; > + bool full; > > - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == > - LOWER_SLICE_ENABLED ? "full" : "half"); > + mutex_lock(&dev_priv->gt_slices.m_lock); > + full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED; > + mutex_unlock(&dev_priv->gt_slices.m_lock); This locking is not stopping any races - it is superfluous. > + return sprintf(buf, "%s\n", full ? "full" : "half"); > } > > static ssize_t gt_slice_config_store(struct device *kdev, > @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, > { > struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); > struct drm_device *dev = minor->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > int ret; > > if (!strncmp(buf, "full", sizeof("full") - 1)) { > ret = intel_set_gt_full(dev); > if (ret) > return ret; > + dev_priv->gt_slices.state_default = 1; > } else if (!strncmp(buf, "half", sizeof("half") - 1)) { > ret = intel_set_gt_half(dev); > if (ret) > return ret; > + dev_priv->gt_slices.state_default = 0; > } else > return -EINVAL; > return count; > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 4f1b636..1a2f8bc 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) > > if (dev_priv->info->gen >= 6) > gen6_rps_idle(dev->dev_private); > + > + if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full && > + dev_priv->gt_slices.state_default == 0) { > + intel_set_gt_half_async(dev); > + dev_priv->gt_slices.forcing_full = false; Again, you want to be ignoring state_default when changing legacy_userspace_busy. The lower level slice config handling code should be making the judgment based on all parameters after one changes. -Chris
Hi Chris, Thank you very much for all suggestions. Will do the changes. But for that question below I don't have the clear answer: On Wed, Oct 16, 2013 at 6:15 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote: >> If Userspace isn't using MI_PREDICATE all slices must be enabled for >> backward compatibility. >> >> If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force >> all slices on. >> >> v2: fix the inverted logic for backwards compatibility >> USE_PREDICATE unset force gt_full when defaul is half >> instead of GT_FULL flag. >> >> CC: Eric Anholt <eric@anholt.net> >> CC: Kenneth Graunke <kenneth@whitecape.org> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 8 ++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++ >> drivers/gpu/drm/i915/i915_reg.h | 11 +++++ >> drivers/gpu/drm/i915/i915_sysfs.c | 11 ++++- >> drivers/gpu/drm/i915/intel_display.c | 6 +++ >> drivers/gpu/drm/i915/intel_drv.h | 1 + >> drivers/gpu/drm/i915/intel_pm.c | 39 ++++++++++++++++- >> include/uapi/drm/i915_drm.h | 8 +++- >> 8 files changed, 146 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 685fb1d..bd4774e 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1219,6 +1219,12 @@ struct i915_package_c8 { >> } regsave; >> }; >> >> +struct i915_gt_slices { >> + int state_default; > > Either use it as an integer (a count of slices enabled by default) or > make it an enum. > >> + int forcing_full; > > bool legacy_userspace_busy; ? > >> + struct mutex m_lock; > > Ugh. 
Just struct mutex lock; /* locks all access to this struct and > slice registers */ will suffice > >> +}; >> + >> typedef struct drm_i915_private { >> struct drm_device *dev; >> struct kmem_cache *slab; >> @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { >> >> struct i915_package_c8 pc8; >> >> + struct i915_gt_slices gt_slices; >> + >> /* Old dri1 support infrastructure, beware the dragons ya fools entering >> * here! */ >> struct i915_dri1_state dri1; >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index 0ce0d47..eb09a51 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, >> } >> >> static int >> +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring) > > That's a misnomer. > >> +{ >> + drm_i915_private_t *dev_priv = dev->dev_private; >> + int ret; >> + >> + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) >> + return 0; >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_GT_SLICE_INFO); >> + intel_ring_emit(ring, SLICE_SEL_BOTH); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); >> + intel_ring_emit(ring, LOWER_SLICE_ENABLED); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_SLICESHUTDOWN); >> + intel_ring_emit(ring, ~SLICE_SHUTDOWN); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, 
RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_1US); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); >> + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_5US); >> + intel_ring_advance(ring); >> + >> + return 0; >> +} >> + >> +static int >> i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> struct drm_file *file, >> struct drm_i915_gem_execbuffer2 *args, >> @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> return -EINVAL; >> } >> >> + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && >> + dev_priv->gt_slices.state_default == 0 && >> + !dev_priv->gt_slices.forcing_full) { > > Always set legacy_userspace_busy here so that a change in state_default > cannot possibly break currently active users i.e. they are independent > bookkeeping. > >> + dev_priv->gt_slices.forcing_full = true; >> + i915_gt_full_immediate(dev, ring); > > What happens when this fails? If it only partially emits the commands, > etc. This is a very good question. If it fails, my only concern would be the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I don't know how to do this during the exec buf without delaying the execution. Do you have any suggestion? 
> >> + } >> + >> mode = args->flags & I915_EXEC_CONSTANTS_MASK; >> mask = I915_EXEC_CONSTANTS_MASK; >> switch (mode) { >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index 497c441..0146bef 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -277,6 +277,17 @@ >> #define SLICE_STATUS_MAIN_ON (2<<0) >> #define SLICE_STATUS_BOTH_ON (3<<0) >> >> +#define HSW_SLICESHUTDOWN 0xA190 >> +#define SLICE_SHUTDOWN (1<<0) >> + >> +#define RC_IDLE_MAX_COUNT 0x2054 >> +#define CS_IDLE_COUNT_1US (1<<1) >> +#define CS_IDLE_COUNT_5US (1<<3) >> + >> +#define WAIT_FOR_RC6_EXIT 0x20CC >> +#define WAIT_RC6_EXIT (1<<0) >> +#define MASK_WAIT_RC6_EXIT (1<<16) >> + >> /* >> * 3D instructions used by the kernel >> */ >> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c >> index 86ccd52..25b0c5b 100644 >> --- a/drivers/gpu/drm/i915/i915_sysfs.c >> +++ b/drivers/gpu/drm/i915/i915_sysfs.c >> @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev, >> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); >> struct drm_device *dev = minor->dev; >> struct drm_i915_private *dev_priv = dev->dev_private; >> + bool full; >> >> - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == >> - LOWER_SLICE_ENABLED ? "full" : "half"); >> + mutex_lock(&dev_priv->gt_slices.m_lock); >> + full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED; >> + mutex_unlock(&dev_priv->gt_slices.m_lock); > > This locking is not stopping any races - it is superfluous. > >> + return sprintf(buf, "%s\n", full ? 
"full" : "half"); >> } >> >> static ssize_t gt_slice_config_store(struct device *kdev, >> @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, >> { >> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); >> struct drm_device *dev = minor->dev; >> + struct drm_i915_private *dev_priv = dev->dev_private; >> int ret; >> >> if (!strncmp(buf, "full", sizeof("full") - 1)) { >> ret = intel_set_gt_full(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 1; >> } else if (!strncmp(buf, "half", sizeof("half") - 1)) { >> ret = intel_set_gt_half(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 0; >> } else >> return -EINVAL; >> return count; >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c >> index 4f1b636..1a2f8bc 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) >> >> if (dev_priv->info->gen >= 6) >> gen6_rps_idle(dev->dev_private); >> + >> + if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full && >> + dev_priv->gt_slices.state_default == 0) { >> + intel_set_gt_half_async(dev); >> + dev_priv->gt_slices.forcing_full = false; > > Again, you want to be ignoring state_default when changing > legacy_userspace_busy. > > The lower level slice config handling code should be making the judgment > based on all parameters after one changes. > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre
On Wed, Oct 16, 2013 at 09:12:46AM -0300, Rodrigo Vivi wrote: > >> + dev_priv->gt_slices.forcing_full = true; > >> + i915_gt_full_immediate(dev, ring); > > > > What happens when this fails? If it only partially emits the commands, > > etc. > > This is a very good question. If it fails, my only concern would be > the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I > don't know how to do this during the exec buf without delaying the > execution. Do you have any suggestion? Reduce the series of if() to just one, act upon the return value, only set legacy_userspace_busy to true after making the change. It will eventually get cleaned up if the dispatch later fails, but we must make sure that we accurately track hardware state. -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 685fb1d..bd4774e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1219,6 +1219,12 @@ struct i915_package_c8 { } regsave; }; +struct i915_gt_slices { + int state_default; + int forcing_full; + struct mutex m_lock; +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { struct i915_package_c8 pc8; + struct i915_gt_slices gt_slices; + /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..eb09a51 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, } static int +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret; + + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) + return 0; + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_GT_SLICE_INFO); + intel_ring_emit(ring, SLICE_SEL_BOTH); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); + intel_ring_emit(ring, LOWER_SLICE_ENABLED); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_SLICESHUTDOWN); + intel_ring_emit(ring, ~SLICE_SHUTDOWN); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + 
intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_1US); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_5US); + intel_ring_advance(ring); + + return 0; +} + +static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && + dev_priv->gt_slices.state_default == 0 && + !dev_priv->gt_slices.forcing_full) { + dev_priv->gt_slices.forcing_full = true; + i915_gt_full_immediate(dev, ring); + } + mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; switch (mode) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 497c441..0146bef 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -277,6 +277,17 @@ #define SLICE_STATUS_MAIN_ON (2<<0) #define SLICE_STATUS_BOTH_ON (3<<0) +#define HSW_SLICESHUTDOWN 0xA190 +#define SLICE_SHUTDOWN (1<<0) + +#define RC_IDLE_MAX_COUNT 0x2054 +#define CS_IDLE_COUNT_1US (1<<1) +#define CS_IDLE_COUNT_5US (1<<3) + +#define WAIT_FOR_RC6_EXIT 0x20CC +#define WAIT_RC6_EXIT (1<<0) +#define MASK_WAIT_RC6_EXIT (1<<16) + /* * 3D instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 86ccd52..25b0c5b 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -124,9 +124,13 @@ static ssize_t 
gt_slice_config_show(struct device *kdev, struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; + bool full; - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == - LOWER_SLICE_ENABLED ? "full" : "half"); + mutex_lock(&dev_priv->gt_slices.m_lock); + full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED; + mutex_unlock(&dev_priv->gt_slices.m_lock); + + return sprintf(buf, "%s\n", full ? "full" : "half"); } static ssize_t gt_slice_config_store(struct device *kdev, @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, { struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; if (!strncmp(buf, "full", sizeof("full") - 1)) { ret = intel_set_gt_full(dev); if (ret) return ret; + dev_priv->gt_slices.state_default = 1; } else if (!strncmp(buf, "half", sizeof("half") - 1)) { ret = intel_set_gt_half(dev); if (ret) return ret; + dev_priv->gt_slices.state_default = 0; } else return -EINVAL; return count; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f1b636..1a2f8bc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) if (dev_priv->info->gen >= 6) gen6_rps_idle(dev->dev_private); + + if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full && + dev_priv->gt_slices.state_default == 0) { + intel_set_gt_half_async(dev); + dev_priv->gt_slices.forcing_full = false; + } } void intel_mark_fb_busy(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a9abbb5..98cd63e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -836,6 +836,7 @@ void 
intel_disable_gt_powersave(struct drm_device *dev); void ironlake_teardown_rc6(struct drm_device *dev); int intel_set_gt_full(struct drm_device *dev); int intel_set_gt_half(struct drm_device *dev); +void intel_set_gt_half_async(struct drm_device *dev); void intel_init_gt_slices(struct drm_device *dev); void gen6_update_ring_freq(struct drm_device *dev); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 63af075..2ded34c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3873,6 +3873,7 @@ int intel_set_gt_full(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + mutex_lock(&dev_priv->gt_slices.m_lock); I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); /* Slices are enabled on RC6 exit */ @@ -3881,13 +3882,18 @@ int intel_set_gt_full(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_BOTH_ON), 2000)) { DRM_ERROR("Timeout enabling full gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.m_lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.m_lock); return 0; } @@ -3899,6 +3905,7 @@ int intel_set_gt_half(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + mutex_lock(&dev_priv->gt_slices.m_lock); I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); /* Slices are disabled on RC6 exit */ @@ -3907,16 +3914,40 @@ int intel_set_gt_half(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_MAIN_ON), 2000)) { DRM_ERROR("Timed out disabling half gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); + 
gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.m_lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.m_lock); + return 0; } +/** + * On Haswell, slices on/off transitions are done via RC6 sequence. + * This async function allows you to request slices shutdown without waiting. + * Slices will be disabled on next RC6 exit. + */ +void intel_set_gt_half_async(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!HAS_SLICE_SHUTDOWN(dev)) + return; + + mutex_lock(&dev_priv->gt_slices.m_lock); + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + mutex_unlock(&dev_priv->gt_slices.m_lock); +} + void intel_init_gt_slices(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3927,9 +3958,13 @@ void intel_init_gt_slices(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return; + dev_priv->gt_slices.state_default = 1; + dev_priv->gt_slices.forcing_full = false; + mutex_init(&dev_priv->gt_slices.m_lock); + if (!i915_gt_slice_config) { - I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); - I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + intel_set_gt_half_async(dev); + dev_priv->gt_slices.state_default = 0; } } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a4e97b..3fa3e24 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -731,7 +731,13 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +/* If this flag is set userspace is using predicate and half slices can be + * let disabled for power saving. 
Otherwise use all slices even when disabled + * by boot parameter or via sysfs interface + */ +#define I915_EXEC_USE_PREDICATE (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \
If Userspace isn't using MI_PREDICATE all slices must be enabled for backward compatibility. If I915_EXEC_USE_PREDICATE isn't set and default is set to half, kernel will force all slices on. v2: fix the inverted logic for backwards compatibility USE_PREDICATE unset force gt_full when default is half instead of GT_FULL flag. CC: Eric Anholt <eric@anholt.net> CC: Kenneth Graunke <kenneth@whitecape.org> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 11 +++++ drivers/gpu/drm/i915/i915_sysfs.c | 11 ++++- drivers/gpu/drm/i915/intel_display.c | 6 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 39 ++++++++++++++++- include/uapi/drm/i915_drm.h | 8 +++- 8 files changed, 146 insertions(+), 5 deletions(-)