Message ID | 1382389218-3388-1-git-send-email-rodrigo.vivi@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Oct 21, 2013 at 07:00:18PM -0200, Rodrigo Vivi wrote: > static int > +i915_legacy_userspace_busy(struct drm_device *dev, > + struct intel_ring_buffer *ring) s/i915_legacy_userspace_busy/gt_legacy_userspace_busy/ As that is a bit more distinctive. > +{ > + drm_i915_private_t *dev_priv = dev->dev_private; > + int ret; > + > + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) > + return -ENODEV; > + > + if (dev_priv->gt_slices.state_default == 1) > + return -EBADE; This needs to be replaced. if (dev_priv->gt_slices.config == 2) return 0; > + > + ret = intel_ring_begin(ring, 3); The dword count must be even, or else the hardware chokes. However, since we cannot interrupt this sequence and leave the hw in an inconsistent state, we need to emit this entire sequence in a single block. > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, HSW_GT_SLICE_INFO); > + intel_ring_emit(ring, SLICE_SEL_BOTH); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); > + intel_ring_emit(ring, LOWER_SLICE_ENABLED); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, HSW_SLICESHUTDOWN); > + intel_ring_emit(ring, ~SLICE_SHUTDOWN); > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); > + intel_ring_emit(ring, CS_IDLE_COUNT_1US); > + intel_ring_advance(ring); > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); > + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); 
_MASKED_BIT_ENABLE(WAIT_RC6_EXIT) > + intel_ring_advance(ring); > + > + ret = intel_ring_begin(ring, 3); > + if (ret) > + return ret; > + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); > + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); > + intel_ring_emit(ring, CS_IDLE_COUNT_5US); > + intel_ring_advance(ring); > + dev_priv->gt_slices.config = 2; > + return 0; > +} > + > +static int > i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > @@ -999,6 +1063,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > return -EINVAL; > } > > + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && > + !dev_priv->gt_slices.legacy_userspace_busy) > + if (i915_legacy_userspace_busy(dev, ring) == 0) > + dev_priv->gt_slices.legacy_userspace_busy = true; Now here we cannot just ignore a failure to switch slice configuration. static bool gt_legacy_userspace(struct intel_ring_buffer *ring, struct drm_i915_gem_execbuffer2 *args) { if (ring->id == BCS) return false; if (!HAS_SLICE_SHUTDOWN(ring->dev)) return false; return (args->flags & I915_EXEC_USE_PREDICATE) == 0; } if (gt_legacy_userspace(ring, args)) { ret = gt_legacy_userspace_busy(ring); if (ret) return ret; dev_priv->gt_slices.legacy_userspace_busy = true; } > + > mode = args->flags & I915_EXEC_CONSTANTS_MASK; > mask = I915_EXEC_CONSTANTS_MASK; > switch (mode) { > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 497c441..0146bef 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -277,6 +277,17 @@ > #define SLICE_STATUS_MAIN_ON (2<<0) > #define SLICE_STATUS_BOTH_ON (3<<0) > > +#define HSW_SLICESHUTDOWN 0xA190 > +#define SLICE_SHUTDOWN (1<<0) > + > +#define RC_IDLE_MAX_COUNT 0x2054 > +#define CS_IDLE_COUNT_1US (1<<1) > +#define CS_IDLE_COUNT_5US (1<<3) > + > +#define WAIT_FOR_RC6_EXIT 0x20CC > +#define WAIT_RC6_EXIT (1<<0) > +#define MASK_WAIT_RC6_EXIT (1<<16) > + > /* > * 3D 
instructions used by the kernel > */ > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c > index 86ccd52..1dd57fb 100644 > --- a/drivers/gpu/drm/i915/i915_sysfs.c > +++ b/drivers/gpu/drm/i915/i915_sysfs.c > @@ -125,8 +125,7 @@ static ssize_t gt_slice_config_show(struct device *kdev, > struct drm_device *dev = minor->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == > - LOWER_SLICE_ENABLED ? "full" : "half"); > + return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED ? "full" : "half"); > } > > static ssize_t gt_slice_config_store(struct device *kdev, > @@ -135,16 +134,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, > { > struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); > struct drm_device *dev = minor->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > int ret; This cannot change state whilst legacy_userspace_busy. > > if (!strncmp(buf, "full", sizeof("full") - 1)) { > ret = intel_set_gt_full(dev); > if (ret) > return ret; > + dev_priv->gt_slices.state_default = 1; dev_priv->gt_slices.max_config = 2; > } else if (!strncmp(buf, "half", sizeof("half") - 1)) { > ret = intel_set_gt_half(dev); > if (ret) > return ret; > + dev_priv->gt_slices.state_default = 0; dev_priv->gt_slices.max_config = 1; > } else > return -EINVAL; (void)intel_gt_update_slice_config(dev); where int intel_gt_update_slice_config(struct drm_device *dev) { int config; if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; if (dev_priv-gt_slices.legacy_userspace_busy) return -EBUSY; config = min(dev_priv->gt_slices.config, dev_priv->gt_slices.max_config); if (config == dev_priv->gt_slices.config) return 0; /* slice_set_config(dev, config); ? 
*/ switch (config) { case 1: gt_slice_set_half(dev); break; case 2: gt_slice_set_full(dev); break; default: return -EINVAL; } dev_priv->gt_slices.config = config; return 0; } > return count; > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 4f1b636..3810ecf 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) > > if (dev_priv->info->gen >= 6) > gen6_rps_idle(dev->dev_private); > + > + if (HAS_SLICE_SHUTDOWN(dev) && > + dev_priv->gt_slices.legacy_userspace_busy) { > + intel_set_gt_half_async(dev); > + } Just if (dev_priv->gt_slices.legacy_userspace_busy) { dev_priv->gt_slices.legacy_userspace_busy = false; intel_gt_update_slice_config(dev); }
On Tue, Oct 22, 2013 at 5:47 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Mon, Oct 21, 2013 at 07:00:18PM -0200, Rodrigo Vivi wrote: >> static int >> +i915_legacy_userspace_busy(struct drm_device *dev, >> + struct intel_ring_buffer *ring) > > s/i915_legacy_userspace_busy/gt_legacy_userspace_busy/ > > As that is a bit more distinctive. ok, makes sense. > >> +{ >> + drm_i915_private_t *dev_priv = dev->dev_private; >> + int ret; >> + >> + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) >> + return -ENODEV; >> + >> + if (dev_priv->gt_slices.state_default == 1) >> + return -EBADE; > > This needs to be replaced. > > if (dev_priv->gt_slices.config == 2) > return 0; > >> + >> + ret = intel_ring_begin(ring, 3); > > The dword count must be even, or else the hardware chokes. > However, since we cannot interrupt this sequence and leave the hw in an > inconsistent state, we need to emit this entire sequence in a single > block. makes sense... changed and worked locally here. 
> >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_GT_SLICE_INFO); >> + intel_ring_emit(ring, SLICE_SEL_BOTH); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); >> + intel_ring_emit(ring, LOWER_SLICE_ENABLED); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, HSW_SLICESHUTDOWN); >> + intel_ring_emit(ring, ~SLICE_SHUTDOWN); >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_1US); >> + intel_ring_advance(ring); >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); >> + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); > > _MASKED_BIT_ENABLE(WAIT_RC6_EXIT) thanks > >> + intel_ring_advance(ring); >> + >> + ret = intel_ring_begin(ring, 3); >> + if (ret) >> + return ret; >> + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); >> + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); >> + intel_ring_emit(ring, CS_IDLE_COUNT_5US); >> + intel_ring_advance(ring); >> + > dev_priv->gt_slices.config = 2; >> + return 0; >> +} >> + >> +static int >> i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> struct drm_file *file, >> struct drm_i915_gem_execbuffer2 *args, >> @@ -999,6 +1063,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> return -EINVAL; >> } >> >> + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && >> + !dev_priv->gt_slices.legacy_userspace_busy) >> + if (i915_legacy_userspace_busy(dev, 
ring) == 0) >> + dev_priv->gt_slices.legacy_userspace_busy = true; > > Now here we cannot just ignore a failure to switch slice configuration. > > static bool gt_legacy_userspace(struct intel_ring_buffer *ring, > struct drm_i915_gem_execbuffer2 *args) > { > if (ring->id == BCS) > return false; > > if (!HAS_SLICE_SHUTDOWN(ring->dev)) > return false; > > return (args->flags & I915_EXEC_USE_PREDICATE) == 0; > } > > if (gt_legacy_userspace(ring, args)) { > ret = gt_legacy_userspace_busy(ring); > if (ret) > return ret; > > dev_priv->gt_slices.legacy_userspace_busy = true; > } I liked all this simplification. Thanks. > > >> + >> mode = args->flags & I915_EXEC_CONSTANTS_MASK; >> mask = I915_EXEC_CONSTANTS_MASK; >> switch (mode) { >> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h >> index 497c441..0146bef 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -277,6 +277,17 @@ >> #define SLICE_STATUS_MAIN_ON (2<<0) >> #define SLICE_STATUS_BOTH_ON (3<<0) >> >> +#define HSW_SLICESHUTDOWN 0xA190 >> +#define SLICE_SHUTDOWN (1<<0) >> + >> +#define RC_IDLE_MAX_COUNT 0x2054 >> +#define CS_IDLE_COUNT_1US (1<<1) >> +#define CS_IDLE_COUNT_5US (1<<3) >> + >> +#define WAIT_FOR_RC6_EXIT 0x20CC >> +#define WAIT_RC6_EXIT (1<<0) >> +#define MASK_WAIT_RC6_EXIT (1<<16) >> + >> /* >> * 3D instructions used by the kernel >> */ >> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c >> index 86ccd52..1dd57fb 100644 >> --- a/drivers/gpu/drm/i915/i915_sysfs.c >> +++ b/drivers/gpu/drm/i915/i915_sysfs.c >> @@ -125,8 +125,7 @@ static ssize_t gt_slice_config_show(struct device *kdev, >> struct drm_device *dev = minor->dev; >> struct drm_i915_private *dev_priv = dev->dev_private; >> >> - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == >> - LOWER_SLICE_ENABLED ? "full" : "half"); >> + return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED ? 
"full" : "half"); >> } >> >> static ssize_t gt_slice_config_store(struct device *kdev, >> @@ -135,16 +134,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, >> { >> struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); >> struct drm_device *dev = minor->dev; >> + struct drm_i915_private *dev_priv = dev->dev_private; >> int ret; > > This cannot change state whilst legacy_userspace_busy. We cannot change to half while in legacy_userspace_busy and this check was already inside gt_slices_set_half(dev). So just the state_default was changed so on next idleness it would come to half anyway. >> >> if (!strncmp(buf, "full", sizeof("full") - 1)) { >> ret = intel_set_gt_full(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 1; > dev_priv->gt_slices.max_config = 2; >> } else if (!strncmp(buf, "half", sizeof("half") - 1)) { >> ret = intel_set_gt_half(dev); >> if (ret) >> return ret; >> + dev_priv->gt_slices.state_default = 0; > dev_priv->gt_slices.max_config = 1; >> } else >> return -EINVAL; > > (void)intel_gt_update_slice_config(dev); This remove the syncrony you had asked. Without it testcase for sysfs change is impossible because we don't know if it is in _busy... so we cannot simply check state after request a change. > > where > > int intel_gt_update_slice_config(struct drm_device *dev) > { > int config; > > if (!HAS_SLICE_SHUTDOWN(dev)) > return -ENODEV; > > if (dev_priv-gt_slices.legacy_userspace_busy) > return -EBUSY; > > config = min(dev_priv->gt_slices.config, > dev_priv->gt_slices.max_config); > if (config == dev_priv->gt_slices.config) I still think state_default is working, enough and simpler... or do you really want me to use config and max_config instead? > return 0; > > /* slice_set_config(dev, config); ? 
*/ > switch (config) { > case 1: gt_slice_set_half(dev); break; > case 2: gt_slice_set_full(dev); break; > default: return -EINVAL; > } > > dev_priv->gt_slices.config = config; > return 0; > } > >> return count; >> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c >> index 4f1b636..3810ecf 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) >> >> if (dev_priv->info->gen >= 6) >> gen6_rps_idle(dev->dev_private); >> + >> + if (HAS_SLICE_SHUTDOWN(dev) && >> + dev_priv->gt_slices.legacy_userspace_busy) { >> + intel_set_gt_half_async(dev); >> + } > > Just if (dev_priv->gt_slices.legacy_userspace_busy) { > dev_priv->gt_slices.legacy_userspace_busy = false; > intel_gt_update_slice_config(dev); > } > > -- > Chris Wilson, Intel Open Source Technology Centre Thank you very much.
On Tue, Oct 22, 2013 at 03:33:16PM -0200, Rodrigo Vivi wrote: > I still think state_default is working, enough and simpler... > or do you really want me to use config and max_config instead? I think keeping the hw config separate from the requested config helps a lot when reviewing the code in 6 months' time. See pc8 for a recent example where we also need to track hw vs sw state. Similarly in trying to split the code that implements the policy and that implements the mechanism. The devil is in the details, of course, and so this suggestion may indeed suck when implemented. You are free to refute any suggestion with a good reason. :) -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 685fb1d..67bbbce 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1219,6 +1219,12 @@ struct i915_package_c8 { } regsave; }; +struct i915_gt_slices { + int state_default; + int legacy_userspace_busy; + struct mutex lock; /* locks access to this scruct and slice registers */ +}; + typedef struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private { struct i915_package_c8 pc8; + struct i915_gt_slices gt_slices; + /* Old dri1 support infrastructure, beware the dragons ya fools entering * here! */ struct i915_dri1_state dri1; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..1d8187a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -923,6 +923,70 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, } static int +i915_legacy_userspace_busy(struct drm_device *dev, + struct intel_ring_buffer *ring) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret; + + if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS]) + return -ENODEV; + + if (dev_priv->gt_slices.state_default == 1) + return -EBADE; + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_GT_SLICE_INFO); + intel_ring_emit(ring, SLICE_SEL_BOTH); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, MI_PREDICATE_RESULT_2); + intel_ring_emit(ring, LOWER_SLICE_ENABLED); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, HSW_SLICESHUTDOWN); + intel_ring_emit(ring, ~SLICE_SHUTDOWN); + 
intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_1US); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, WAIT_FOR_RC6_EXIT); + intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT); + intel_ring_advance(ring); + + ret = intel_ring_begin(ring, 3); + if (ret) + return ret; + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, RC_IDLE_MAX_COUNT); + intel_ring_emit(ring, CS_IDLE_COUNT_5US); + intel_ring_advance(ring); + + return 0; +} + +static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, @@ -999,6 +1063,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 && + !dev_priv->gt_slices.legacy_userspace_busy) + if (i915_legacy_userspace_busy(dev, ring) == 0) + dev_priv->gt_slices.legacy_userspace_busy = true; + mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; switch (mode) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 497c441..0146bef 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -277,6 +277,17 @@ #define SLICE_STATUS_MAIN_ON (2<<0) #define SLICE_STATUS_BOTH_ON (3<<0) +#define HSW_SLICESHUTDOWN 0xA190 +#define SLICE_SHUTDOWN (1<<0) + +#define RC_IDLE_MAX_COUNT 0x2054 +#define CS_IDLE_COUNT_1US (1<<1) +#define CS_IDLE_COUNT_5US (1<<3) + +#define WAIT_FOR_RC6_EXIT 0x20CC +#define WAIT_RC6_EXIT (1<<0) +#define MASK_WAIT_RC6_EXIT (1<<16) + /* * 3D instructions used by the kernel */ diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 86ccd52..1dd57fb 100644 --- 
a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -125,8 +125,7 @@ static ssize_t gt_slice_config_show(struct device *kdev, struct drm_device *dev = minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; - return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == - LOWER_SLICE_ENABLED ? "full" : "half"); + return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED ? "full" : "half"); } static ssize_t gt_slice_config_store(struct device *kdev, @@ -135,16 +134,19 @@ static ssize_t gt_slice_config_store(struct device *kdev, { struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev); struct drm_device *dev = minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; if (!strncmp(buf, "full", sizeof("full") - 1)) { ret = intel_set_gt_full(dev); if (ret) return ret; + dev_priv->gt_slices.state_default = 1; } else if (!strncmp(buf, "half", sizeof("half") - 1)) { ret = intel_set_gt_half(dev); if (ret) return ret; + dev_priv->gt_slices.state_default = 0; } else return -EINVAL; return count; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4f1b636..3810ecf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev) if (dev_priv->info->gen >= 6) gen6_rps_idle(dev->dev_private); + + if (HAS_SLICE_SHUTDOWN(dev) && + dev_priv->gt_slices.legacy_userspace_busy) { + intel_set_gt_half_async(dev); + dev_priv->gt_slices.legacy_userspace_busy = false; + } } void intel_mark_fb_busy(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a9abbb5..98cd63e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -836,6 +836,7 @@ void intel_disable_gt_powersave(struct drm_device *dev); void ironlake_teardown_rc6(struct drm_device *dev); int 
intel_set_gt_full(struct drm_device *dev); int intel_set_gt_half(struct drm_device *dev); +void intel_set_gt_half_async(struct drm_device *dev); void intel_init_gt_slices(struct drm_device *dev); void gen6_update_ring_freq(struct drm_device *dev); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 63af075..c1bde36 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3873,6 +3873,7 @@ int intel_set_gt_full(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + mutex_lock(&dev_priv->gt_slices.lock); I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); /* Slices are enabled on RC6 exit */ @@ -3881,13 +3882,18 @@ int intel_set_gt_full(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_BOTH_ON), 2000)) { DRM_ERROR("Timeout enabling full gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); gen6_gt_force_wake_put(dev_priv); + mutex_unlock(&dev_priv->gt_slices.lock); return 0; } @@ -3899,6 +3905,10 @@ int intel_set_gt_half(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return -ENODEV; + if (dev_priv->gt_slices.legacy_userspace_busy) + return 0; + + mutex_lock(&dev_priv->gt_slices.lock); I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); /* Slices are disabled on RC6 exit */ @@ -3907,16 +3917,43 @@ int intel_set_gt_half(struct drm_device *dev) if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) == SLICE_STATUS_MAIN_ON), 2000)) { DRM_ERROR("Timed out disabling half gt slices\n"); + I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH); I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED); + gen6_gt_force_wake_put(dev_priv); + 
mutex_unlock(&dev_priv->gt_slices.lock); return -ETIMEDOUT; } + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); gen6_gt_force_wake_put(dev_priv); + + mutex_unlock(&dev_priv->gt_slices.lock); return 0; } +/** + * On Haswell, slices on/off transitions are done via RC6 sequence. + * This async function allows you to request slices shutdown without waiting. + * Slices will be disabled on next RC6 exit. + */ +void intel_set_gt_half_async(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + if (!HAS_SLICE_SHUTDOWN(dev)) + return; + + if (dev_priv->gt_slices.state_default == 1) + return; + + mutex_lock(&dev_priv->gt_slices.lock); + I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); + I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + mutex_unlock(&dev_priv->gt_slices.lock); +} + void intel_init_gt_slices(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3927,9 +3964,13 @@ void intel_init_gt_slices(struct drm_device *dev) if (!HAS_SLICE_SHUTDOWN(dev)) return; + dev_priv->gt_slices.state_default = 1; + dev_priv->gt_slices.legacy_userspace_busy = false; + mutex_init(&dev_priv->gt_slices.lock); + if (!i915_gt_slice_config) { - I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH); - I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED); + dev_priv->gt_slices.state_default = 0; + intel_set_gt_half_async(dev); } } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a4e97b..3fa3e24 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -731,7 +731,13 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +/* If this flag is set userspace is using predicate and half slices can be + * let disabled for power saving. 
Otherwise use all slices even when disabled + * by boot parameter or via sysfs interface + */ +#define I915_EXEC_USE_PREDICATE (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \
If Userspace isn't using MI_PREDICATE all slices must be enabled for backward compatibility. If I915_EXEC_USE_PREDICATE isn't set and default is set to half, kernel will force all slices on. v2: fix the inverted logic for backwards compatibility USE_PREDICATE unset forces gt_full when default is half instead of GT_FULL flag. v3: Accepting Chris's suggestions: better variable names; better logic around state_default x legacy_userspace_busy; remove unnecessary mutex; CC: Chris Wilson <chris@chris-wilson.co.uk> CC: Eric Anholt <eric@anholt.net> CC: Kenneth Graunke <kenneth@whitecape.org> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com> --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 69 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 11 +++++ drivers/gpu/drm/i915/i915_sysfs.c | 6 ++- drivers/gpu/drm/i915/intel_display.c | 6 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 45 ++++++++++++++++++- include/uapi/drm/i915_drm.h | 8 +++- 8 files changed, 149 insertions(+), 5 deletions(-)