diff mbox

drm/i915: fix plane/cursor handling when runtime suspended

Message ID 1407869712-1748-1-git-send-email-przanoni@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paulo Zanoni Aug. 12, 2014, 6:55 p.m. UTC
From: Paulo Zanoni <paulo.r.zanoni@intel.com>

If we're runtime suspended and try to use the plane interfaces, we
will get a lot of WARNs saying we did the wrong thing.

We need to get runtime PM references to pin/unpin the objects, and to
change the fences. The pin/unpin functions are the ideal places for
this, but intel_crtc_cursor_set_obj() doesn't call them, so we also
have to add get/put calls inside it. There is no problem if we runtime
suspend right after these functions are finished, because the
registers written are forwarded to system memory.

Note: for a complete fix of the cursor-dpms test case, we also need
the patch named "drm/i915: Don't try to enable cursor from setplane
when crtc is disabled".

v2: - Narrow the put/get calls on intel_crtc_cursor_set_obj() (Daniel)
v3: - Make get/put also surround the fence and unpin calls (Daniel and
      Ville).
    - Merge all the plane changes into a single patch since they're
      the same fix.
    - Add the comment requested by Daniel.
v4: - Remove spurious whitespace (Ville).
v5: - Remove intel_crtc_update_cursor() chunk since Ville did an
      equivalent fix in another patch (Ville).

Testcase: igt/pm_rpm/cursor
Testcase: igt/pm_rpm/cursor-dpms
Testcase: igt/pm_rpm/legacy-planes
Testcase: igt/pm_rpm/legacy-planes-dpms
Testcase: igt/pm_rpm/universal-planes
Testcase: igt/pm_rpm/universal-planes-dpms
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81645
Cc: stable@vger.kernel.org
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

Comments

Chris Wilson Aug. 12, 2014, 7:09 p.m. UTC | #1
On Tue, Aug 12, 2014 at 03:55:12PM -0300, Paulo Zanoni wrote:
> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> 
> If we're runtime suspended and try to use the plane interfaces, we
> will get a lot of WARNs saying we did the wrong thing.
> 
> We need to get runtime PM references to pin/unpin the objects, and to
> change the fences. The pin/unpin functions are the ideal places for
> this, but intel_crtc_cursor_set_obj() doesn't call them, so we also
> have to add get/put calls inside it. There is no problem if we runtime
> suspend right after these functions are finished, because the
> registers written are forwarded to system memory.
> 
> Note: for a complete fix of the cursor-dpms test case, we also need
> the patch named "drm/i915: Don't try to enable cursor from setplane
> when crtc is disabled".
> 
> v2: - Narrow the put/get calls on intel_crtc_cursor_set_obj() (Daniel)
> v3: - Make get/put also surround the fence and unpin calls (Daniel and
>       Ville).
>     - Merge all the plane changes into a single patch since they're
>       the same fix.
>     - Add the comment requested by Daniel.
> v4: - Remove spurious whitespace (Ville).
> v5: - Remove intel_crtc_update_cursor() chunk since Ville did an
>       equivalent fix in another patch (Ville).
> 
> Testcase: igt/pm_rpm/cursor
> Testcase: igt/pm_rpm/cursor-dpms
> Testcase: igt/pm_rpm/legacy-planes
> Testcase: igt/pm_rpm/legacy-planes-dpms
> Testcase: igt/pm_rpm/universal-planes
> Testcase: igt/pm_rpm/universal-planes-dpms
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81645
> Cc: stable@vger.kernel.org
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index a1cf052..2db9e06 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2149,6 +2149,15 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
>  	if (need_vtd_wa(dev) && alignment < 256 * 1024)
>  		alignment = 256 * 1024;
>  
> +	/*
> +	 * Global gtt pte registers are special registers which actually forward
> +	 * writes to a chunk of system memory. Which means that there is no risk
> +	 * that the register values disappear as soon as we call
> +	 * intel_runtime_pm_put(), so it is correct to wrap only the
> +	 * pin/unpin/fence and not more.
> +	 */
> +	intel_runtime_pm_get(dev_priv);
> +
>  	dev_priv->mm.interruptible = false;
>  	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
>  	if (ret)
> @@ -2166,21 +2175,30 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
>  	i915_gem_object_pin_fence(obj);
>  
>  	dev_priv->mm.interruptible = true;
> +	intel_runtime_pm_put(dev_priv);
>  	return 0;
>  
>  err_unpin:
>  	i915_gem_object_unpin_from_display_plane(obj);
>  err_interruptible:
>  	dev_priv->mm.interruptible = true;
> +	intel_runtime_pm_put(dev_priv);
>  	return ret;
>  }
>  
>  void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
>  {
> -	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
> +
> +	intel_runtime_pm_get(dev_priv);
>  
>  	i915_gem_object_unpin_fence(obj);
>  	i915_gem_object_unpin_from_display_plane(obj);
> +
> +	intel_runtime_pm_put(dev_priv);
>  }

framebuffer objects are pinned for a very long time, and the fbcon is
permanently pinned. This should have the effect of disabling rpm
entirely.
-Chris
Paulo Zanoni Aug. 12, 2014, 7:12 p.m. UTC | #2
2014-08-12 16:09 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
> On Tue, Aug 12, 2014 at 03:55:12PM -0300, Paulo Zanoni wrote:
>> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>
>> If we're runtime suspended and try to use the plane interfaces, we
>> will get a lot of WARNs saying we did the wrong thing.
>>
>> We need to get runtime PM references to pin/unpin the objects, and to
>> change the fences. The pin/unpin functions are the ideal places for
>> this, but intel_crtc_cursor_set_obj() doesn't call them, so we also
>> have to add get/put calls inside it. There is no problem if we runtime
>> suspend right after these functions are finished, because the
>> registers written are forwarded to system memory.
>>
>> Note: for a complete fix of the cursor-dpms test case, we also need
>> the patch named "drm/i915: Don't try to enable cursor from setplane
>> when crtc is disabled".
>>
>> v2: - Narrow the put/get calls on intel_crtc_cursor_set_obj() (Daniel)
>> v3: - Make get/put also surround the fence and unpin calls (Daniel and
>>       Ville).
>>     - Merge all the plane changes into a single patch since they're
>>       the same fix.
>>     - Add the comment requested by Daniel.
>> v4: - Remove spurious whitespace (Ville).
>> v5: - Remove intel_crtc_update_cursor() chunk since Ville did an
>>       equivalent fix in another patch (Ville).
>>
>> Testcase: igt/pm_rpm/cursor
>> Testcase: igt/pm_rpm/cursor-dpms
>> Testcase: igt/pm_rpm/legacy-planes
>> Testcase: igt/pm_rpm/legacy-planes-dpms
>> Testcase: igt/pm_rpm/universal-planes
>> Testcase: igt/pm_rpm/universal-planes-dpms
>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=81645
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> ---
>>  drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++++++-
>>  1 file changed, 34 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index a1cf052..2db9e06 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -2149,6 +2149,15 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
>>       if (need_vtd_wa(dev) && alignment < 256 * 1024)
>>               alignment = 256 * 1024;
>>
>> +     /*
>> +      * Global gtt pte registers are special registers which actually forward
>> +      * writes to a chunk of system memory. Which means that there is no risk
>> +      * that the register values disappear as soon as we call
>> +      * intel_runtime_pm_put(), so it is correct to wrap only the
>> +      * pin/unpin/fence and not more.
>> +      */
>> +     intel_runtime_pm_get(dev_priv);
>> +
>>       dev_priv->mm.interruptible = false;
>>       ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
>>       if (ret)
>> @@ -2166,21 +2175,30 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
>>       i915_gem_object_pin_fence(obj);
>>
>>       dev_priv->mm.interruptible = true;
>> +     intel_runtime_pm_put(dev_priv);
>>       return 0;
>>
>>  err_unpin:
>>       i915_gem_object_unpin_from_display_plane(obj);
>>  err_interruptible:
>>       dev_priv->mm.interruptible = true;
>> +     intel_runtime_pm_put(dev_priv);
>>       return ret;
>>  }
>>
>>  void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
>>  {
>> -     WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
>> +     struct drm_device *dev = obj->base.dev;
>> +     struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +     WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>> +
>> +     intel_runtime_pm_get(dev_priv);
>>
>>       i915_gem_object_unpin_fence(obj);
>>       i915_gem_object_unpin_from_display_plane(obj);
>> +
>> +     intel_runtime_pm_put(dev_priv);
>>  }
>
> framebuffer objects are pinned for a very long time, and the fbcon is
> permanently pinned. This should have the effect of disabling rpm
> entirely.

But we just get/put RPM around this function, not for the whole time
while the object is pinned.

> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson Aug. 12, 2014, 7:28 p.m. UTC | #3
On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
> But we just get/put RPM around this function, not for the whole time
> while the object is pinned.

Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
unpin then? It doesn't touch any hardware registers.
-Chris
Paulo Zanoni Aug. 12, 2014, 7:33 p.m. UTC | #4
2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
>> But we just get/put RPM around this function, not for the whole time
>> while the object is pinned.
>
> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
> unpin then? It doesn't touch any hardware registers.

Only because Daniel asked for it in a conversation we had on IRC, and I
automatically assumed the patch would be rejected if I didn't include
it :)

Since both you and Ville pointed that out, I should probably submit
yet another version, without the unpin part, and let Daniel choose
which one to merge...

> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Daniel Vetter Aug. 12, 2014, 8:19 p.m. UTC | #5
On Tue, Aug 12, 2014 at 9:33 PM, Paulo Zanoni <przanoni@gmail.com> wrote:
> 2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
>> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
>>> But we just get/put RPM around this function, not for the whole time
>>> while the object is pinned.
>>
>> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
>> unpin then? It doesn't touch any hardware registers.
>
> Only because Daniel asked it on a conversation we had on IRC, and I
> automatically assumed the patch would be rejected if I didn't include
> it :)
>
> Since both you and VIlle pointed that out, I should probably submit
> yet another version, without the unpin part, and let Daniel choose
> which one to merge...

Hm, I've indeed forgotten about the lazy unbinding. But that poses the
question about the final bo unref. For example:
1) create bo, gtt mmap it to force it into existence (and into the global gtt)
2) unmap binding
3) wait for rpm entry
4) unref bo ... causing pte writes for the global gtt unbinding while
runtime suspended or not?

boom or not boom?

Maybe the bug is simply in a different function ;-)
-Daniel
Chris Wilson Aug. 12, 2014, 8:30 p.m. UTC | #6
On Tue, Aug 12, 2014 at 10:19:20PM +0200, Daniel Vetter wrote:
> On Tue, Aug 12, 2014 at 9:33 PM, Paulo Zanoni <przanoni@gmail.com> wrote:
> > 2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
> >> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
> >>> But we just get/put RPM around this function, not for the whole time
> >>> while the object is pinned.
> >>
> >> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
> >> unpin then? It doesn't touch any hardware registers.
> >
> > Only because Daniel asked it on a conversation we had on IRC, and I
> > automatically assumed the patch would be rejected if I didn't include
> > it :)
> >
> > Since both you and VIlle pointed that out, I should probably submit
> > yet another version, without the unpin part, and let Daniel choose
> > which one to merge...
> 
> Hm, I've indeed forgotten about the lazy unbinding. But that poses the
> question about the final bo unref. For example:
> 1) create bo, gtt mmap it to force it into existence (and into the global gtt)
> 2) unmap binding
> 3) wait for rpm entry
> 4) unref bo ... causing pte writes for the global gtt unbinding while
> runtime suspended or not?
> 
> boom or not boom?
> 
> Maybe the bug is simply in a different function ;-)

Yes. If you get serious about it, you will want to move the lazy stuff
into its own workqueue to be run the next time the device is awake.
-Chris
Daniel Vetter Aug. 12, 2014, 8:37 p.m. UTC | #7
On Tue, Aug 12, 2014 at 10:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Aug 12, 2014 at 10:19:20PM +0200, Daniel Vetter wrote:
>> On Tue, Aug 12, 2014 at 9:33 PM, Paulo Zanoni <przanoni@gmail.com> wrote:
>> > 2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
>> >> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
>> >>> But we just get/put RPM around this function, not for the whole time
>> >>> while the object is pinned.
>> >>
>> >> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
>> >> unpin then? It doesn't touch any hardware registers.
>> >
>> > Only because Daniel asked it on a conversation we had on IRC, and I
>> > automatically assumed the patch would be rejected if I didn't include
>> > it :)
>> >
>> > Since both you and VIlle pointed that out, I should probably submit
>> > yet another version, without the unpin part, and let Daniel choose
>> > which one to merge...
>>
>> Hm, I've indeed forgotten about the lazy unbinding. But that poses the
>> question about the final bo unref. For example:
>> 1) create bo, gtt mmap it to force it into existence (and into the global gtt)
>> 2) unmap binding
>> 3) wait for rpm entry
>> 4) unref bo ... causing pte writes for the global gtt unbinding while
>> runtime suspended or not?
>>
>> boom or not boom?
>>
>> Maybe the bug is simply in a different function ;-)
>
> Yes. If you get serious about it, you will want to move the lazy stuff
> into its own workqueue to be run the next time the device is awake.

4b) shrinker happens and unbinds (potentially purgeable) buffer objects.

In that case I don't think the core mm would be happy if we'd
indefinitely delay this until someone wiggles the mouse. Especially if
the compositor wants that memory to render the frame it needs to
switch everything on again ...
-Daniel
Chris Wilson Aug. 12, 2014, 8:51 p.m. UTC | #8
On Tue, Aug 12, 2014 at 10:37:21PM +0200, Daniel Vetter wrote:
> On Tue, Aug 12, 2014 at 10:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, Aug 12, 2014 at 10:19:20PM +0200, Daniel Vetter wrote:
> >> On Tue, Aug 12, 2014 at 9:33 PM, Paulo Zanoni <przanoni@gmail.com> wrote:
> >> > 2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
> >> >> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
> >> >>> But we just get/put RPM around this function, not for the whole time
> >> >>> while the object is pinned.
> >> >>
> >> >> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
> >> >> unpin then? It doesn't touch any hardware registers.
> >> >
> >> > Only because Daniel asked it on a conversation we had on IRC, and I
> >> > automatically assumed the patch would be rejected if I didn't include
> >> > it :)
> >> >
> >> > Since both you and VIlle pointed that out, I should probably submit
> >> > yet another version, without the unpin part, and let Daniel choose
> >> > which one to merge...
> >>
> >> Hm, I've indeed forgotten about the lazy unbinding. But that poses the
> >> question about the final bo unref. For example:
> >> 1) create bo, gtt mmap it to force it into existence (and into the global gtt)
> >> 2) unmap binding
> >> 3) wait for rpm entry
> >> 4) unref bo ... causing pte writes for the global gtt unbinding while
> >> runtime suspended or not?
> >>
> >> boom or not boom?
> >>
> >> Maybe the bug is simply in a different function ;-)
> >
> > Yes. If you get serious about it, you will want to move the lazy stuff
> > into its own workqueue to be run the next time the device is awake.
> 
> 4b) shrinker happens and unbinds (potentially purgeable) buffer objects.
> 
> In that case I don't think the core mm would be happy if we'd
> indefinitely delay this until someone wiggles the mouse.

You underestimate just how much we can delay it ;-) But for your next
trick, you could unbind the buffer without touching the ptes since the
gpu is not using those pages... Diminishing returns I guess.

> Especially if
> the compositor wants that memory to render the frame it needs to
> switch everything on again ...

But that's true without rpm anyway. It would need to enable the device to
render, whether or not the system is thrashing.
-Chris
Daniel Vetter Aug. 13, 2014, 7:59 a.m. UTC | #9
On Tue, Aug 12, 2014 at 09:51:13PM +0100, Chris Wilson wrote:
> On Tue, Aug 12, 2014 at 10:37:21PM +0200, Daniel Vetter wrote:
> > On Tue, Aug 12, 2014 at 10:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Tue, Aug 12, 2014 at 10:19:20PM +0200, Daniel Vetter wrote:
> > >> On Tue, Aug 12, 2014 at 9:33 PM, Paulo Zanoni <przanoni@gmail.com> wrote:
> > >> > 2014-08-12 16:28 GMT-03:00 Chris Wilson <chris@chris-wilson.co.uk>:
> > >> >> On Tue, Aug 12, 2014 at 04:12:38PM -0300, Paulo Zanoni wrote:
> > >> >>> But we just get/put RPM around this function, not for the whole time
> > >> >>> while the object is pinned.
> > >> >>
> > >> >> Ah misread, saw pin->get, unpin->put and assumed the symmetry. But why
> > >> >> unpin then? It doesn't touch any hardware registers.
> > >> >
> > >> > Only because Daniel asked it on a conversation we had on IRC, and I
> > >> > automatically assumed the patch would be rejected if I didn't include
> > >> > it :)
> > >> >
> > >> > Since both you and VIlle pointed that out, I should probably submit
> > >> > yet another version, without the unpin part, and let Daniel choose
> > >> > which one to merge...
> > >>
> > >> Hm, I've indeed forgotten about the lazy unbinding. But that poses the
> > >> question about the final bo unref. For example:
> > >> 1) create bo, gtt mmap it to force it into existence (and into the global gtt)
> > >> 2) unmap binding
> > >> 3) wait for rpm entry
> > >> 4) unref bo ... causing pte writes for the global gtt unbinding while
> > >> runtime suspended or not?
> > >>
> > >> boom or not boom?
> > >>
> > >> Maybe the bug is simply in a different function ;-)
> > >
> > > Yes. If you get serious about it, you will want to move the lazy stuff
> > > into its own workqueue to be run the next time the device is awake.
> > 
> > 4b) shrinker happens and unbinds (potentially purgeable) buffer objects.
> > 
> > In that case I don't think the core mm would be happy if we'd
> > indefinitely delay this until someone wiggles the mouse.
> 
> You underestimate just how much we can delay it ;-) But for your next
> trick, you could unbind the buffer without touching the ptes since the
> gpu is not using those pages... Diminishing returns I guess.

That's actually something I've considered - on gen6+ we don't use the
global gtt any more for rendering, so it's fully isolated from whatever
userspace can get at. Well ignoring an icky regression from full ppgtt for
the aliasing ppgtt binding rules.

So we /could/ just leave the stale pte hanging in the air forever. But I'm
not sure whether we want to do that for general robustness reasons.
Clearing all ptes on device wake-up isn't an option since it takes too
much time, and delaying the clearing doesn't look like it's worth it from a
complexity pov.

> > Especially if
> > the compositor wants that memory to render the frame it needs to
> > switch everything on again ...
> 
> But's true without rpm anyway. It would need to enable the device to
> render, whether or not the system is thrashing.

Yeah, which is also why I don't think just waking the device in the shrinker/bo_free
callback is harmful - very likely we didn't wake it for nothing anyway. Oh
and my scenario can be fixed with some software rendering into an Xshm or
so, and if you assume something else running overnight has thrashed all
the memory to swap.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a1cf052..2db9e06 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2149,6 +2149,15 @@  intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	if (need_vtd_wa(dev) && alignment < 256 * 1024)
 		alignment = 256 * 1024;
 
+	/*
+	 * Global gtt pte registers are special registers which actually forward
+	 * writes to a chunk of system memory. Which means that there is no risk
+	 * that the register values disappear as soon as we call
+	 * intel_runtime_pm_put(), so it is correct to wrap only the
+	 * pin/unpin/fence and not more.
+	 */
+	intel_runtime_pm_get(dev_priv);
+
 	dev_priv->mm.interruptible = false;
 	ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined);
 	if (ret)
@@ -2166,21 +2175,30 @@  intel_pin_and_fence_fb_obj(struct drm_device *dev,
 	i915_gem_object_pin_fence(obj);
 
 	dev_priv->mm.interruptible = true;
+	intel_runtime_pm_put(dev_priv);
 	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_from_display_plane(obj);
 err_interruptible:
 	dev_priv->mm.interruptible = true;
+	intel_runtime_pm_put(dev_priv);
 	return ret;
 }
 
 void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
 {
-	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	intel_runtime_pm_get(dev_priv);
 
 	i915_gem_object_unpin_fence(obj);
 	i915_gem_object_unpin_from_display_plane(obj);
+
+	intel_runtime_pm_put(dev_priv);
 }
 
 /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel
@@ -8170,6 +8188,16 @@  static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 
 	/* we only need to pin inside GTT if cursor is non-phy */
 	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Global gtt pte registers are special registers which actually forward
+	 * writes to a chunk of system memory. Which means that there is no risk
+	 * that the register values disappear as soon as we call
+	 * intel_runtime_pm_put(), so it is correct to wrap only the
+	 * pin/unpin/fence and not more.
+	 */
+	intel_runtime_pm_get(dev_priv);
+
 	if (!INTEL_INFO(dev)->cursor_needs_physical) {
 		unsigned alignment;
 
@@ -8219,6 +8247,10 @@  static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 
 	i915_gem_track_fb(intel_crtc->cursor_bo, obj,
 			  INTEL_FRONTBUFFER_CURSOR(pipe));
+
+	if (obj)
+		intel_runtime_pm_put(dev_priv);
+
 	mutex_unlock(&dev->struct_mutex);
 
 	old_width = intel_crtc->cursor_width;
@@ -8240,6 +8272,7 @@  static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 fail_unpin:
 	i915_gem_object_unpin_from_display_plane(obj);
 fail_locked:
+	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 fail:
 	drm_gem_object_unreference_unlocked(&obj->base);