[v2,05/11] drm/i915: Separate out reset flags from the reset counter

Message ID	1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Arun Siluvery <arun.siluvery@linux.intel.com> To: intel-gfx@lists.freedesktop.org Date: Fri, 5 Aug 2016 17:30:01 +0100 Message-Id: <1470414607-32453-6-git-send-email-arun.siluvery@linux.intel.com> In-Reply-To: <1470414607-32453-1-git-send-email-arun.siluvery@linux.intel.com> References: <1470414607-32453-1-git-send-email-arun.siluvery@linux.intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Subject: [Intel-gfx] [PATCH v2 05/11] drm/i915: Separate out reset flags from the reset counter Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 09f7372..cacb6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1739,20 +1739,13 @@ int i915_reset(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; struct i915_gpu_error *error = &dev_priv->gpu_error; - unsigned reset_counter; int ret; mutex_lock(&dev->struct_mutex); /* Clear any previous failed attempts at recovery. Time to try again. */ - atomic_andnot(I915_WEDGED, &error->reset_counter); - - /* Clear the reset-in-progress flag and increment the reset epoch. */ - reset_counter = atomic_inc_return(&error->reset_counter); - if (WARN_ON(__i915_reset_in_progress(reset_counter))) { - ret = -EIO; - goto error; - } + __clear_bit(I915_WEDGED, &error->flags); + error->reset_count++; pr_notice("drm/i915: Resetting chip after gpu hang\n"); @@ -1789,6 +1782,7 @@ int i915_reset(struct drm_i915_private *dev_priv) goto error; } + clear_bit(I915_RESET_IN_PROGRESS, &error->flags); mutex_unlock(&dev->struct_mutex); /* diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fa1b6e6..9bfc1d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1378,9 +1378,10 @@ struct i915_gpu_error { * State variable controlling the reset flow and count * * This is a counter which gets incremented when reset is triggered, - * and again when reset has been handled. So odd values (lowest bit set) - * means that reset is in progress and even values that - * (reset_counter >> 1):th reset was successfully completed. + * + * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set + * meaning that any waiters holding onto the struct_mutex should + * relinquish the lock immediately in order for the reset to start. * * If reset is not completed succesfully, the I915_WEDGE bit is * set meaning that hardware is terminally sour and there is no @@ -1395,10 +1396,11 @@ struct i915_gpu_error { * naturally enforces the correct ordering between the bail-out of the * waiter and the gpu reset work code. */ - atomic_t reset_counter; + unsigned long reset_count; -#define I915_RESET_IN_PROGRESS_FLAG 1 -#define I915_WEDGED (1 << 31) + unsigned long flags; +#define I915_RESET_IN_PROGRESS 0 +#define I915_WEDGED (BITS_PER_LONG-1) /** * Waitqueue to signal when a hang is detected. Used to for waiters @@ -3204,44 +3206,24 @@ i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); -static inline u32 i915_reset_counter(struct i915_gpu_error *error) -{ - return atomic_read(&error->reset_counter); -} - -static inline bool __i915_reset_in_progress(u32 reset) -{ - return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG); -} - -static inline bool __i915_reset_in_progress_or_wedged(u32 reset) -{ - return unlikely(reset & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)); -} - -static inline bool __i915_terminally_wedged(u32 reset) -{ - return unlikely(reset & I915_WEDGED); -} - static inline bool i915_reset_in_progress(struct i915_gpu_error *error) { - return __i915_reset_in_progress(i915_reset_counter(error)); + return test_bit(I915_RESET_IN_PROGRESS, &error->flags); } -static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) +static inline bool i915_terminally_wedged(struct i915_gpu_error *error) { - return __i915_reset_in_progress_or_wedged(i915_reset_counter(error)); + return test_bit(I915_WEDGED, &error->flags); } -static inline bool i915_terminally_wedged(struct i915_gpu_error *error) +static inline bool i915_reset_in_progress_or_wedged(struct i915_gpu_error *error) { - return __i915_terminally_wedged(i915_reset_counter(error)); + return i915_reset_in_progress(error) | i915_terminally_wedged(error); } static inline u32 i915_reset_count(struct i915_gpu_error *error) { - return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2; + return READ_ONCE(error->reset_count); } void i915_gem_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d4bcf88..669fb35 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2515,7 +2515,7 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter); + set_bit(I915_WEDGED, &dev_priv->gpu_error.flags); for_each_engine(engine, dev_priv) i915_gem_cleanup_engine(engine); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fe06687..e8e4204 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -238,16 +238,18 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) +static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) { - if (__i915_terminally_wedged(reset_counter)) + struct i915_gpu_error *error = &dev_priv->gpu_error; + + if (i915_terminally_wedged(error)) return -EIO; - if (__i915_reset_in_progress(reset_counter)) { + if (i915_reset_in_progress(error)) { /* Non-interruptible callers can't handle -EAGAIN, hence return * -EIO unconditionally for these. */ - if (!interruptible) + if (!dev_priv->mm.interruptible) return -EIO; return -EAGAIN; @@ -336,7 +338,6 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct drm_i915_private *dev_priv = engine->i915; - unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); struct drm_i915_gem_request *req; u32 seqno; int ret; @@ -345,7 +346,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex * and restart. */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); + ret = i915_gem_check_wedge(dev_priv); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 006a855..2ee0d35 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2509,53 +2509,41 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv) kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); + DRM_DEBUG_DRIVER("resetting chip\n"); + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); + /* - * Note that there's only one work item which does gpu resets, so we - * need not worry about concurrent gpu resets potentially incrementing - * error->reset_counter twice. We only need to take care of another - * racing irq/hangcheck declaring the gpu dead for a second time. A - * quick check for that is good enough: schedule_work ensures the - * correct ordering between hang detection and this work item, and since - * the reset in-progress bit is only ever set by code outside of this - * work we don't need to worry about any other races. + * In most cases it's guaranteed that we get here with an RPM + * reference held, for example because there is a pending GPU + * request that won't finish until the reset is done. This + * isn't the case at least when we get here by doing a + * simulated reset via debugs, so get an RPM reference. */ - if (i915_reset_in_progress(&dev_priv->gpu_error)) { - DRM_DEBUG_DRIVER("resetting chip\n"); - kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); - - /* - * In most cases it's guaranteed that we get here with an RPM - * reference held, for example because there is a pending GPU - * request that won't finish until the reset is done. This - * isn't the case at least when we get here by doing a - * simulated reset via debugs, so get an RPM reference. - */ - intel_runtime_pm_get(dev_priv); + intel_runtime_pm_get(dev_priv); - intel_prepare_reset(dev_priv); + intel_prepare_reset(dev_priv); - /* - * All state reset _must_ be completed before we update the - * reset counter, for otherwise waiters might miss the reset - * pending state and not properly drop locks, resulting in - * deadlocks with the reset work. - */ - ret = i915_reset(dev_priv); + /* + * All state reset _must_ be completed before we update the + * reset counter, for otherwise waiters might miss the reset + * pending state and not properly drop locks, resulting in + * deadlocks with the reset work. + */ + ret = i915_reset(dev_priv); - intel_finish_reset(dev_priv); + intel_finish_reset(dev_priv); - intel_runtime_pm_put(dev_priv); + intel_runtime_pm_put(dev_priv); - if (ret == 0) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (ret == 0) + kobject_uevent_env(kobj, + KOBJ_CHANGE, reset_done_event); - /* - * Note: The wake_up also serves as a memory barrier so that - * waiters see the update value of the reset counter atomic_t. - */ - wake_up_all(&dev_priv->gpu_error.reset_queue); - } + /* + * Note: The wake_up also serves as a memory barrier so that + * waiters see the update value of the reset counter atomic_t. + */ + i915_error_wake_up(dev_priv); } static void i915_report_and_clear_eir(struct drm_i915_private *dev_priv) @@ -2674,25 +2662,27 @@ void i915_handle_error(struct drm_i915_private *dev_priv, i915_capture_error_state(dev_priv, engine_mask, error_msg); i915_report_and_clear_eir(dev_priv); - if (engine_mask) { - atomic_or(I915_RESET_IN_PROGRESS_FLAG, - &dev_priv->gpu_error.reset_counter); + if (!engine_mask) + return; - /* - * Wakeup waiting processes so that the reset function - * i915_reset_and_wakeup doesn't deadlock trying to grab - * various locks. By bumping the reset counter first, the woken - * processes will see a reset in progress and back off, - * releasing their locks and then wait for the reset completion. - * We must do this for _all_ gpu waiters that might hold locks - * that the reset work needs to acquire. - * - * Note: The wake_up serves as the required memory barrier to - * ensure that the waiters see the updated value of the reset - * counter atomic_t. - */ - i915_error_wake_up(dev_priv); - } + if (test_and_set_bit(I915_RESET_IN_PROGRESS, + &dev_priv->gpu_error.flags)) + return; + + /* + * Wakeup waiting processes so that the reset function + * i915_reset_and_wakeup doesn't deadlock trying to grab + * various locks. By bumping the reset counter first, the woken + * processes will see a reset in progress and back off, + * releasing their locks and then wait for the reset completion. + * We must do this for _all_ gpu waiters that might hold locks + * that the reset work needs to acquire. + * + * Note: The wake_up serves as the required memory barrier to + * ensure that the waiters see the updated value of the reset + * counter atomic_t. + */ + i915_error_wake_up(dev_priv); i915_reset_and_wakeup(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9cbf543..5934789 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3174,15 +3174,26 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) drm_modeset_unlock_all(&dev_priv->drm); } +static bool abort_flip_on_reset(struct intel_crtc *crtc) +{ + struct i915_gpu_error *error = &to_i915(crtc->base.dev)->gpu_error; + + if (i915_reset_in_progress(error)) + return true; + + if (crtc->reset_count != i915_reset_count(error)) + return true; + + return false; +} + static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - unsigned reset_counter; bool pending; - reset_counter = i915_reset_counter(&to_i915(dev)->gpu_error); - if (intel_crtc->reset_counter != reset_counter) + if (abort_flip_on_reset(intel_crtc)) return false; spin_lock_irq(&dev->event_lock); @@ -10971,10 +10982,8 @@ static bool __pageflip_finished_cs(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - unsigned reset_counter; - reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (crtc->reset_counter != reset_counter) + if (abort_flip_on_reset(crtc)) return true; /* @@ -11655,8 +11664,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (ret) goto cleanup; - intel_crtc->reset_counter = i915_reset_counter(&dev_priv->gpu_error); - if (__i915_reset_in_progress_or_wedged(intel_crtc->reset_counter)) { + intel_crtc->reset_count = i915_reset_count(&dev_priv->gpu_error); + if (i915_reset_in_progress_or_wedged(&dev_priv->gpu_error)) { ret = -EIO; goto cleanup; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e54e6c20..fbd10bd 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -683,8 +683,8 @@ struct intel_crtc { struct intel_crtc_state *config; - /* reset counter value when the last flip was submitted */ - unsigned int reset_counter; + /* global reset count when the last flip was submitted */ + unsigned int reset_count; /* Access to these should be protected by dev_priv->irq_lock. */ bool cpu_fifo_underrun_disabled;

[v2,05/11] drm/i915: Separate out reset flags from the reset counter

Commit Message

Patch