@@ -99,6 +99,10 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
+ struct {
+ u64 timeout_us;
+ } watchdog;
+
u32 *lrc_reg_state;
union {
struct {
@@ -6,6 +6,7 @@
#ifndef __INTEL_EXECLISTS_SUBMISSION_H__
#define __INTEL_EXECLISTS_SUBMISSION_H__
+#include <linux/llist.h>
#include <linux/types.h>
struct drm_printer;
@@ -13,6 +14,7 @@ struct drm_printer;
struct i915_request;
struct intel_context;
struct intel_engine_cs;
+struct intel_gt;
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
@@ -31,6 +31,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(>->closed_vma);
spin_lock_init(>->closed_lock);
+ init_llist_head(>->watchdog.list);
+ INIT_WORK(>->watchdog.work, intel_gt_watchdog_work);
+
intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
@@ -78,4 +78,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
+void intel_gt_watchdog_work(struct work_struct *work);
+
#endif /* __INTEL_GT_H__ */
@@ -8,6 +8,7 @@
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_engine_heartbeat.h"
+#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -242,4 +243,29 @@ void intel_gt_fini_requests(struct intel_gt *gt)
{
/* Wait until the work is marked as finished before unloading! */
cancel_delayed_work_sync(>->requests.retire_work);
+
+ flush_work(>->watchdog.work);
+}
+
+void intel_gt_watchdog_work(struct work_struct *work)
+{
+ struct intel_gt *gt =
+ container_of(work, typeof(*gt), watchdog.work);
+ struct i915_request *rq, *rn;
+ struct llist_node *first;
+
+ first = llist_del_all(>->watchdog.list);
+ if (!first)
+ return;
+
+ llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
+ if (!i915_request_completed(rq)) {
+ drm_notice(>->i915->drm,
+ "Fence expiration time out %llx:%llu!\n",
+ rq->fence.context,
+ rq->fence.seqno);
+ i915_request_cancel(rq, -EINTR);
+ }
+ i915_request_put(rq);
+ }
}
@@ -8,10 +8,12 @@
#include <linux/ktime.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "uc/intel_uc.h"
@@ -62,6 +64,11 @@ struct intel_gt {
struct delayed_work retire_work;
} requests;
+ struct {
+ struct llist_head list;
+ struct work_struct work;
+ } watchdog;
+
struct intel_wakeref wakeref;
atomic_t user_wakeref;
@@ -277,6 +277,57 @@ static void remove_from_engine(struct i915_request *rq)
__notify_execute_cb_imm(rq);
}
+static void __rq_init_watchdog(struct i915_request *rq)
+{
+ rq->watchdog.timer.function = NULL;
+}
+
+static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
+{
+ struct i915_request *rq =
+ container_of(hrtimer, struct i915_request, watchdog.timer);
+ struct intel_gt *gt = rq->engine->gt;
+
+ if (!i915_request_completed(rq)) {
+ if (llist_add(&rq->watchdog.link, >->watchdog.list))
+ schedule_work(>->watchdog.work);
+ } else {
+ i915_request_put(rq);
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void __rq_arm_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+ struct intel_context *ce = rq->context;
+
+ if (!ce->watchdog.timeout_us)
+ return;
+
+ hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ wdg->timer.function = __rq_watchdog_expired;
+ hrtimer_start_range_ns(&wdg->timer,
+ ns_to_ktime(ce->watchdog.timeout_us *
+ NSEC_PER_USEC),
+ /*
+ * FIXME check if it gives the "not sooner"
+ * guarantee or slack is both ways
+ */
+ NSEC_PER_MSEC,
+ HRTIMER_MODE_REL);
+ i915_request_get(rq);
+}
+
+static void __rq_cancel_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+
+ if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0)
+ i915_request_put(rq);
+}
+
bool i915_request_retire(struct i915_request *rq)
{
if (!__i915_request_is_complete(rq))
@@ -288,6 +339,8 @@ bool i915_request_retire(struct i915_request *rq)
trace_i915_request_retire(rq);
i915_request_mark_complete(rq);
+ __rq_cancel_watchdog(rq);
+
/*
* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
@@ -667,6 +720,8 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
if (unlikely(fence->error))
i915_request_set_error_once(request, fence->error);
+ else
+ __rq_arm_watchdog(request);
/*
* We need to serialize use of the submit_request() callback
@@ -854,6 +909,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
+ __rq_init_watchdog(rq);
GEM_BUG_ON(rq->capture_list);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
@@ -26,7 +26,9 @@
#define I915_REQUEST_H
#include <linux/dma-fence.h>
+#include <linux/hrtimer.h>
#include <linux/irq_work.h>
+#include <linux/llist.h>
#include <linux/lockdep.h>
#include "gem/i915_gem_context_types.h"
@@ -289,6 +291,12 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;
+ /** Watchdog support fields. */
+ struct i915_request_watchdog {
+ struct llist_node link;
+ struct hrtimer timer;
+ } watchdog;
+
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;