Message ID | 20231214100641.2479582-1-karthik.poosa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/xe: Add wait for completion after gt force reset | expand |
I think the title should be "make sysfs gt force reset synchronous". On 14-12-2023 15:36, Karthik Poosa wrote: > Wait for gt reset to complete before returning from force_reset > sysfs call. Without this igt test freq_reset_multiple fails > sporadically in case xe_guc_pc is not started. > > Testcase: igt@xe_guc_pc@freq_reset_multiple > Signed-off-by: Karthik Poosa <karthik.poosa@intel.com> > --- > drivers/gpu/drm/xe/xe_gt.c | 3 +++ > drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++ > drivers/gpu/drm/xe/xe_gt_types.h | 3 +++ > 3 files changed, 16 insertions(+) > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c > index dfd9cf01a5d5..eb7552b6dfa5 100644 > --- a/drivers/gpu/drm/xe/xe_gt.c > +++ b/drivers/gpu/drm/xe/xe_gt.c > @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) > > gt->tile = tile; > gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); > + init_completion(&gt->reset_done); > > return gt; > } > @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt) > xe_device_mem_access_put(gt_to_xe(gt)); > XE_WARN_ON(err); > > + complete(&gt->reset_done); > + > xe_gt_info(gt, "reset done\n"); > > return 0; > diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c > index c4b67cf09f8f..49b30937a28b 100644 > --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c > +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c > @@ -23,6 +23,8 @@ > #include "xe_uc_debugfs.h" > #include "xe_wa.h" > > +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000)) > + > static struct xe_gt *node_to_gt(struct drm_info_node *node) > { > return node->info_ent->data; > @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data) > static int force_reset(struct seq_file *m, void *data) > { > struct xe_gt *gt = node_to_gt(m->private); > + struct xe_device *xe = gt_to_xe(gt); > + unsigned long timeout; > This may not work when multiple processes try a gt reset simultaneously. A check for a reset in progress should be here.
Regards, Badal > xe_gt_reset_async(gt); > > + timeout = wait_for_completion_timeout(&gt->reset_done, XE_GT_RESET_TIMEOUT_MS); > + if (timeout == 0) { > + drm_err(&xe->drm, "gt reset timed out"); > + return -ETIMEDOUT; > + } > + > return 0; > } > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h > index f74684660475..6f2fb9e3cfea 100644 > --- a/drivers/gpu/drm/xe/xe_gt_types.h > +++ b/drivers/gpu/drm/xe/xe_gt_types.h > @@ -358,6 +358,9 @@ struct xe_gt { > /** @oob: bitmap with active OOB workaroudns */ > unsigned long *oob; > } wa_active; > + > + /** @reset_done : Completion of GT reset */ > + struct completion reset_done; > }; > > #endif
On Thu, 14 Dec 2023, Karthik Poosa <karthik.poosa@intel.com> wrote: > Wait for gt reset to complete before returning from force_reset > sysfs call. Without this igt test freq_reset_multiple fails > sporadically in case xe_guc_pc is not started. Please send xe changes to intel-xe mailing list. Thanks, Jani. > > Testcase: igt@xe_guc_pc@freq_reset_multiple > Signed-off-by: Karthik Poosa <karthik.poosa@intel.com> > --- > drivers/gpu/drm/xe/xe_gt.c | 3 +++ > drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++ > drivers/gpu/drm/xe/xe_gt_types.h | 3 +++ > 3 files changed, 16 insertions(+) > > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c > index dfd9cf01a5d5..eb7552b6dfa5 100644 > --- a/drivers/gpu/drm/xe/xe_gt.c > +++ b/drivers/gpu/drm/xe/xe_gt.c > @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) > > gt->tile = tile; > gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); > + init_completion(&gt->reset_done); > > return gt; > } > @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt) > xe_device_mem_access_put(gt_to_xe(gt)); > XE_WARN_ON(err); > > + complete(&gt->reset_done); > + > xe_gt_info(gt, "reset done\n"); > > return 0; > diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c > index c4b67cf09f8f..49b30937a28b 100644 > --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c > +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c > @@ -23,6 +23,8 @@ > #include "xe_uc_debugfs.h" > #include "xe_wa.h" > > +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000)) > + > static struct xe_gt *node_to_gt(struct drm_info_node *node) > { > return node->info_ent->data; > @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data) > static int force_reset(struct seq_file *m, void *data) > { > struct xe_gt *gt = node_to_gt(m->private); > + struct xe_device *xe = gt_to_xe(gt); > + unsigned long timeout; > > xe_gt_reset_async(gt); > > + timeout = wait_for_completion_timeout(&gt->reset_done, XE_GT_RESET_TIMEOUT_MS); > + 
if (timeout == 0) { > + drm_err(&xe->drm, "gt reset timed out"); > + return -ETIMEDOUT; > + } > + > return 0; > } > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h > index f74684660475..6f2fb9e3cfea 100644 > --- a/drivers/gpu/drm/xe/xe_gt_types.h > +++ b/drivers/gpu/drm/xe/xe_gt_types.h > @@ -358,6 +358,9 @@ struct xe_gt { > /** @oob: bitmap with active OOB workaroudns */ > unsigned long *oob; > } wa_active; > + > + /** @reset_done : Completion of GT reset */ > + struct completion reset_done; > }; > > #endif
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index dfd9cf01a5d5..eb7552b6dfa5 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -65,6 +65,7 @@ struct xe_gt *xe_gt_alloc(struct xe_tile *tile) gt->tile = tile; gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); + init_completion(&gt->reset_done); return gt; } @@ -647,6 +648,8 @@ static int gt_reset(struct xe_gt *gt) xe_device_mem_access_put(gt_to_xe(gt)); XE_WARN_ON(err); + complete(&gt->reset_done); + xe_gt_info(gt, "reset done\n"); return 0; diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c index c4b67cf09f8f..49b30937a28b 100644 --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c @@ -23,6 +23,8 @@ #include "xe_uc_debugfs.h" #include "xe_wa.h" +#define XE_GT_RESET_TIMEOUT_MS (msecs_to_jiffies(5*1000)) + static struct xe_gt *node_to_gt(struct drm_info_node *node) { return node->info_ent->data; @@ -58,9 +60,17 @@ static int hw_engines(struct seq_file *m, void *data) static int force_reset(struct seq_file *m, void *data) { struct xe_gt *gt = node_to_gt(m->private); + struct xe_device *xe = gt_to_xe(gt); + unsigned long timeout; xe_gt_reset_async(gt); + timeout = wait_for_completion_timeout(&gt->reset_done, XE_GT_RESET_TIMEOUT_MS); + if (timeout == 0) { + drm_err(&xe->drm, "gt reset timed out"); + return -ETIMEDOUT; + } + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index f74684660475..6f2fb9e3cfea 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -358,6 +358,9 @@ struct xe_gt { /** @oob: bitmap with active OOB workaroudns */ unsigned long *oob; } wa_active; + + /** @reset_done : Completion of GT reset */ + struct completion reset_done; }; #endif
Wait for gt reset to complete before returning from force_reset sysfs call. Without this igt test freq_reset_multiple fails sporadically in case xe_guc_pc is not started. Testcase: igt@xe_guc_pc@freq_reset_multiple Signed-off-by: Karthik Poosa <karthik.poosa@intel.com> --- drivers/gpu/drm/xe/xe_gt.c | 3 +++ drivers/gpu/drm/xe/xe_gt_debugfs.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 3 +++ 3 files changed, 16 insertions(+)