diff mbox series

[v5] drm/i915/pxp: Add drm_dbgs for critical PXP events.

Message ID 20231122191523.58379-1-alan.previn.teres.alexis@intel.com (mailing list archive)
State New, archived
Headers show
Series [v5] drm/i915/pxp: Add drm_dbgs for critical PXP events. | expand

Commit Message

Alan Previn Nov. 22, 2023, 7:15 p.m. UTC
Debugging PXP issues can't even begin without understanding precedding
sequence of important events. Add drm_dbg into the most important PXP
events.

 v5 : - rebase.
 v4 : - rebase.
 v3 : - move gt_dbg to after mutex block in function
        i915_gsc_proxy_component_bind. (Vivaik)
 v2 : - remove __func__ since drm_dbg covers that (Jani).
      - add timeout dbg of the restart from front-end (Alan).

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Reviewed-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c |  2 ++
 drivers/gpu/drm/i915/pxp/intel_pxp.c         | 15 ++++++++++++---
 drivers/gpu/drm/i915/pxp/intel_pxp_irq.c     |  5 +++--
 drivers/gpu/drm/i915/pxp/intel_pxp_session.c |  6 +++++-
 drivers/gpu/drm/i915/pxp/intel_pxp_types.h   |  1 +
 5 files changed, 23 insertions(+), 6 deletions(-)


base-commit: 5429d55de723544dfc0630cf39d96392052b27a1

Comments

Tvrtko Ursulin Nov. 24, 2023, 8:30 a.m. UTC | #1
On 22/11/2023 19:15, Alan Previn wrote:
> Debugging PXP issues can't even begin without understanding precedding
> sequence of important events. Add drm_dbg into the most important PXP
> events.
> 
>   v5 : - rebase.
>   v4 : - rebase.
>   v3 : - move gt_dbg to after mutex block in function
>          i915_gsc_proxy_component_bind. (Vivaik)
>   v2 : - remove __func__ since drm_dbg covers that (Jani).
>        - add timeout dbg of the restart from front-end (Alan).
> 
> Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> Reviewed-by: Vivaik Balasubrawmanian <vivaik.balasubrawmanian@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c |  2 ++
>   drivers/gpu/drm/i915/pxp/intel_pxp.c         | 15 ++++++++++++---
>   drivers/gpu/drm/i915/pxp/intel_pxp_irq.c     |  5 +++--
>   drivers/gpu/drm/i915/pxp/intel_pxp_session.c |  6 +++++-
>   drivers/gpu/drm/i915/pxp/intel_pxp_types.h   |  1 +
>   5 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
> index 5f138de3c14f..40817ebcca71 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
> @@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
>   	gsc->proxy.component = data;
>   	gsc->proxy.component->mei_dev = mei_kdev;
>   	mutex_unlock(&gsc->proxy.mutex);
> +	gt_dbg(gt, "GSC proxy mei component bound\n");
>   
>   	return 0;
>   }
> @@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
>   	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>   		intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
>   				 HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
> +	gt_dbg(gt, "GSC proxy mei component unbound\n");
>   }
>   
>   static const struct component_ops i915_gsc_proxy_component_ops = {
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
> index dc327cf40b5a..e11f562b1876 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
> @@ -303,6 +303,8 @@ static int __pxp_global_teardown_final(struct intel_pxp *pxp)
>   
>   	if (!pxp->arb_is_valid)
>   		return 0;
> +
> +	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini");
>   	/*
>   	 * To ensure synchronous and coherent session teardown completion
>   	 * in response to suspend or shutdown triggers, don't use a worker.
> @@ -324,6 +326,8 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
>   
>   	if (pxp->arb_is_valid)
>   		return 0;
> +
> +	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart");
>   	/*
>   	 * The arb-session is currently inactive and we are doing a reset and restart
>   	 * due to a runtime event. Use the worker that was designed for this.
> @@ -332,8 +336,11 @@ static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
>   
>   	timeout = intel_pxp_get_backend_timeout_ms(pxp);
>   
> -	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout)))
> +	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) {
> +		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)",
> +			timeout);
>   		return -ETIMEDOUT;
> +	}
>   
>   	return 0;
>   }
> @@ -414,10 +421,12 @@ int intel_pxp_start(struct intel_pxp *pxp)
>   	int ret = 0;
>   
>   	ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret);
>   		return ret;
> -	else if (ret > 1)
> +	} else if (ret > 1) {
>   		return -EIO; /* per UAPI spec, user may retry later */
> +	}
>   
>   	mutex_lock(&pxp->arb_mutex);
>   
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> index 91e9622c07d0..d81750b9bdda 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> @@ -40,11 +40,12 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
>   		   GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) {
>   		/* immediately mark PXP as inactive on termination */
>   		intel_pxp_mark_termination_in_progress(pxp);
> -		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED;
> +		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED |
> +				       PXP_EVENT_TYPE_IRQ;
>   	}
>   
>   	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
> -		pxp->session_events |= PXP_TERMINATION_COMPLETE;
> +		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;

This looked to be doing more than adding debug logs, but then no one is 
actually consuming this new flag?!

Regards,

Tvrtko

>   
>   	if (pxp->session_events)
>   		queue_work(system_unbound_wq, &pxp->session_work);
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> index 0a3e66b0265e..091c86e03d1a 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> @@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
>   static void pxp_terminate_complete(struct intel_pxp *pxp)
>   {
>   	/* Re-create the arb session after teardown handle complete */
> -	if (fetch_and_zero(&pxp->hw_state_invalidated))
> +	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
> +		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
>   		pxp_create_arb_session(pxp);
> +	}
>   
>   	complete_all(&pxp->termination);
>   }
> @@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work)
>   	if (!events)
>   		return;
>   
> +	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
> +
>   	if (events & PXP_INVAL_REQUIRED)
>   		intel_pxp_invalidate(pxp);
>   
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> index 7e11fa8034b2..07864b584cf4 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> @@ -124,6 +124,7 @@ struct intel_pxp {
>   #define PXP_TERMINATION_REQUEST  BIT(0)
>   #define PXP_TERMINATION_COMPLETE BIT(1)
>   #define PXP_INVAL_REQUIRED       BIT(2)
> +#define PXP_EVENT_TYPE_IRQ       BIT(3)
>   };
>   
>   #endif /* __INTEL_PXP_TYPES_H__ */
> 
> base-commit: 5429d55de723544dfc0630cf39d96392052b27a1
Alan Previn Nov. 29, 2023, 7:22 p.m. UTC | #2
On Fri, 2023-11-24 at 08:30 +0000, Tvrtko Ursulin wrote:
> On 22/11/2023 19:15, Alan Previn wrote:
alan:snip
alan: thanks for reviewing.
> >   	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
> > -		pxp->session_events |= PXP_TERMINATION_COMPLETE;
> > +		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;
> 
> This looked to be doing more than adding debug logs, but then no one is 
> actually consuming this new flag?!
alan: see below hunk, inside pxp_session_work the drm_dbg that was prints
the "events" that came from above. we need this debug message to uniquely
recognize that contexts pxp keys would get invalidated in response to
a KCR IRQ (as opposed to other known flows that are being reported by other
means like banning hanging contexts, or as part of suspend etc).
NOTE: pxp->session_events does get set in at lease one other path
outside the IRQ (which triggers teardown but not coming from KCR-hw)
This flag is solely for the debug message. Hope this works.

> 
> Regards,
> 
> Tvrtko
> 
> >   
> >   	if (pxp->session_events)
> >   		queue_work(system_unbound_wq, &pxp->session_work);
> > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> > index 0a3e66b0265e..091c86e03d1a 100644
> > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
> > @@ -137,8 +137,10 @@ void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
> >   static void pxp_terminate_complete(struct intel_pxp *pxp)
> >   {
> >   	/* Re-create the arb session after teardown handle complete */
> > -	if (fetch_and_zero(&pxp->hw_state_invalidated))
> > +	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
> > +		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
> >   		pxp_create_arb_session(pxp);
> > +	}
> >   
> >   	complete_all(&pxp->termination);
> >   }
> > @@ -157,6 +159,8 @@ static void pxp_session_work(struct work_struct *work)
> >   	if (!events)
> >   		return;
> >   
> > +	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
> > +
> >   	if (events & PXP_INVAL_REQUIRED)
> >   		intel_pxp_invalidate(pxp);
> >   
> > diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> > index 7e11fa8034b2..07864b584cf4 100644
> > --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> > +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> > @@ -124,6 +124,7 @@ struct intel_pxp {
> >   #define PXP_TERMINATION_REQUEST  BIT(0)
> >   #define PXP_TERMINATION_COMPLETE BIT(1)
> >   #define PXP_INVAL_REQUIRED       BIT(2)
> > +#define PXP_EVENT_TYPE_IRQ       BIT(3)
> >   };
> >   
> >   #endif /* __INTEL_PXP_TYPES_H__ */
> > 
> > base-commit: 5429d55de723544dfc0630cf39d96392052b27a1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 5f138de3c14f..40817ebcca71 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -322,6 +322,7 @@  static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
 	gsc->proxy.component = data;
 	gsc->proxy.component->mei_dev = mei_kdev;
 	mutex_unlock(&gsc->proxy.mutex);
+	gt_dbg(gt, "GSC proxy mei component bound\n");
 
 	return 0;
 }
@@ -342,6 +343,7 @@  static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
 				 HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+	gt_dbg(gt, "GSC proxy mei component unbound\n");
 }
 
 static const struct component_ops i915_gsc_proxy_component_ops = {
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index dc327cf40b5a..e11f562b1876 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -303,6 +303,8 @@  static int __pxp_global_teardown_final(struct intel_pxp *pxp)
 
 	if (!pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for suspend/fini");
 	/*
 	 * To ensure synchronous and coherent session teardown completion
 	 * in response to suspend or shutdown triggers, don't use a worker.
@@ -324,6 +326,8 @@  static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	if (pxp->arb_is_valid)
 		return 0;
+
+	drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: teardown for restart");
 	/*
 	 * The arb-session is currently inactive and we are doing a reset and restart
 	 * due to a runtime event. Use the worker that was designed for this.
@@ -332,8 +336,11 @@  static int __pxp_global_teardown_restart(struct intel_pxp *pxp)
 
 	timeout = intel_pxp_get_backend_timeout_ms(pxp);
 
-	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout)))
+	if (!wait_for_completion_timeout(&pxp->termination, msecs_to_jiffies(timeout))) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: restart backend timed out (%d ms)",
+			timeout);
 		return -ETIMEDOUT;
+	}
 
 	return 0;
 }
@@ -414,10 +421,12 @@  int intel_pxp_start(struct intel_pxp *pxp)
 	int ret = 0;
 
 	ret = intel_pxp_get_readiness_status(pxp, PXP_READINESS_TIMEOUT);
-	if (ret < 0)
+	if (ret < 0) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: tried but not-avail (%d)", ret);
 		return ret;
-	else if (ret > 1)
+	} else if (ret > 1) {
 		return -EIO; /* per UAPI spec, user may retry later */
+	}
 
 	mutex_lock(&pxp->arb_mutex);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
index 91e9622c07d0..d81750b9bdda 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
@@ -40,11 +40,12 @@  void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
 		   GEN12_DISPLAY_APP_TERMINATED_PER_FW_REQ_INTERRUPT)) {
 		/* immediately mark PXP as inactive on termination */
 		intel_pxp_mark_termination_in_progress(pxp);
-		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED;
+		pxp->session_events |= PXP_TERMINATION_REQUEST | PXP_INVAL_REQUIRED |
+				       PXP_EVENT_TYPE_IRQ;
 	}
 
 	if (iir & GEN12_DISPLAY_STATE_RESET_COMPLETE_INTERRUPT)
-		pxp->session_events |= PXP_TERMINATION_COMPLETE;
+		pxp->session_events |= PXP_TERMINATION_COMPLETE | PXP_EVENT_TYPE_IRQ;
 
 	if (pxp->session_events)
 		queue_work(system_unbound_wq, &pxp->session_work);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
index 0a3e66b0265e..091c86e03d1a 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
@@ -137,8 +137,10 @@  void intel_pxp_terminate(struct intel_pxp *pxp, bool post_invalidation_needs_res
 static void pxp_terminate_complete(struct intel_pxp *pxp)
 {
 	/* Re-create the arb session after teardown handle complete */
-	if (fetch_and_zero(&pxp->hw_state_invalidated))
+	if (fetch_and_zero(&pxp->hw_state_invalidated)) {
+		drm_dbg(&pxp->ctrl_gt->i915->drm, "PXP: creating arb_session after invalidation");
 		pxp_create_arb_session(pxp);
+	}
 
 	complete_all(&pxp->termination);
 }
@@ -157,6 +159,8 @@  static void pxp_session_work(struct work_struct *work)
 	if (!events)
 		return;
 
+	drm_dbg(&gt->i915->drm, "PXP: processing event-flags 0x%08x", events);
+
 	if (events & PXP_INVAL_REQUIRED)
 		intel_pxp_invalidate(pxp);
 
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
index 7e11fa8034b2..07864b584cf4 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
@@ -124,6 +124,7 @@  struct intel_pxp {
 #define PXP_TERMINATION_REQUEST  BIT(0)
 #define PXP_TERMINATION_COMPLETE BIT(1)
 #define PXP_INVAL_REQUIRED       BIT(2)
+#define PXP_EVENT_TYPE_IRQ       BIT(3)
 };
 
 #endif /* __INTEL_PXP_TYPES_H__ */