diff mbox series

[6/6] drm/i915: do not defer cleanup work

Message ID 20240205101053.3698717-7-chaitanya.kumar.borah@intel.com (mailing list archive)
State New, archived
Headers show
Series Cursor Fault Fixes | expand

Commit Message

Chaitanya Kumar Borah Feb. 5, 2024, 10:10 a.m. UTC
After we move the cursor fb unpin to a vblank work, we encounter
race conditions between the vblank work and the atomic clean up
work leading to dump stacks[1]. Let's serialize the clean up
to avoid theses races.

[1]

   [  278.748767] Workqueue: events_highpri intel_atomic_cleanup_work [i915]
   [  278.749115] RIP: 0010:intel_display_rps_mark_interactive+0x4/0x40 [i915]
   [  278.749425] Code: 92 cb 20 e1 e9 49 ff ff ff 5b 48 89 ef 5d 41 5c e9 11 23 44 e1 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 <38> 96 a5 05 00 00 74 2a 55 48 89 f5 0f b6 f2 53 48 8b bf 40 37 00
   [  278.749428] RSP: 0018:ffffc9000029fdc8 EFLAGS: 00010246
   [  278.749433] RAX: 0000000000000060 RBX: 0000000000000000 RCX: 0000000000000000
   [  278.749435] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888124d70000
   [  278.749438] RBP: ffff88810394c000 R08: 0000000000000000 R09: ffffc9000029fc80
   [  278.749441] R10: 0000000000f6d950 R11: 0000000000f6da18 R12: ffff888124d70000
   [  278.749443] R13: ffff88814c952000 R14: ffff8881000aac05 R15: ffff8881059baf10
   [  278.749446] FS:  0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000
   [  278.749449] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
   [  278.749452] CR2: 00000000000005a5 CR3: 0000000104078000 CR4: 0000000000350ef0
   [  278.749454] Call Trace:
   [  278.749458]  <TASK>
   [  278.749461]  ? __die_body+0x1a/0x60
   [  278.749469]  ? page_fault_oops+0x156/0x450
   [  278.749474]  ? do_user_addr_fault+0x65/0x9e0
   [  278.749479]  ? exc_page_fault+0x68/0x1a0
   [  278.749486]  ? asm_exc_page_fault+0x26/0x30
   [  278.749494]  ? intel_display_rps_mark_interactive+0x4/0x40 [i915]
   [  278.749802]  intel_cleanup_plane_fb+0x6f/0xc0 [i915]
   [  278.750114]  drm_atomic_helper_cleanup_planes+0x42/0x60
   [  278.750122]  intel_atomic_cleanup_work+0x70/0xc0 [i915]
   [  278.750433]  ? process_scheduled_works+0x264/0x530
   [  278.750438]  process_scheduled_works+0x2db/0x530
   [  278.750444]  ? __pfx_worker_thread+0x10/0x10
   [  278.750448]  worker_thread+0x18c/0x350
   [  278.750452]  ? __pfx_worker_thread+0x10/0x10
   [  278.750455]  kthread+0xfe/0x130
   [  278.750460]  ? __pfx_kthread+0x10/0x10
   [  278.750464]  ret_from_fork+0x2c/0x50
   [  278.750468]  ? __pfx_kthread+0x10/0x10
   [  278.750472]  ret_from_fork_asm+0x1b/0x30

Signed-off-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
---
 drivers/gpu/drm/i915/display/intel_display.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

Comments

Ville Syrjälä Feb. 13, 2024, 5:12 p.m. UTC | #1
On Mon, Feb 05, 2024 at 03:40:53PM +0530, Chaitanya Kumar Borah wrote:
> After we move the cursor fb unpin to a vblank work, we encounter
> race conditions between the vblank work and the atomic clean up
> work leading to dump stacks[1]. Let's serialize the clean up
> to avoid theses races.

This needs to be properly root caused, not just duct taped over.

> 
> [1]
> 
>    [  278.748767] Workqueue: events_highpri intel_atomic_cleanup_work [i915]
>    [  278.749115] RIP: 0010:intel_display_rps_mark_interactive+0x4/0x40 [i915]
>    [  278.749425] Code: 92 cb 20 e1 e9 49 ff ff ff 5b 48 89 ef 5d 41 5c e9 11 23 44 e1 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 0f 1f 00 <38> 96 a5 05 00 00 74 2a 55 48 89 f5 0f b6 f2 53 48 8b bf 40 37 00
>    [  278.749428] RSP: 0018:ffffc9000029fdc8 EFLAGS: 00010246
>    [  278.749433] RAX: 0000000000000060 RBX: 0000000000000000 RCX: 0000000000000000
>    [  278.749435] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888124d70000
>    [  278.749438] RBP: ffff88810394c000 R08: 0000000000000000 R09: ffffc9000029fc80
>    [  278.749441] R10: 0000000000f6d950 R11: 0000000000f6da18 R12: ffff888124d70000
>    [  278.749443] R13: ffff88814c952000 R14: ffff8881000aac05 R15: ffff8881059baf10
>    [  278.749446] FS:  0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000
>    [  278.749449] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>    [  278.749452] CR2: 00000000000005a5 CR3: 0000000104078000 CR4: 0000000000350ef0
>    [  278.749454] Call Trace:
>    [  278.749458]  <TASK>
>    [  278.749461]  ? __die_body+0x1a/0x60
>    [  278.749469]  ? page_fault_oops+0x156/0x450
>    [  278.749474]  ? do_user_addr_fault+0x65/0x9e0
>    [  278.749479]  ? exc_page_fault+0x68/0x1a0
>    [  278.749486]  ? asm_exc_page_fault+0x26/0x30
>    [  278.749494]  ? intel_display_rps_mark_interactive+0x4/0x40 [i915]
>    [  278.749802]  intel_cleanup_plane_fb+0x6f/0xc0 [i915]
>    [  278.750114]  drm_atomic_helper_cleanup_planes+0x42/0x60
>    [  278.750122]  intel_atomic_cleanup_work+0x70/0xc0 [i915]
>    [  278.750433]  ? process_scheduled_works+0x264/0x530
>    [  278.750438]  process_scheduled_works+0x2db/0x530
>    [  278.750444]  ? __pfx_worker_thread+0x10/0x10
>    [  278.750448]  worker_thread+0x18c/0x350
>    [  278.750452]  ? __pfx_worker_thread+0x10/0x10
>    [  278.750455]  kthread+0xfe/0x130
>    [  278.750460]  ? __pfx_kthread+0x10/0x10
>    [  278.750464]  ret_from_fork+0x2c/0x50
>    [  278.750468]  ? __pfx_kthread+0x10/0x10
>    [  278.750472]  ret_from_fork_asm+0x1b/0x30
> 
> Signed-off-by: Chaitanya Kumar Borah <chaitanya.kumar.borah@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_display.c | 8 +++-----
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
> index bf684c4d1732..b0e89036508e 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -7006,10 +7006,8 @@ static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
>  	}
>  }
>  
> -static void intel_atomic_cleanup_work(struct work_struct *work)
> +static void intel_atomic_cleanup_work(struct intel_atomic_state *state)
>  {
> -	struct intel_atomic_state *state =
> -		container_of(work, struct intel_atomic_state, base.commit_work);
>  	struct drm_i915_private *i915 = to_i915(state->base.dev);
>  	struct intel_crtc_state *old_crtc_state;
>  	struct intel_crtc *crtc;
> @@ -7283,8 +7281,8 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
>  	 * schedule point (cond_resched()) here anyway to keep latencies
>  	 * down.
>  	 */
> -	INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work);
> -	queue_work(system_highpri_wq, &state->base.commit_work);
> +
> +	intel_atomic_cleanup_work(state);
>  }
>  
>  static void intel_atomic_commit_work(struct work_struct *work)
> -- 
> 2.25.1
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index bf684c4d1732..b0e89036508e 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -7006,10 +7006,8 @@  static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_stat
 	}
 }
 
-static void intel_atomic_cleanup_work(struct work_struct *work)
+static void intel_atomic_cleanup_work(struct intel_atomic_state *state)
 {
-	struct intel_atomic_state *state =
-		container_of(work, struct intel_atomic_state, base.commit_work);
 	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	struct intel_crtc_state *old_crtc_state;
 	struct intel_crtc *crtc;
@@ -7283,8 +7281,8 @@  static void intel_atomic_commit_tail(struct intel_atomic_state *state)
 	 * schedule point (cond_resched()) here anyway to keep latencies
 	 * down.
 	 */
-	INIT_WORK(&state->base.commit_work, intel_atomic_cleanup_work);
-	queue_work(system_highpri_wq, &state->base.commit_work);
+
+	intel_atomic_cleanup_work(state);
 }
 
 static void intel_atomic_commit_work(struct work_struct *work)