diff mbox

lockdep splats with blk-mq drivers

Message ID alpine.LFD.2.21.1804191404540.17056@schleppi (mailing list archive)
State New, archived
Headers show

Commit Message

Sebastian Ott April 19, 2018, 12:20 p.m. UTC
Since commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a
RCU and generation based scheme") I observe lockdep complaints (full
message attached):

[   21.763369]        CPU0                    CPU1
[   21.763370]        ----                    ----
[   21.763371]   lock(&rq->gstate_seq);
[   21.763374]                                local_irq_disable();
[   21.763375]                                lock(&(&dq->lock)->rlock);
[   21.763377]                                lock(&rq->gstate_seq);
[   21.763379]   <Interrupt>
[   21.763380]     lock(&(&dq->lock)->rlock);
[   21.763382] 
                *** DEADLOCK ***


This only happens in combination with DASD (s390 specific) and another
blk-mq driver (scsi, null_blk). The distinctive behavior of DASD is that
it calls blk_mq_start_request with irqs disabled.

This is a false positive since gstate_seq on CPU0 is from a different
request queue / block driver than gstate_seq on CPU1.

Possible fixes are to use local_irq_save/restore in blk_mq_start_request.
Or, since it's a false positive, teach lockdep that these are different
locks - like below:

------>8
[   74.182295] WARNING: possible irq lock inversion dependency detected
[   74.182296] 4.16.0-11766-ge241e3f2bf97 #166 Not tainted
[   74.182297] --------------------------------------------------------
[   74.182298] kworker/u64:7/175 just changed the state of lock:
[   74.182301] 000000001b8db3f6 (&rq->gstate_seq){+.+.}, at: scsi_queue_rq+0x460/0x578
[   74.182308] but this lock was taken by another, SOFTIRQ-safe lock in the past:
[   74.182310]  (&(&dq->lock)->rlock){..-.}
[   74.182311] 
               
               and interrupts could create inverse lock ordering between them.

[   74.182315] 
               other info that might help us debug this:
[   74.182316]  Possible interrupt unsafe locking scenario:

[   74.182318]        CPU0                    CPU1
[   74.182319]        ----                    ----
[   74.182320]   lock(&rq->gstate_seq);
[   74.182340]                                local_irq_disable();
[   74.182341]                                lock(&(&dq->lock)->rlock);
[   74.182343]                                lock(&rq->gstate_seq);
[   74.182345]   <Interrupt>
[   74.182346]     lock(&(&dq->lock)->rlock);
[   74.182348] 
                *** DEADLOCK ***

[   74.182350] 4 locks held by kworker/u64:7/175:
[   74.182351]  #0: 000000001751423f ((wq_completion)"%s"shost->work_q_name){+.+.}, at: process_one_work+0x1a0/0x6f0
[   74.182359]  #1: 000000001c880b65 ((work_completion)(&rport->scan_work)){+.+.}, at: process_one_work+0x1a0/0x6f0
[   74.182363]  #2: 000000000c5e0f4d (&shost->scan_mutex){+.+.}, at: scsi_scan_target+0xac/0x128
[   74.182368]  #3: 00000000f2c11806 (rcu_read_lock){....}, at: hctx_lock+0x30/0x110
[   74.182374] 
               the shortest dependencies between 2nd lock and 1st lock:
[   74.182376]  -> (&(&dq->lock)->rlock){..-.} ops: 51556787421184 {
[   74.182380]     IN-SOFTIRQ-W at:
[   74.182384]                       __lock_acquire+0x4e6/0x13f0
[   74.182385]                       lock_acquire+0xf6/0x290
[   74.182390]                       _raw_spin_lock_irq+0x6a/0xb0
[   74.182392]                       dasd_block_tasklet+0xf8/0x400
[   74.182394]                       tasklet_action_common.isra.5+0x82/0x110
[   74.182396]                       __do_softirq+0xd2/0x5d8
[   74.182398]                       irq_exit+0x132/0x138
[   74.182400]                       do_IRQ+0xaa/0xf0
[   74.182402]                       io_int_handler+0x138/0x31c
[   74.182405]                       enabled_wait+0x6a/0x138
[   74.182406]                       enabled_wait+0x52/0x138
[   74.182408]                       arch_cpu_idle+0x32/0x50
[   74.182410]                       default_idle_call+0x3e/0x68
[   74.182413]                       do_idle+0xea/0x1c0
[   74.182415]                       cpu_startup_entry+0x3e/0x48
[   74.182417]                       smp_start_secondary+0x13e/0x158
[   74.182419]                       restart_int_handler+0x5a/0x70
[   74.182420]                                 (null)
[   74.182422]     INITIAL USE at:
[   74.182424]                      __lock_acquire+0x26c/0x13f0
[   74.182426]                      lock_acquire+0xf6/0x290
[   74.182428]                      _raw_spin_lock_irq+0x6a/0xb0
[   74.182432]                      do_dasd_request+0x50/0x470
[   74.182434]                      __blk_mq_try_issue_directly+0x1ca/0x208
[   74.182436]                      blk_mq_try_issue_directly+0x5c/0x108
[   74.182437]                      blk_mq_make_request+0x4b8/0xa20
[   74.182440]                      generic_make_request+0xea/0x2c8
[   74.182441]                      submit_bio+0xa4/0x1a0
[   74.182444]                      submit_bh_wbc+0x1c4/0x218
[   74.182446]                      block_read_full_page+0x366/0x408
[   74.182450]                      do_read_cache_page+0x1b2/0x3d0
[   74.182452]                      read_cache_page+0x30/0x40
[   74.182455]                      read_dev_sector+0x58/0xe8
[   74.182457]                      read_lba.isra.0+0x12a/0x1b8
[   74.182459]                      efi_partition+0x198/0x660
[   74.182461]                      check_partition+0x15c/0x2b8
[   74.182463]                      rescan_partitions+0xea/0x3f8
[   74.182464]                      blkdev_reread_part+0x44/0x68
[   74.182467]                      dasd_scan_partitions+0x90/0x148
[   74.182469]                      dasd_change_state+0x8cc/0xb28
[   74.182471]                      do_kick_device+0x4c/0x98
[   74.182473]                      process_one_work+0x25c/0x6f0
[   74.182475]                      worker_thread+0x52/0x400
[   74.182477]                      kthread+0x15a/0x178
[   74.182479]                      kernel_thread_starter+0x6/0xc
[   74.182481]                      kernel_thread_starter+0x0/0xc
[   74.182482]   }
[   74.182486]   ... key      at: [<0000000001bd0e10>] __key.43387+0x0/0x8
[   74.182487]   ... acquired at:
[   74.182489]    __lock_acquire+0x130a/0x13f0
[   74.182490]    lock_acquire+0xf6/0x290
[   74.182492]    blk_mq_start_request+0xde/0x300
[   74.182494]    do_dasd_request+0x17a/0x470
[   74.182495]    __blk_mq_try_issue_directly+0x1ca/0x208
[   74.182497]    blk_mq_try_issue_directly+0x5c/0x108
[   74.182499]    blk_mq_make_request+0x4b8/0xa20
[   74.182500]    generic_make_request+0xea/0x2c8
[   74.182502]    submit_bio+0xa4/0x1a0
[   74.182504]    submit_bh_wbc+0x1c4/0x218
[   74.182506]    block_read_full_page+0x366/0x408
[   74.182507]    do_read_cache_page+0x1b2/0x3d0
[   74.182509]    read_cache_page+0x30/0x40
[   74.182512]    read_dev_sector+0x58/0xe8
[   74.182513]    read_lba.isra.0+0x12a/0x1b8
[   74.182515]    efi_partition+0x198/0x660
[   74.182516]    check_partition+0x15c/0x2b8
[   74.182517]    rescan_partitions+0xea/0x3f8
[   74.182519]    blkdev_reread_part+0x44/0x68
[   74.182520]    dasd_scan_partitions+0x90/0x148
[   74.182522]    dasd_change_state+0x8cc/0xb28
[   74.182524]    do_kick_device+0x4c/0x98
[   74.182526]    process_one_work+0x25c/0x6f0
[   74.182527]    worker_thread+0x52/0x400
[   74.182529]    kthread+0x15a/0x178
[   74.182530]    kernel_thread_starter+0x6/0xc
[   74.182532]    kernel_thread_starter+0x0/0xc

[   74.182534] -> (&rq->gstate_seq){+.+.} ops: 24322399797248 {
[   74.182538]    HARDIRQ-ON-W at:
[   74.182540]                     __lock_acquire+0x3da/0x13f0
[   74.182542]                     lock_acquire+0xf6/0x290
[   74.182543]                     blk_mq_start_request+0xde/0x300
[   74.182546]                     scsi_queue_rq+0x460/0x578
[   74.182547]                     blk_mq_dispatch_rq_list+0x186/0x678
[   74.182549]                     blk_mq_do_dispatch_sched+0x90/0x128
[   74.182551]                     blk_mq_sched_dispatch_requests+0x168/0x200
[   74.182553]                     __blk_mq_run_hw_queue+0xd0/0x128
[   74.182555]                     __blk_mq_delay_run_hw_queue+0xd4/0x120
[   74.182556]                     blk_mq_run_hw_queue+0xa0/0x118
[   74.182558]                     blk_mq_sched_insert_request+0xc6/0x1e0
[   74.182561]                     blk_execute_rq_nowait+0x8e/0x118
[   74.182563]                     blk_execute_rq+0x76/0x90
[   74.182564]                     scsi_execute+0x114/0x218
[   74.182566]                     scsi_probe_and_add_lun+0x23e/0xe60
[   74.182568]                     __scsi_scan_target+0x112/0x630
[   74.182570]                     scsi_scan_target+0x110/0x128
[   74.182573]                     fc_scsi_scan_rport+0xd8/0xe0
[   74.182575]                     process_one_work+0x25c/0x6f0
[   74.182577]                     worker_thread+0x52/0x400
[   74.182579]                     kthread+0x15a/0x178
[   74.182580]                     kernel_thread_starter+0x6/0xc
[   74.182582]                     kernel_thread_starter+0x0/0xc
[   74.182584]    SOFTIRQ-ON-W at:
[   74.182586]                     __lock_acquire+0x3fc/0x13f0
[   74.182588]                     lock_acquire+0xf6/0x290
[   74.182589]                     blk_mq_start_request+0xde/0x300
[   74.182591]                     scsi_queue_rq+0x460/0x578
[   74.182593]                     blk_mq_dispatch_rq_list+0x186/0x678
[   74.182595]                     blk_mq_do_dispatch_sched+0x90/0x128
[   74.182597]                     blk_mq_sched_dispatch_requests+0x168/0x200
[   74.182599]                     __blk_mq_run_hw_queue+0xd0/0x128
[   74.182600]                     __blk_mq_delay_run_hw_queue+0xd4/0x120
[   74.182602]                     blk_mq_run_hw_queue+0xa0/0x118
[   74.182603]                     blk_mq_sched_insert_request+0xc6/0x1e0
[   74.182605]                     blk_execute_rq_nowait+0x8e/0x118
[   74.182607]                     blk_execute_rq+0x76/0x90
[   74.182609]                     scsi_execute+0x114/0x218
[   74.182611]                     scsi_probe_and_add_lun+0x23e/0xe60
[   74.182613]                     __scsi_scan_target+0x112/0x630
[   74.182615]                     scsi_scan_target+0x110/0x128
[   74.182617]                     fc_scsi_scan_rport+0xd8/0xe0
[   74.182618]                     process_one_work+0x25c/0x6f0
[   74.182620]                     worker_thread+0x52/0x400
[   74.182622]                     kthread+0x15a/0x178
[   74.182623]                     kernel_thread_starter+0x6/0xc
[   74.182625]                     kernel_thread_starter+0x0/0xc
[   74.182626]    INITIAL USE at:
[   74.182629]                    __lock_acquire+0x26c/0x13f0
[   74.182631]                    lock_acquire+0xf6/0x290
[   74.182632]                    blk_mq_start_request+0xde/0x300
[   74.182634]                    do_dasd_request+0x17a/0x470
[   74.182636]                    __blk_mq_try_issue_directly+0x1ca/0x208
[   74.182638]                    blk_mq_try_issue_directly+0x5c/0x108
[   74.182640]                    blk_mq_make_request+0x4b8/0xa20
[   74.182641]                    generic_make_request+0xea/0x2c8
[   74.182643]                    submit_bio+0xa4/0x1a0
[   74.182645]                    submit_bh_wbc+0x1c4/0x218
[   74.182646]                    block_read_full_page+0x366/0x408
[   74.182648]                    do_read_cache_page+0x1b2/0x3d0
[   74.182650]                    read_cache_page+0x30/0x40
[   74.182652]                    read_dev_sector+0x58/0xe8
[   74.182654]                    read_lba.isra.0+0x12a/0x1b8
[   74.182655]                    efi_partition+0x198/0x660
[   74.182657]                    check_partition+0x15c/0x2b8
[   74.182659]                    rescan_partitions+0xea/0x3f8
[   74.182660]                    blkdev_reread_part+0x44/0x68
[   74.182662]                    dasd_scan_partitions+0x90/0x148
[   74.182664]                    dasd_change_state+0x8cc/0xb28
[   74.182665]                    do_kick_device+0x4c/0x98
[   74.182667]                    process_one_work+0x25c/0x6f0
[   74.182669]                    worker_thread+0x52/0x400
[   74.182670]                    kthread+0x15a/0x178
[   74.182672]                    kernel_thread_starter+0x6/0xc
[   74.182674]                    kernel_thread_starter+0x0/0xc
[   74.182675]  }
[   74.182677]  ... key      at: [<0000000001bc22f0>] __key.43630+0x0/0x8
[   74.182678]  ... acquired at:
[   74.182680]    check_usage_backwards+0xa2/0x140
[   74.182681]    mark_lock+0x3ee/0x6f8
[   74.182682]    __lock_acquire+0x3fc/0x13f0
[   74.182684]    lock_acquire+0xf6/0x290
[   74.182685]    blk_mq_start_request+0xde/0x300
[   74.182687]    scsi_queue_rq+0x460/0x578
[   74.182688]    blk_mq_dispatch_rq_list+0x186/0x678
[   74.182690]    blk_mq_do_dispatch_sched+0x90/0x128
[   74.182692]    blk_mq_sched_dispatch_requests+0x168/0x200
[   74.182693]    __blk_mq_run_hw_queue+0xd0/0x128
[   74.182695]    __blk_mq_delay_run_hw_queue+0xd4/0x120
[   74.182696]    blk_mq_run_hw_queue+0xa0/0x118
[   74.182698]    blk_mq_sched_insert_request+0xc6/0x1e0
[   74.182699]    blk_execute_rq_nowait+0x8e/0x118
[   74.182701]    blk_execute_rq+0x76/0x90
[   74.182703]    scsi_execute+0x114/0x218
[   74.182704]    scsi_probe_and_add_lun+0x23e/0xe60
[   74.182706]    __scsi_scan_target+0x112/0x630
[   74.182708]    scsi_scan_target+0x110/0x128
[   74.182709]    fc_scsi_scan_rport+0xd8/0xe0
[   74.182711]    process_one_work+0x25c/0x6f0
[   74.182713]    worker_thread+0x52/0x400
[   74.182714]    kthread+0x15a/0x178
[   74.182716]    kernel_thread_starter+0x6/0xc
[   74.182717]    kernel_thread_starter+0x0/0xc

[   74.182719] 
               stack backtrace:
[   74.182722] CPU: 0 PID: 175 Comm: kworker/u64:7 Not tainted 4.16.0-11766-ge241e3f2bf97 #166
[   74.182724] Hardware name: IBM 3906 M04 704 (LPAR)
[   74.182726] Workqueue: scsi_wq_0 fc_scsi_scan_rport
[   74.182727] Call Trace:
[   74.182730] ([<00000000001151d4>] show_stack+0x9c/0x108)
[   74.182733]  [<00000000008c3c9a>] dump_stack+0x9a/0xd8 
[   74.182735]  [<00000000001b8f02>] print_irq_inversion_bug+0x1ba/0x230 
[   74.182737]  [<00000000001b915a>] check_usage_backwards+0xa2/0x140 
[   74.182739]  [<00000000001ba7e6>] mark_lock+0x3ee/0x6f8 
[   74.182740]  [<00000000001bb524>] __lock_acquire+0x3fc/0x13f0 
[   74.182742]  [<00000000001bcd8e>] lock_acquire+0xf6/0x290 
[   74.182744]  [<000000000058239e>] blk_mq_start_request+0xde/0x300 
[   74.182745]  [<0000000000671898>] scsi_queue_rq+0x460/0x578 
[   74.182747]  [<00000000005856ae>] blk_mq_dispatch_rq_list+0x186/0x678 
[   74.182749]  [<000000000058b0b8>] blk_mq_do_dispatch_sched+0x90/0x128 
[   74.182751]  [<000000000058ba90>] blk_mq_sched_dispatch_requests+0x168/0x200 
[   74.182752]  [<00000000005839b8>] __blk_mq_run_hw_queue+0xd0/0x128 
[   74.182754]  [<0000000000583ae4>] __blk_mq_delay_run_hw_queue+0xd4/0x120 
[   74.182756]  [<0000000000583c28>] blk_mq_run_hw_queue+0xa0/0x118 
[   74.182758]  [<000000000058c0ae>] blk_mq_sched_insert_request+0xc6/0x1e0 
[   74.182759]  [<000000000057d186>] blk_execute_rq_nowait+0x8e/0x118 
[   74.182761]  [<000000000057d2a6>] blk_execute_rq+0x76/0x90 
[   74.182763]  [<000000000066c7fc>] scsi_execute+0x114/0x218 
[   74.182765]  [<00000000006735f6>] scsi_probe_and_add_lun+0x23e/0xe60 
[   74.182766]  [<0000000000674722>] __scsi_scan_target+0x112/0x630 
[   74.182768]  [<0000000000674d50>] scsi_scan_target+0x110/0x128 
[   74.182770]  [<000000000067f730>] fc_scsi_scan_rport+0xd8/0xe0 
[   74.182772]  [<000000000016ace4>] process_one_work+0x25c/0x6f0 
[   74.182774]  [<000000000016b1ca>] worker_thread+0x52/0x400 
[   74.182775]  [<00000000001730ba>] kthread+0x15a/0x178 
[   74.182777]  [<00000000008e66c2>] kernel_thread_starter+0x6/0xc 
[   74.182779]  [<00000000008e66bc>] kernel_thread_starter+0x0/0xc 
[   74.182780] INFO: lockdep is turned off.
[   21.763347] WARNING: possible irq lock inversion dependency detected
[   21.763349] 4.16.0-11958-g16e205cf42da #168 Not tainted
[   21.763350] --------------------------------------------------------
[   21.763352] kworker/1:1H/129 just changed the state of lock:
[   21.763353] 000000007d3db786 (&rq->gstate_seq){+.+.}, at: null_queue_rq+0x70/0xc8 [null_blk]
[   21.763360] but this lock was taken by another, SOFTIRQ-safe lock in the past:
[   21.763361]  (&(&dq->lock)->rlock){..-.}
[   21.763363] 
               
               and interrupts could create inverse lock ordering between them.

[   21.763366] 
               other info that might help us debug this:
[   21.763368]  Possible interrupt unsafe locking scenario:

[   21.763369]        CPU0                    CPU1
[   21.763370]        ----                    ----
[   21.763371]   lock(&rq->gstate_seq);
[   21.763374]                                local_irq_disable();
[   21.763375]                                lock(&(&dq->lock)->rlock);
[   21.763377]                                lock(&rq->gstate_seq);
[   21.763379]   <Interrupt>
[   21.763380]     lock(&(&dq->lock)->rlock);
[   21.763382] 
                *** DEADLOCK ***

[   21.763385] 3 locks held by kworker/1:1H/129:
[   21.763399]  #0: 000000003a6e153c ((wq_completion)"kblockd"){+.+.}, at: process_one_work+0x1a0/0x6f0
[   21.763406]  #1: 00000000b52029e8 ((work_completion)(&(&hctx->run_work)->work)){+.+.}, at: process_one_work+0x1a0/0x6f0
[   21.763411]  #2: 000000001fde8b5d (rcu_read_lock){....}, at: hctx_lock+0x30/0x110
[   21.763416] 
               the shortest dependencies between 2nd lock and 1st lock:
[   21.763418]  -> (&(&dq->lock)->rlock){..-.} ops: 55959128899584 {
[   21.763422]     IN-SOFTIRQ-W at:
[   21.763426]                       __lock_acquire+0x4e6/0x13f0
[   21.763428]                       lock_acquire+0xf6/0x290
[   21.763431]                       _raw_spin_lock_irq+0x6a/0xb0
[   21.763434]                       dasd_block_tasklet+0xf8/0x400
[   21.763436]                       tasklet_action_common.isra.5+0x82/0x110
[   21.763438]                       __do_softirq+0xd2/0x5d8
[   21.763441]                       irq_exit+0x132/0x138
[   21.763443]                       do_IRQ+0xaa/0xf0
[   21.763445]                       io_int_handler+0x138/0x31c
[   21.763447]                       enabled_wait+0x6a/0x138
[   21.763449]                       enabled_wait+0x52/0x138
[   21.763450]                       arch_cpu_idle+0x32/0x50
[   21.763452]                       default_idle_call+0x3e/0x68
[   21.763454]                       do_idle+0xea/0x1c0
[   21.763456]                       cpu_startup_entry+0x3e/0x48
[   21.763458]                       smp_start_secondary+0x13e/0x158
[   21.763460]                       restart_int_handler+0x5a/0x70
[   21.763462]                                 (null)
[   21.763463]     INITIAL USE at:
[   21.763465]                      __lock_acquire+0x26c/0x13f0
[   21.763467]                      lock_acquire+0xf6/0x290
[   21.763469]                      _raw_spin_lock_irq+0x6a/0xb0
[   21.763472]                      do_dasd_request+0x50/0x470
[   21.763474]                      __blk_mq_try_issue_directly+0x1ca/0x208
[   21.763476]                      blk_mq_try_issue_directly+0x5c/0x108
[   21.763478]                      blk_mq_make_request+0x4b8/0xa20
[   21.763480]                      generic_make_request+0xea/0x2c8
[   21.763482]                      submit_bio+0xa4/0x1a0
[   21.763485]                      submit_bh_wbc+0x1c4/0x218
[   21.763487]                      block_read_full_page+0x366/0x408
[   21.763491]                      do_read_cache_page+0x1b2/0x3d0
[   21.763493]                      read_cache_page+0x30/0x40
[   21.763497]                      read_dev_sector+0x58/0xe8
[   21.763498]                      read_lba.isra.0+0x12a/0x1b8
[   21.763500]                      efi_partition+0x198/0x660
[   21.763501]                      check_partition+0x15c/0x2b8
[   21.763503]                      rescan_partitions+0xea/0x3f8
[   21.763505]                      blkdev_reread_part+0x44/0x68
[   21.763507]                      dasd_scan_partitions+0x90/0x148
[   21.763509]                      dasd_change_state+0x8cc/0xb28
[   21.763511]                      do_kick_device+0x4c/0x98
[   21.763513]                      process_one_work+0x25c/0x6f0
[   21.763515]                      worker_thread+0x52/0x400
[   21.763518]                      kthread+0x15a/0x178
[   21.763520]                      kernel_thread_starter+0x6/0xc
[   21.763522]                      kernel_thread_starter+0x0/0xc
[   21.763523]   }
[   21.763527]   ... key      at: [<0000000001bd0e08>] __key.43428+0x0/0x8
[   21.763528]   ... acquired at:
[   21.763530]    __lock_acquire+0x130a/0x13f0
[   21.763531]    lock_acquire+0xf6/0x290
[   21.763533]    blk_mq_start_request+0xde/0x300
[   21.763534]    do_dasd_request+0x17a/0x470
[   21.763536]    __blk_mq_try_issue_directly+0x1ca/0x208
[   21.763538]    blk_mq_try_issue_directly+0x5c/0x108
[   21.763539]    blk_mq_make_request+0x4b8/0xa20
[   21.763541]    generic_make_request+0xea/0x2c8
[   21.763543]    submit_bio+0xa4/0x1a0
[   21.763544]    submit_bh_wbc+0x1c4/0x218
[   21.763546]    block_read_full_page+0x366/0x408
[   21.763548]    do_read_cache_page+0x1b2/0x3d0
[   21.763550]    read_cache_page+0x30/0x40
[   21.763552]    read_dev_sector+0x58/0xe8
[   21.763553]    read_lba.isra.0+0x12a/0x1b8
[   21.763555]    efi_partition+0x198/0x660
[   21.763556]    check_partition+0x15c/0x2b8
[   21.763558]    rescan_partitions+0xea/0x3f8
[   21.763560]    blkdev_reread_part+0x44/0x68
[   21.763561]    dasd_scan_partitions+0x90/0x148
[   21.763563]    dasd_change_state+0x8cc/0xb28
[   21.763564]    do_kick_device+0x4c/0x98
[   21.763566]    process_one_work+0x25c/0x6f0
[   21.763568]    worker_thread+0x52/0x400
[   21.763570]    kthread+0x15a/0x178
[   21.763571]    kernel_thread_starter+0x6/0xc
[   21.763573]    kernel_thread_starter+0x0/0xc

[   21.763575] -> (&rq->gstate_seq){+.+.} ops: 17862768984064 {
[   21.763579]    HARDIRQ-ON-W at:
[   21.763581]                     __lock_acquire+0x3da/0x13f0
[   21.763583]                     lock_acquire+0xf6/0x290
[   21.763585]                     blk_mq_start_request+0xde/0x300
[   21.763588]                     null_queue_rq+0x70/0xc8 [null_blk]
[   21.763590]                     blk_mq_dispatch_rq_list+0x186/0x678
[   21.763592]                     blk_mq_do_dispatch_sched+0x90/0x128
[   21.763593]                     blk_mq_sched_dispatch_requests+0x168/0x200
[   21.763595]                     __blk_mq_run_hw_queue+0xd0/0x128
[   21.763597]                     process_one_work+0x25c/0x6f0
[   21.763599]                     worker_thread+0x52/0x400
[   21.763601]                     kthread+0x15a/0x178
[   21.763602]                     kernel_thread_starter+0x6/0xc
[   21.763604]                     kernel_thread_starter+0x0/0xc
[   21.763606]    SOFTIRQ-ON-W at:
[   21.763608]                     __lock_acquire+0x3fc/0x13f0
[   21.763609]                     lock_acquire+0xf6/0x290
[   21.763611]                     blk_mq_start_request+0xde/0x300
[   21.763614]                     null_queue_rq+0x70/0xc8 [null_blk]
[   21.763615]                     blk_mq_dispatch_rq_list+0x186/0x678
[   21.763617]                     blk_mq_do_dispatch_sched+0x90/0x128
[   21.763619]                     blk_mq_sched_dispatch_requests+0x168/0x200
[   21.763621]                     __blk_mq_run_hw_queue+0xd0/0x128
[   21.763623]                     process_one_work+0x25c/0x6f0
[   21.763625]                     worker_thread+0x52/0x400
[   21.763627]                     kthread+0x15a/0x178
[   21.763628]                     kernel_thread_starter+0x6/0xc
[   21.763630]                     kernel_thread_starter+0x0/0xc
[   21.763631]    INITIAL USE at:
[   21.763634]                    __lock_acquire+0x26c/0x13f0
[   21.763635]                    lock_acquire+0xf6/0x290
[   21.763637]                    blk_mq_start_request+0xde/0x300
[   21.763639]                    do_dasd_request+0x17a/0x470
[   21.763640]                    __blk_mq_try_issue_directly+0x1ca/0x208
[   21.763642]                    blk_mq_try_issue_directly+0x5c/0x108
[   21.763644]                    blk_mq_make_request+0x4b8/0xa20
[   21.763646]                    generic_make_request+0xea/0x2c8
[   21.763647]                    submit_bio+0xa4/0x1a0
[   21.763649]                    submit_bh_wbc+0x1c4/0x218
[   21.763651]                    block_read_full_page+0x366/0x408
[   21.763653]                    do_read_cache_page+0x1b2/0x3d0
[   21.763655]                    read_cache_page+0x30/0x40
[   21.763657]                    read_dev_sector+0x58/0xe8
[   21.763659]                    read_lba.isra.0+0x12a/0x1b8
[   21.763660]                    efi_partition+0x198/0x660
[   21.763662]                    check_partition+0x15c/0x2b8
[   21.763663]                    rescan_partitions+0xea/0x3f8
[   21.763666]                    blkdev_reread_part+0x44/0x68
[   21.763667]                    dasd_scan_partitions+0x90/0x148
[   21.763669]                    dasd_change_state+0x8cc/0xb28
[   21.763671]                    do_kick_device+0x4c/0x98
[   21.763673]                    process_one_work+0x25c/0x6f0
[   21.763675]                    worker_thread+0x52/0x400
[   21.763676]                    kthread+0x15a/0x178
[   21.763678]                    kernel_thread_starter+0x6/0xc
[   21.763680]                    kernel_thread_starter+0x0/0xc
[   21.763681]  }
[   21.763684]  ... key      at: [<0000000001bc22e8>] __key.43671+0x0/0x8
[   21.763685]  ... acquired at:
[   21.763687]    check_usage_backwards+0xa2/0x140
[   21.763688]    mark_lock+0x3ee/0x6f8
[   21.763690]    __lock_acquire+0x3fc/0x13f0
[   21.763692]    lock_acquire+0xf6/0x290
[   21.763693]    blk_mq_start_request+0xde/0x300
[   21.763696]    null_queue_rq+0x70/0xc8 [null_blk]
[   21.763697]    blk_mq_dispatch_rq_list+0x186/0x678
[   21.763699]    blk_mq_do_dispatch_sched+0x90/0x128
[   21.763701]    blk_mq_sched_dispatch_requests+0x168/0x200
[   21.763702]    __blk_mq_run_hw_queue+0xd0/0x128
[   21.763704]    process_one_work+0x25c/0x6f0
[   21.763706]    worker_thread+0x52/0x400
[   21.763707]    kthread+0x15a/0x178
[   21.763709]    kernel_thread_starter+0x6/0xc
[   21.763711]    kernel_thread_starter+0x0/0xc

[   21.763713] 
               stack backtrace:
[   21.763716] CPU: 1 PID: 129 Comm: kworker/1:1H Not tainted 4.16.0-11958-g16e205cf42da #168
[   21.763718] Hardware name: IBM 3906 M04 704 (LPAR)
[   21.763720] Workqueue: kblockd blk_mq_run_work_fn
[   21.763722] Call Trace:
[   21.763725] ([<00000000001151d4>] show_stack+0x9c/0x108)
[   21.763728]  [<00000000008c3e92>] dump_stack+0x9a/0xd8 
[   21.763730]  [<00000000001b8f02>] print_irq_inversion_bug+0x1ba/0x230 
[   21.763732]  [<00000000001b915a>] check_usage_backwards+0xa2/0x140 
[   21.763733]  [<00000000001ba7e6>] mark_lock+0x3ee/0x6f8 
[   21.763735]  [<00000000001bb524>] __lock_acquire+0x3fc/0x13f0 
[   21.763737]  [<00000000001bcd8e>] lock_acquire+0xf6/0x290 
[   21.763739]  [<00000000005824b6>] blk_mq_start_request+0xde/0x300 
[   21.763741]  [<000003ff803d34f8>] null_queue_rq+0x70/0xc8 [null_blk] 
[   21.763743]  [<00000000005857c6>] blk_mq_dispatch_rq_list+0x186/0x678 
[   21.763745]  [<000000000058b1d0>] blk_mq_do_dispatch_sched+0x90/0x128 
[   21.763747]  [<000000000058bba8>] blk_mq_sched_dispatch_requests+0x168/0x200 
[   21.763749]  [<0000000000583ad0>] __blk_mq_run_hw_queue+0xd0/0x128 
[   21.763751]  [<000000000016ace4>] process_one_work+0x25c/0x6f0 
[   21.763753]  [<000000000016b1ca>] worker_thread+0x52/0x400 
[   21.763754]  [<00000000001730ba>] kthread+0x15a/0x178 
[   21.763756]  [<00000000008e68ba>] kernel_thread_starter+0x6/0xc 
[   21.763758]  [<00000000008e68b4>] kernel_thread_starter+0x0/0xc 
[   21.763759] INFO: lockdep is turned off.
[   21.764713] null: module loaded

Comments

Sebastian Ott May 7, 2018, 2:48 p.m. UTC | #1
Hi,

On Thu, 19 Apr 2018, Sebastian Ott wrote:
> Since commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a
> RCU and generation based scheme") I observe lockdep complaints (full
> message attached):
> 
> [   21.763369]        CPU0                    CPU1
> [   21.763370]        ----                    ----
> [   21.763371]   lock(&rq->gstate_seq);
> [   21.763374]                                local_irq_disable();
> [   21.763375]                                lock(&(&dq->lock)->rlock);
> [   21.763377]                                lock(&rq->gstate_seq);
> [   21.763379]   <Interrupt>
> [   21.763380]     lock(&(&dq->lock)->rlock);
> [   21.763382] 
>                 *** DEADLOCK ***
> 
> 
> This only happens in combination with DASD (s390 specific) and another
> blk-mq driver (scsi, null_blk). The distinctive behavior of DASD is that
> it calls blk_mq_start_request with irqs disabled.
> 
> This is a false positive since gstate_seq on CPU0 is from a different
> request queue / block driver than gstate_seq on CPU1.
> 
> Possible fixes are to use local_irq_save/restore in blk_mq_start_request.
> Or, since it's a false positive, teach lockdep that these are different
> locks - like below:

Is there something we can do here? I've sent 2 proposals to address this. Do they
make sense?

Regards,
Sebastian
diff mbox

Patch

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0dc9e341c2a7..d1a2c2fa015d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2040,7 +2040,7 @@  static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
+	__seqcount_init(&rq->gstate_seq, "&rq->gstate_seq", set->rq_seqcount_key);
 	u64_stats_init(&rq->aborted_gstate_sync);
 	return 0;
 }
@@ -2774,7 +2774,7 @@  static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  * requested depth down, if if it too large. In that case, the set
  * value will be stored in set->queue_depth.
  */
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key)
 {
 	int ret;
 
@@ -2825,6 +2825,7 @@  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->mq_map)
 		goto out_free_tags;
 
+	set->rq_seqcount_key = key;
 	ret = blk_mq_update_queue_map(set);
 	if (ret)
 		goto out_free_mq_map;
@@ -2846,7 +2847,7 @@  int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	set->tags = NULL;
 	return ret;
 }
-EXPORT_SYMBOL(blk_mq_alloc_tag_set);
+EXPORT_SYMBOL(__blk_mq_alloc_tag_set);
 
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3986f4b3461..1fb8bc1fdb07 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -85,6 +85,8 @@  struct blk_mq_tag_set {
 
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
+
+	struct lock_class_key	*rq_seqcount_key;
 };
 
 struct blk_mq_queue_data {
@@ -201,7 +203,15 @@  struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key);
+
+static inline int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+{
+	static struct lock_class_key _seqcount_key;
+
+	return __blk_mq_alloc_tag_set(set, &_seqcount_key);
+}
+
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);