Message ID | 20170208222507.25715-38-bart.vanassche@sandisk.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
On Wed, 2017-02-08 at 14:25 -0800, Bart Van Assche wrote: > Move the target_depend_item() call out of the critical section. > This patch avoids that lockdep reports the following: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 4.10.0-rc4-debug+ #1 Not tainted > ------------------------------------------------------- > rmdir/1799 is trying to acquire lock: > (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > > but task is already holding lock: > (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (&sb->s_type->i_mutex_key#15){++++++}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167eacf>] down_write+0x3f/0x70 > [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] > [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] > [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #1 (g_device_mutex){+.+...}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #0 (&sess->cmdsn_mutex){+.+.+.}: > > [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] > [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] > [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] > [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] > [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 > [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 > [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 > [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 > > other info that might help us debug this: > > Chain exists of: > &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&sb->s_type->i_mutex_key#15); > lock(g_device_mutex); > lock(&sb->s_type->i_mutex_key#15); > lock(&sess->cmdsn_mutex); > > *** DEADLOCK *** > > 3 locks held by rmdir/1799: > #0: (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50 > #1: (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200 > #2: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > stack backtrace: > CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 > Call Trace: > dump_stack+0x86/0xc3 > print_circular_bug+0x1c7/0x220 > __lock_acquire+0x10e6/0x1260 > lock_acquire+0x71/0x90 > mutex_lock_nested+0x5f/0x670 > iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > iscsit_close_session+0xac/0x200 [iscsi_target_mod] > lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > config_item_release+0x5a/0xc0 [configfs] > config_item_put+0x1d/0x1f [configfs] > configfs_rmdir+0x1a6/0x300 [configfs] > vfs_rmdir+0xb7/0x140 > do_rmdir+0x1f4/0x200 > SyS_rmdir+0x11/0x20 > entry_SYSCALL_64_fastpath+0x23/0xc6 > > Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Cc: Hannes Reinecke <hare@suse.com> > Cc: David Disseldorp <ddiss@suse.de> > --- > drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------ > 1 file changed, 23 insertions(+), 20 deletions(-) > > diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c > index 180d8d718db5..623835371f2f 100644 > --- a/drivers/target/target_core_xcopy.c > +++ b/drivers/target/target_core_xcopy.c > @@ -60,6 +60,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, > unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; > int rc; > > + *found_dev = NULL; > + > mutex_lock(&g_device_mutex); > list_for_each_entry(se_dev, &g_device_list, g_dev_node) { > > @@ -69,32 +71,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, > memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); > target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); > > - rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); > - if (rc != 0) > + if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN)) > continue; > > - *found_dev = se_dev; > - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); > + if (target_get_device(se_dev)) > + *found_dev = se_dev; > + break; > + } > + mutex_unlock(&g_device_mutex); > > - rc = target_depend_item(&se_dev->dev_group.cg_item); > - if (rc != 0) { > - pr_err("configfs_depend_item attempt failed:" > - " %d for se_dev: %p\n", rc, se_dev); > - mutex_unlock(&g_device_mutex); > - return rc; > - } > + if (*found_dev == NULL) { > + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); > + return -EINVAL; > + } > > - pr_debug("Called configfs_depend_item for se_dev: %p" > - " se_dev->se_dev_group: %p\n", se_dev, > - &se_dev->dev_group); > + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); > > - mutex_unlock(&g_device_mutex); > - return 0; > - } > - mutex_unlock(&g_device_mutex); > + rc = target_depend_item(&se_dev->dev_group.cg_item); > + if (rc != 0) > + pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", > + rc, se_dev); > + else > + pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", > + se_dev, &se_dev->dev_group); > > - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); > - return -EINVAL; > + target_put_device(se_dev); > + > + return rc; > } > > static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, In practice this, this deadlock has never triggered with EXTENDED_COPY and active I/O session shutdown with iscsi-target. I do think it's a reasonable approach to avoid it the potential of the deadlock (or at least make deadlock happy), but #35 needs to get merged first. That said, ignoring this patch for now. -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc4-debug+ #1 Not tainted ------------------------------------------------------- rmdir/1799 is trying to acquire lock: (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] but task is already holding lock: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sb->s_type->i_mutex_key#15){++++++}: [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167eacf>] down_write+0x3f/0x70 [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] [<ffffffff810828c2>] kthread+0x102/0x140 [<ffffffff81681b21>] ret_from_fork+0x31/0x40 -> #1 (g_device_mutex){+.+...}: [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] [<ffffffff810828c2>] kthread+0x102/0x140 [<ffffffff81681b21>] ret_from_fork+0x31/0x40 -> #0 (&sess->cmdsn_mutex){+.+.+.}: [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 other info that might help us debug this: Chain exists of: &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sb->s_type->i_mutex_key#15); lock(g_device_mutex); lock(&sb->s_type->i_mutex_key#15); lock(&sess->cmdsn_mutex); *** DEADLOCK *** 3 locks held by rmdir/1799: #0: (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50 #1: (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200 #2: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 stack backtrace: CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1c7/0x220 __lock_acquire+0x10e6/0x1260 lock_acquire+0x71/0x90 mutex_lock_nested+0x5f/0x670 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] iscsit_close_session+0xac/0x200 [iscsi_target_mod] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] config_item_release+0x5a/0xc0 [configfs] config_item_put+0x1d/0x1f [configfs] configfs_rmdir+0x1a6/0x300 [configfs] vfs_rmdir+0xb7/0x140 do_rmdir+0x1f4/0x200 SyS_rmdir+0x11/0x20 entry_SYSCALL_64_fastpath+0x23/0xc6 Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.com> Cc: David Disseldorp <ddiss@suse.de> --- drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 180d8d718db5..623835371f2f 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -60,6 +60,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; + *found_dev = NULL; + mutex_lock(&g_device_mutex); list_for_each_entry(se_dev, &g_device_list, g_dev_node) { @@ -69,32 +71,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) + if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN)) continue; - *found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); + if (target_get_device(se_dev)) + *found_dev = se_dev; + break; + } + mutex_unlock(&g_device_mutex); - rc = target_depend_item(&se_dev->dev_group.cg_item); - if (rc != 0) { - pr_err("configfs_depend_item attempt failed:" - " %d for se_dev: %p\n", rc, se_dev); - mutex_unlock(&g_device_mutex); - return rc; - } + if (*found_dev == NULL) { + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; + } - pr_debug("Called configfs_depend_item for se_dev: %p" - " se_dev->se_dev_group: %p\n", se_dev, - &se_dev->dev_group); + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - mutex_unlock(&g_device_mutex); - return 0; - } - mutex_unlock(&g_device_mutex); + rc = target_depend_item(&se_dev->dev_group.cg_item); + if (rc != 0) + pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", + rc, se_dev); + else + pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", + se_dev, &se_dev->dev_group); - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + target_put_device(se_dev); + + return rc; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,