Message ID | 20170523234854.21452-12-bart.vanassche@sandisk.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, 2017-05-23 at 16:48 -0700, Bart Van Assche wrote: > Move the target_depend_item() call out of the critical section. > This patch avoids that lockdep reports the following: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 4.10.0-rc4-debug+ #1 Not tainted > ------------------------------------------------------- > rmdir/1799 is trying to acquire lock: > (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > > but task is already holding lock: > (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (&sb->s_type->i_mutex_key#15){++++++}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167eacf>] down_write+0x3f/0x70 > [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] > [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] > [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #1 (g_device_mutex){+.+...}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #0 (&sess->cmdsn_mutex){+.+.+.}: > > [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] > [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] > [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] > [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] > [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 > [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 > [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 > [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 > > other info that might help us debug this: > > Chain exists of: > &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&sb->s_type->i_mutex_key#15); > lock(g_device_mutex); > lock(&sb->s_type->i_mutex_key#15); > lock(&sess->cmdsn_mutex); > > *** DEADLOCK *** > > 3 locks held by rmdir/1799: > #0: (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50 > #1: (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200 > #2: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > stack backtrace: > CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 > Call Trace: > dump_stack+0x86/0xc3 > print_circular_bug+0x1c7/0x220 > __lock_acquire+0x10e6/0x1260 > lock_acquire+0x71/0x90 > mutex_lock_nested+0x5f/0x670 > iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > iscsit_close_session+0xac/0x200 [iscsi_target_mod] > lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > config_item_release+0x5a/0xc0 [configfs] > config_item_put+0x1d/0x1f [configfs] > configfs_rmdir+0x1a6/0x300 [configfs] > vfs_rmdir+0xb7/0x140 > do_rmdir+0x1f4/0x200 > SyS_rmdir+0x11/0x20 > entry_SYSCALL_64_fastpath+0x23/0xc6 > > Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Cc: Hannes Reinecke <hare@suse.com> > Cc: David Disseldorp <ddiss@suse.de> > --- > drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------ > 1 file changed, 23 insertions(+), 20 deletions(-) > AFAICT with patch #8 in place to do the target_xcopy_locate_se_dev_e4() outside of target_do_xcopy() while iscsi-target code is holding sess->cmdsn_mutex, this patch along with patch #9 and #10 are unnecessary. Dropping them. -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/23/2017 06:48 PM, Bart Van Assche wrote: > Move the target_depend_item() call out of the critical section. > This patch avoids that lockdep reports the following: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 4.10.0-rc4-debug+ #1 Not tainted > ------------------------------------------------------- > rmdir/1799 is trying to acquire lock: > (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > > but task is already holding lock: > (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (&sb->s_type->i_mutex_key#15){++++++}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167eacf>] down_write+0x3f/0x70 > [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] > [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] > [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #1 (g_device_mutex){+.+...}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #0 (&sess->cmdsn_mutex){+.+.+.}: > > [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] > [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] > [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] > [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] > [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 > [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 > [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 > [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 > > other info that might help us debug this: > > Chain exists of: > &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&sb->s_type->i_mutex_key#15); > lock(g_device_mutex); > lock(&sb->s_type->i_mutex_key#15); > lock(&sess->cmdsn_mutex); > > *** DEADLOCK *** > > 3 locks held by rmdir/1799: > #0: (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50 > #1: (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200 > #2: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > stack backtrace: > CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 > Call Trace: > dump_stack+0x86/0xc3 > print_circular_bug+0x1c7/0x220 > __lock_acquire+0x10e6/0x1260 > lock_acquire+0x71/0x90 > mutex_lock_nested+0x5f/0x670 > iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > iscsit_close_session+0xac/0x200 [iscsi_target_mod] > lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > config_item_release+0x5a/0xc0 [configfs] > config_item_put+0x1d/0x1f [configfs] > configfs_rmdir+0x1a6/0x300 [configfs] > vfs_rmdir+0xb7/0x140 > do_rmdir+0x1f4/0x200 > SyS_rmdir+0x11/0x20 > entry_SYSCALL_64_fastpath+0x23/0xc6 > > Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Cc: Hannes Reinecke <hare@suse.com> > Cc: David Disseldorp <ddiss@suse.de> I tested the patch here while doing lun unmap + backend device removals at the same time as xcopys, and it works as expected for me. Reviewed-by: Mike Christie <mchristi@redhat.com> -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/23/2017 06:48 PM, Bart Van Assche wrote: > diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c > index 56738a41e346..27414ab01b24 100644 > --- a/drivers/target/target_core_xcopy.c > +++ b/drivers/target/target_core_xcopy.c > @@ -62,6 +62,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, > unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; > int rc; > > + *found_dev = NULL; > + > mutex_lock(&g_device_mutex); > list_for_each_entry(se_dev, &g_device_list, g_dev_node) { > > @@ -71,32 +73,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, > memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); > target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); > > - rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); > - if (rc != 0) > + if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN)) > continue; > > - *found_dev = se_dev; > - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); > + if (target_get_device(se_dev)) Hey Bart, I think there is one small problem. Maybe not necessarily with this patch but the change in it exposes it. If after we do a successful target_get_device a user app deletes the backend device (does rmdir /sys/kernel/config/target/core/fileio_1/somedevice), rmdir will will return success immediately since we have a done a get above. The app will then do rmdir on (/sys/kernel/config/target/core/fileio_1). The problem is that if the second rmdir on fileio_1 runs before target_put_device below is done, then core_delete_hba will hit the WARN_ON(hba->dev_count). Note that in this test target_depend_item will fail because the first rmdir on "somedevice" has already succeeded. This does not happen without your patch, because the first rmdir on /sys/kernel/config/target/core/fileio_1/somedevice would drop the refcount to zero and the removing thread would either run target_free_device before target_xcopy_locate_se_dev_e4 can grab the g_device_mutex. Or, if target_xcopy_locate_se_dev_e4 grabs the g_device_mutex firs, then target_depend_item will fail since because we already deleting the device. > + *found_dev = se_dev; > + break; > + } > + mutex_unlock(&g_device_mutex); > > - rc = target_depend_item(&se_dev->dev_group.cg_item); > - if (rc != 0) { > - pr_err("configfs_depend_item attempt failed:" > - " %d for se_dev: %p\n", rc, se_dev); > - mutex_unlock(&g_device_mutex); > - return rc; > - } > + if (*found_dev == NULL) { > + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); > + return -EINVAL; > + } > > - pr_debug("Called configfs_depend_item for se_dev: %p" > - " se_dev->se_dev_group: %p\n", se_dev, > - &se_dev->dev_group); > + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); > > - mutex_unlock(&g_device_mutex); > - return 0; > - } > - mutex_unlock(&g_device_mutex); > + rc = target_depend_item(&se_dev->dev_group.cg_item); > + if (rc != 0) > + pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", > + rc, se_dev); > + else > + pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", > + se_dev, &se_dev->dev_group); > > - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); > - return -EINVAL; > + target_put_device(se_dev); > + > + return rc; > } > > static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, > -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/23/2017 06:48 PM, Bart Van Assche wrote: > Move the target_depend_item() call out of the critical section. > This patch avoids that lockdep reports the following: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 4.10.0-rc4-debug+ #1 Not tainted > ------------------------------------------------------- > rmdir/1799 is trying to acquire lock: > (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > > but task is already holding lock: > (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 (&sb->s_type->i_mutex_key#15){++++++}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167eacf>] down_write+0x3f/0x70 > [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] > [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] > [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #1 (g_device_mutex){+.+...}: > > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] > [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] > [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] > [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] > [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] > [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] > [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] > [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] > [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] > [<ffffffff810828c2>] kthread+0x102/0x140 > [<ffffffff81681b21>] ret_from_fork+0x31/0x40 > > -> #0 (&sess->cmdsn_mutex){+.+.+.}: > > [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 > [<ffffffff810b4731>] lock_acquire+0x71/0x90 > [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 > [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] > [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] > [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] > [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] > [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] > [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] > [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] > [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] > [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] > [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 > [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 > [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 > [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 > > other info that might help us debug this: > > Chain exists of: > &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 > > Possible unsafe locking scenario: > > CPU0 CPU1 > ---- ---- > lock(&sb->s_type->i_mutex_key#15); > lock(g_device_mutex); > lock(&sb->s_type->i_mutex_key#15); > lock(&sess->cmdsn_mutex); > Hey Bart, is the g_device_mutex an issue here? It is not clear to me. It looks like the problem that is reported is that: iscsit_sequence_cmd will grab the cmdsn mutex and then if it is a xcopy command we can end up calling configfs_depend_item which grabs the configfs/fs lock. And, if you remove a acl, it is saying we grab the same configfs/fs lock and then grab the cmdsn_mutex during cleanup, so here we have a issue. -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2017-06-15 at 19:10 -0500, Mike Christie wrote: > I think there is one small problem. Maybe not necessarily with this > patch but the change in it exposes it. > > If after we do a successful target_get_device a user app deletes the > backend device (does rmdir > /sys/kernel/config/target/core/fileio_1/somedevice), rmdir will will > return success immediately since we have a done a get above. The app > will then do rmdir on (/sys/kernel/config/target/core/fileio_1). > > The problem is that if the second rmdir on fileio_1 runs before > target_put_device below is done, then core_delete_hba will hit the > WARN_ON(hba->dev_count). Note that in this test target_depend_item will > fail because the first rmdir on "somedevice" has already succeeded. > > This does not happen without your patch, because the first rmdir on > /sys/kernel/config/target/core/fileio_1/somedevice would drop the > refcount to zero and the removing thread would either run > target_free_device before target_xcopy_locate_se_dev_e4 can grab the > g_device_mutex. Or, if target_xcopy_locate_se_dev_e4 grabs the > g_device_mutex firs, then target_depend_item will fail since because we > already deleting the device. Hello Mike, Had you noticed that due to patch "target: Fix a deadlock between the XCOPY code and iSCSI session shutdown" (commit 06546e842c24 in Nic's for-next branch) this patch is no longer necessary? Please let me know if you would need this patch anyway for another purpose than for fixing the deadlock explained in the description of this patch. Bart.-- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc4-debug+ #1 Not tainted ------------------------------------------------------- rmdir/1799 is trying to acquire lock: (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] but task is already holding lock: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sb->s_type->i_mutex_key#15){++++++}: [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167eacf>] down_write+0x3f/0x70 [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs] [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod] [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod] [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] [<ffffffff810828c2>] kthread+0x102/0x140 [<ffffffff81681b21>] ret_from_fork+0x31/0x40 -> #1 (g_device_mutex){+.+...}: [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod] [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod] [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod] [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod] [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod] [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod] [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod] [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod] [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod] [<ffffffff810828c2>] kthread+0x102/0x140 [<ffffffff81681b21>] ret_from_fork+0x31/0x40 -> #0 (&sess->cmdsn_mutex){+.+.+.}: [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260 [<ffffffff810b4731>] lock_acquire+0x71/0x90 [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670 [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod] [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs] [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs] [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs] [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140 [<ffffffff811cb404>] do_rmdir+0x1f4/0x200 [<ffffffff811cc131>] SyS_rmdir+0x11/0x20 [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6 other info that might help us debug this: Chain exists of: &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&sb->s_type->i_mutex_key#15); lock(g_device_mutex); lock(&sb->s_type->i_mutex_key#15); lock(&sess->cmdsn_mutex); *** DEADLOCK *** 3 locks held by rmdir/1799: #0: (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50 #1: (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200 #2: (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140 stack backtrace: CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1c7/0x220 __lock_acquire+0x10e6/0x1260 lock_acquire+0x71/0x90 mutex_lock_nested+0x5f/0x670 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod] iscsit_close_session+0xac/0x200 [iscsi_target_mod] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod] target_shutdown_sessions+0xc3/0xd0 [target_core_mod] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod] config_item_release+0x5a/0xc0 [configfs] config_item_put+0x1d/0x1f [configfs] configfs_rmdir+0x1a6/0x300 [configfs] vfs_rmdir+0xb7/0x140 do_rmdir+0x1f4/0x200 SyS_rmdir+0x11/0x20 entry_SYSCALL_64_fastpath+0x23/0xc6 Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Hannes Reinecke <hare@suse.com> Cc: David Disseldorp <ddiss@suse.de> --- drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 56738a41e346..27414ab01b24 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -62,6 +62,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; + *found_dev = NULL; + mutex_lock(&g_device_mutex); list_for_each_entry(se_dev, &g_device_list, g_dev_node) { @@ -71,32 +73,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) + if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN)) continue; - *found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); + if (target_get_device(se_dev)) + *found_dev = se_dev; + break; + } + mutex_unlock(&g_device_mutex); - rc = target_depend_item(&se_dev->dev_group.cg_item); - if (rc != 0) { - pr_err("configfs_depend_item attempt failed:" - " %d for se_dev: %p\n", rc, se_dev); - mutex_unlock(&g_device_mutex); - return rc; - } + if (*found_dev == NULL) { + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; + } - pr_debug("Called configfs_depend_item for se_dev: %p" - " se_dev->se_dev_group: %p\n", se_dev, - &se_dev->dev_group); + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - mutex_unlock(&g_device_mutex); - return 0; - } - mutex_unlock(&g_device_mutex); + rc = target_depend_item(&se_dev->dev_group.cg_item); + if (rc != 0) + pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", + rc, se_dev); + else + pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", + se_dev, &se_dev->dev_group); - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + target_put_device(se_dev); + + return rc; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,