diff mbox

iscsi: make mutex for target scanning and unbinding per-session

Message ID 1478542920-24460-1-git-send-email-cleech@redhat.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Chris Leech Nov. 7, 2016, 6:22 p.m. UTC
Currently the iSCSI transport class synchronises target scanning and
unbinding with a host level mutex.  For multi-session hosts (offloading
iSCSI HBAs) connecting to storage arrays that may implement one
target-per-lun, this can result in the target scan work for hundreds of
sessions being serialized behind a single mutex.  With slow enough
response times, this can cause scan requests initiated from userspace to
block on the mutex long enough to trigger 120 sec hung task warnings.

I can't see any reason not to move this to a session level mutex and let
the target scans run in parallel, speeding up connecting to a large
number of targets.  Note that as iscsi_tcp creates a virtual host for
each session, software iSCSI is effectively doing this already.

Signed-off-by: Chris Leech <cleech@redhat.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 19 ++++++-------------
 include/scsi/scsi_transport_iscsi.h |  2 +-
 2 files changed, 7 insertions(+), 14 deletions(-)

Comments

Chris Leech Nov. 10, 2016, 9:22 p.m. UTC | #1
On Thu, Nov 10, 2016 at 10:00:54AM -0800, The Lee-Man wrote:
> On Monday, November 7, 2016 at 11:22:23 AM UTC-7, Chris Leech wrote:
> >
> > Currently the iSCSI transport class synchronises target scanning and 
> > unbinding with a host level mutex.  For multi-session hosts (offloading 
> > iSCSI HBAs) connecting to storage arrays that may implement one 
> > target-per-lun, this can result in the target scan work for hundreds of 
> > sessions being serialized behind a single mutex.  With slow enough 
> > response times, this can cause scan requests initiated from userspace to 
> > block on the mutex long enough to trigger 120 sec hung task warnings. 
> >
> > I can't see any reason not to move this to a session level mutex and let 
> > the target scans run in parallel, speeding up connecting to a large 
> > number of targets.  Note that as iscsi_tcp creates a virtual host for 
> > each session, software iSCSI is effectively doing this already. 
> >
> 
> I understood the reason for this mutex was to protect against the case
> where there are multiple paths to a target. In such cases, you can get
> simultaneous access to sysfs attributes (files), which can cause errors,
> i.e. two threads trying to write an attribute at the same time, or one
> changing an attribute while another reads or removes it.

This particular mutex is only serializing scanning targets for devices,
and used in __iscsi_unbind_session to ensure that no scans are in
progress adding new scsi devices while we're trying to remove a target.
 
> I worry that changing it will not address those issues.
> 
> [Side note: we *really* need a test suite that somehow includes
>  cases like this.]
> 
> >
> > Signed-off-by: Chris Leech <cleech@redhat.com> 
> > --- 
> >  drivers/scsi/scsi_transport_iscsi.c | 19 ++++++------------- 
> >  include/scsi/scsi_transport_iscsi.h |  2 +- 
> >  2 files changed, 7 insertions(+), 14 deletions(-) 
> >
> > diff --git a/drivers/scsi/scsi_transport_iscsi.c 
> > b/drivers/scsi/scsi_transport_iscsi.c 
> > index 42bca61..83c90fa 100644 
> > --- a/drivers/scsi/scsi_transport_iscsi.c 
> > +++ b/drivers/scsi/scsi_transport_iscsi.c 
> > @@ -1568,7 +1568,6 @@ static int iscsi_setup_host(struct 
> > transport_container *tc, struct device *dev, 
> >   
> >          memset(ihost, 0, sizeof(*ihost)); 
> >          atomic_set(&ihost->nr_scans, 0); 
> > -        mutex_init(&ihost->mutex); 
> >   
> >          iscsi_bsg_host_add(shost, ihost); 
> >          /* ignore any bsg add error - we just can't do sgio */ 
> > @@ -1789,8 +1788,6 @@ static int iscsi_user_scan_session(struct device 
> > *dev, void *data) 
> >  { 
> >          struct iscsi_scan_data *scan_data = data; 
> >          struct iscsi_cls_session *session; 
> > -        struct Scsi_Host *shost; 
> > -        struct iscsi_cls_host *ihost; 
> >          unsigned long flags; 
> >          unsigned int id; 
> >   
> > @@ -1801,10 +1798,7 @@ static int iscsi_user_scan_session(struct device 
> > *dev, void *data) 
> >   
> >          ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n"); 
> >   
> > -        shost = iscsi_session_to_shost(session); 
> > -        ihost = shost->shost_data; 
> > - 
> > -        mutex_lock(&ihost->mutex); 
> > +        mutex_lock(&session->mutex); 
> >          spin_lock_irqsave(&session->lock, flags); 
> >          if (session->state != ISCSI_SESSION_LOGGED_IN) { 
> >                  spin_unlock_irqrestore(&session->lock, flags); 
> > @@ -1823,7 +1817,7 @@ static int iscsi_user_scan_session(struct device 
> > *dev, void *data) 
> >          } 
> >   
> >  user_scan_exit: 
> > -        mutex_unlock(&ihost->mutex); 
> > +        mutex_unlock(&session->mutex); 
> >          ISCSI_DBG_TRANS_SESSION(session, "Completed session scan\n"); 
> >          return 0; 
> >  } 
> > @@ -2001,26 +1995,24 @@ static void __iscsi_unbind_session(struct 
> > work_struct *work) 
> >          struct iscsi_cls_session *session = 
> >                          container_of(work, struct iscsi_cls_session, 
> >                                       unbind_work); 
> > -        struct Scsi_Host *shost = iscsi_session_to_shost(session); 
> > -        struct iscsi_cls_host *ihost = shost->shost_data; 
> >          unsigned long flags; 
> >          unsigned int target_id; 
> >   
> >          ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); 
> >   
> >          /* Prevent new scans and make sure scanning is not in progress */ 
> > -        mutex_lock(&ihost->mutex); 
> > +        mutex_lock(&session->mutex); 
> >          spin_lock_irqsave(&session->lock, flags); 
> >          if (session->target_id == ISCSI_MAX_TARGET) { 
> >                  spin_unlock_irqrestore(&session->lock, flags); 
> > -                mutex_unlock(&ihost->mutex); 
> > +                mutex_unlock(&session->mutex); 
> >                  return; 
> >          } 
> >   
> >          target_id = session->target_id; 
> >          session->target_id = ISCSI_MAX_TARGET; 
> >          spin_unlock_irqrestore(&session->lock, flags); 
> > -        mutex_unlock(&ihost->mutex); 
> > +        mutex_unlock(&session->mutex); 
> >   
> >          if (session->ida_used) 
> >                  ida_simple_remove(&iscsi_sess_ida, target_id); 
> > @@ -2053,6 +2045,7 @@ iscsi_alloc_session(struct Scsi_Host *shost, struct 
> > iscsi_transport *transport, 
> >          INIT_WORK(&session->unbind_work, __iscsi_unbind_session); 
> >          INIT_WORK(&session->scan_work, iscsi_scan_session); 
> >          spin_lock_init(&session->lock); 
> > +        mutex_init(&session->mutex); 
> >   
> >          /* this is released in the dev's release function */ 
> >          scsi_host_get(shost); 
> > diff --git a/include/scsi/scsi_transport_iscsi.h 
> > b/include/scsi/scsi_transport_iscsi.h 
> > index 6183d20..acf9d9d 100644 
> > --- a/include/scsi/scsi_transport_iscsi.h 
> > +++ b/include/scsi/scsi_transport_iscsi.h 
> > @@ -238,6 +238,7 @@ struct iscsi_cls_session { 
> >          struct work_struct unblock_work; 
> >          struct work_struct scan_work; 
> >          struct work_struct unbind_work; 
> > +        struct mutex mutex; 
> >   
> >          /* recovery fields */ 
> >          int recovery_tmo; 
> > @@ -272,7 +273,6 @@ struct iscsi_cls_session { 
> >   
> >  struct iscsi_cls_host { 
> >          atomic_t nr_scans; 
> > -        struct mutex mutex; 
> >          struct request_queue *bsg_q; 
> >          uint32_t port_speed; 
> >          uint32_t port_state; 
> > -- 
> > 2.7.4 
> >
> >
> 
> -- 
> You received this message because you are subscribed to the Google Groups "open-iscsi" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to open-iscsi+unsubscribe@googlegroups.com.
> To post to this group, send email to open-iscsi@googlegroups.com.
> Visit this group at https://groups.google.com/group/open-iscsi.
> For more options, visit https://groups.google.com/d/optout.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Christie Nov. 10, 2016, 11:22 p.m. UTC | #2
On 11/07/2016 12:22 PM, Chris Leech wrote:
> Currently the iSCSI transport class synchronises target scanning and
> unbinding with a host level mutex.  For multi-session hosts (offloading
> iSCSI HBAs) connecting to storage arrays that may implement one
> target-per-lun, this can result in the target scan work for hundreds of
> sessions being serialized behind a single mutex.  With slow enough

Does this patch alone help or is there a scsi piece too?

There is also scsi_host->scan_mutex taken in the scsi layer during
scsi_scan_target, so it is serialized there too. It seems like this
patch would move that problem one layer down.

>  
> @@ -1801,10 +1798,7 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
>  
>  	ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n");
>  
> -	shost = iscsi_session_to_shost(session);
> -	ihost = shost->shost_data;
> -
> -	mutex_lock(&ihost->mutex);
> +	mutex_lock(&session->mutex);
>  	spin_lock_irqsave(&session->lock, flags);
>  	if (session->state != ISCSI_SESSION_LOGGED_IN) {
>  		spin_unlock_irqrestore(&session->lock, flags);
> @@ -1823,7 +1817,7 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
>  	}

The patch will allow you to remove other sessions while scanning is
running, so it could still be a good idea.

I think I originally added the mutex because we did our own loop over a
list of the host's sessions. If a unbind were to occur at the same time
then it would be freed while scanning. We changed the user scan to use
device_for_each_child so that will grab a reference to the session so
the memory will not be freed now. It now just makes sure that scsi
target removal and iscsi_remove_session wait until the scan is done.

On a related note, you can remove all the iscsi_scan_session code. We do
not use it anymore. qla4xxx used to do async scans in the kernel with
that code but does not anymore. In the future someone will also not ask
why we grab the mutex around one scan and not the other.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Leech Nov. 11, 2016, 1:13 a.m. UTC | #3
On Thu, Nov 10, 2016 at 05:22:44PM -0600, Mike Christie wrote:
> On 11/07/2016 12:22 PM, Chris Leech wrote:
> > Currently the iSCSI transport class synchronises target scanning and
> > unbinding with a host level mutex.  For multi-session hosts (offloading
> > iSCSI HBAs) connecting to storage arrays that may implement one
> > target-per-lun, this can result in the target scan work for hundreds of
> > sessions being serialized behind a single mutex.  With slow enough
> 
> Does this patch alone help or is there a scsi piece too?
 
I had this tested when working a hung task timeout issue at boot, and
was told that it fixed the issue.  The exact situation may be more
complex, I think it was only 128 sessions which is surprising that it
would hit a 2 minute timeout.  But every backtrace was at this mutex.

> There is also scsi_host->scan_mutex taken in the scsi layer during
> scsi_scan_target, so it is serialized there too. It seems like this
> patch would move that problem one layer down.

I'll take a closer look at that, thanks.
 
> >  
> > @@ -1801,10 +1798,7 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
> >  
> >  	ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n");
> >  
> > -	shost = iscsi_session_to_shost(session);
> > -	ihost = shost->shost_data;
> > -
> > -	mutex_lock(&ihost->mutex);
> > +	mutex_lock(&session->mutex);
> >  	spin_lock_irqsave(&session->lock, flags);
> >  	if (session->state != ISCSI_SESSION_LOGGED_IN) {
> >  		spin_unlock_irqrestore(&session->lock, flags);
> > @@ -1823,7 +1817,7 @@ static int iscsi_user_scan_session(struct device *dev, void *data)
> >  	}
> 
> The patch will allow you to remove other sessions while scanning is
> running, so it could still be a good idea.
> 
> I think I originally added the mutex because we did our own loop over a
> list of the host's sessions. If a unbind were to occur at the same time
> then it would be freed while scanning. We changed the user scan to use
> device_for_each_child so that will grab a reference to the session so
> the memory will not be freed now. It now just makes sure that scsi
> target removal and iscsi_remove_session wait until the scan is done.
> 
> On a related note, you can remove all the iscsi_scan_session code. We do
> not use it anymore. qla4xxx used to do async scans in the kernel with
> that code but does not anymore. In the future someone will also not ask
> why we grab the mutex around one scan and not the other.
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Christie Nov. 11, 2016, 5:01 a.m. UTC | #4
On 11/10/2016 07:13 PM, Chris Leech wrote:
> On Thu, Nov 10, 2016 at 05:22:44PM -0600, Mike Christie wrote:
>> > On 11/07/2016 12:22 PM, Chris Leech wrote:
>>> > > Currently the iSCSI transport class synchronises target scanning and
>>> > > unbinding with a host level mutex.  For multi-session hosts (offloading
>>> > > iSCSI HBAs) connecting to storage arrays that may implement one
>>> > > target-per-lun, this can result in the target scan work for hundreds of
>>> > > sessions being serialized behind a single mutex.  With slow enough
>> > 
>> > Does this patch alone help or is there a scsi piece too?
>  
> I had this tested when working a hung task timeout issue at boot, and
> was told that it fixed the issue.  The exact situation may be more
> complex, I think it was only 128 sessions which is surprising that it
> would hit a 2 minute timeout.  But every backtrace was at this mutex.
> 

I think you are also hitting a issue where the normal scan time is
higher than usual and that might be a userspace bug. I am not sure how
the mutex patch helps, but I have not thought about it.

I think iscsid will scan the entire host during login. We only want
iscsi_sysfs_scan_host to scan the specific target for the session we
just logged into.

In the kernel we are setting SCSI_SCAN_RESCAN for the scan and userspace
did echo - - - > ..../scan for the entire host, so iscsi_user_scan is
going to rescan every target that is setup. So once you get to 127
sessions/targets it could be a loonngg scan of all 127 of them, and
target 128 is going to have to wait a loooonnggg time for that mutex and
then also execute a long scan.

If you have userspace do the single target scan, it should execute
faster. I know that does not solve the serialization problem. You will
still have lots of targets waiting to be scanned.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 42bca61..83c90fa 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1568,7 +1568,6 @@  static int iscsi_setup_host(struct transport_container *tc, struct device *dev,
 
 	memset(ihost, 0, sizeof(*ihost));
 	atomic_set(&ihost->nr_scans, 0);
-	mutex_init(&ihost->mutex);
 
 	iscsi_bsg_host_add(shost, ihost);
 	/* ignore any bsg add error - we just can't do sgio */
@@ -1789,8 +1788,6 @@  static int iscsi_user_scan_session(struct device *dev, void *data)
 {
 	struct iscsi_scan_data *scan_data = data;
 	struct iscsi_cls_session *session;
-	struct Scsi_Host *shost;
-	struct iscsi_cls_host *ihost;
 	unsigned long flags;
 	unsigned int id;
 
@@ -1801,10 +1798,7 @@  static int iscsi_user_scan_session(struct device *dev, void *data)
 
 	ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n");
 
-	shost = iscsi_session_to_shost(session);
-	ihost = shost->shost_data;
-
-	mutex_lock(&ihost->mutex);
+	mutex_lock(&session->mutex);
 	spin_lock_irqsave(&session->lock, flags);
 	if (session->state != ISCSI_SESSION_LOGGED_IN) {
 		spin_unlock_irqrestore(&session->lock, flags);
@@ -1823,7 +1817,7 @@  static int iscsi_user_scan_session(struct device *dev, void *data)
 	}
 
 user_scan_exit:
-	mutex_unlock(&ihost->mutex);
+	mutex_unlock(&session->mutex);
 	ISCSI_DBG_TRANS_SESSION(session, "Completed session scan\n");
 	return 0;
 }
@@ -2001,26 +1995,24 @@  static void __iscsi_unbind_session(struct work_struct *work)
 	struct iscsi_cls_session *session =
 			container_of(work, struct iscsi_cls_session,
 				     unbind_work);
-	struct Scsi_Host *shost = iscsi_session_to_shost(session);
-	struct iscsi_cls_host *ihost = shost->shost_data;
 	unsigned long flags;
 	unsigned int target_id;
 
 	ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n");
 
 	/* Prevent new scans and make sure scanning is not in progress */
-	mutex_lock(&ihost->mutex);
+	mutex_lock(&session->mutex);
 	spin_lock_irqsave(&session->lock, flags);
 	if (session->target_id == ISCSI_MAX_TARGET) {
 		spin_unlock_irqrestore(&session->lock, flags);
-		mutex_unlock(&ihost->mutex);
+		mutex_unlock(&session->mutex);
 		return;
 	}
 
 	target_id = session->target_id;
 	session->target_id = ISCSI_MAX_TARGET;
 	spin_unlock_irqrestore(&session->lock, flags);
-	mutex_unlock(&ihost->mutex);
+	mutex_unlock(&session->mutex);
 
 	if (session->ida_used)
 		ida_simple_remove(&iscsi_sess_ida, target_id);
@@ -2053,6 +2045,7 @@  iscsi_alloc_session(struct Scsi_Host *shost, struct iscsi_transport *transport,
 	INIT_WORK(&session->unbind_work, __iscsi_unbind_session);
 	INIT_WORK(&session->scan_work, iscsi_scan_session);
 	spin_lock_init(&session->lock);
+	mutex_init(&session->mutex);
 
 	/* this is released in the dev's release function */
 	scsi_host_get(shost);
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 6183d20..acf9d9d 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -238,6 +238,7 @@  struct iscsi_cls_session {
 	struct work_struct unblock_work;
 	struct work_struct scan_work;
 	struct work_struct unbind_work;
+	struct mutex mutex;
 
 	/* recovery fields */
 	int recovery_tmo;
@@ -272,7 +273,6 @@  struct iscsi_cls_session {
 
 struct iscsi_cls_host {
 	atomic_t nr_scans;
-	struct mutex mutex;
 	struct request_queue *bsg_q;
 	uint32_t port_speed;
 	uint32_t port_state;