Message ID | 1512086178.3020.35.camel@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
On Thu, 30 Nov 2017, James Bottomley wrote: > +#define __sdev_for_each_get(sdev, head, list) \ > + list_for_each_entry(sdev, head, list) \ > + if (kobject_get_unless_zero(&sdev->sdev_gendev.kobj)) > + I think that should have an 'else' clause, like this macro from include/drm/drmP.h: #define for_each_if(condition) if (!(condition)) {} else --
On 2017/12/1 7:56, James Bottomley wrote: > On Thu, 2017-11-30 at 16:08 +0000, Bart Van Assche wrote: >> On Thu, 2017-11-30 at 09:18 +0800, Jason Yan wrote: >>> >>> Hi Bart, I chose the approach in my patch because it has been used >>> in scsi_device_get() for years and been proved safe. I think using >>> kobject_get_unless_zero() is safe here and can fix this issue too. >>> And this approach is beneficial to all users. >> >> Hello Jason, >> >> A possible approach is that we start with your patch and defer any >> get_device() changes until after your patch has been applied. > > It's possible, but not quite good enough: the same race can be produced > with any of our sdev lists that are deleted in the release callback, > because there could be a released device on any one of them. The only > way to mediate it properly is to get a reference in the iterator using > kobject_get_unless_zero(). > > It's a bit like a huge can of worms, there's another problem every time > I look. However, this is something like the mechanism that could work > (and if get_device() ever gets fixed, we can put it in place of > kobject_get_unless_zero()). > > James > > --- > > diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c > index 6be77b3aa8a5..c3246f26c02c 100644 > --- a/drivers/scsi/53c700.c > +++ b/drivers/scsi/53c700.c > @@ -1169,6 +1169,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, > > > } > + put_device(&SDp->sdev_gendev); > } else if(dsps == A_RESELECTED_DURING_SELECTION) { > > /* This section is full of debugging code because I've > diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c > index c3fc34b9964d..7736f3fb2501 100644 > --- a/drivers/scsi/esp_scsi.c > +++ b/drivers/scsi/esp_scsi.c > @@ -1198,6 +1198,7 @@ static int esp_reconnect(struct esp *esp) > goto do_reset; > } > lp = dev->hostdata; > + put_device(&dev->sdev_gendev); > > ent = lp->non_tagged_cmd; > if (!ent) { > diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c > index a7e4fba724b7..c96c11716152 100644 > --- a/drivers/scsi/scsi.c > +++ b/drivers/scsi/scsi.c > @@ -677,11 +677,10 @@ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget, > { > struct scsi_device *sdev; > > - list_for_each_entry(sdev, &starget->devices, same_target_siblings) { > - if (sdev->sdev_state == SDEV_DEL) > - continue; > - if (sdev->lun ==lun) > + __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) { > + if (sdev->sdev_state != SDEV_DEL && sdev->lun ==lun) > return sdev; > + put_device(&sdev->sdev_gendev); > } > > return NULL; > @@ -700,15 +699,16 @@ EXPORT_SYMBOL(__scsi_device_lookup_by_target); > struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget, > u64 lun) > { > - struct scsi_device *sdev; > + struct scsi_device *sdev, *sdev_copy; > struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); > unsigned long flags; > > spin_lock_irqsave(shost->host_lock, flags); > - sdev = __scsi_device_lookup_by_target(starget, lun); > + sdev_copy = sdev = __scsi_device_lookup_by_target(starget, lun); > + spin_unlock_irqrestore(shost->host_lock, flags); > if (sdev && scsi_device_get(sdev)) > sdev = NULL; > - spin_unlock_irqrestore(shost->host_lock, flags); > + put_device(&sdev_copy->sdev_gendev); > > return sdev; > } > @@ -735,12 +735,12 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost, > { > struct scsi_device *sdev; > > - list_for_each_entry(sdev, &shost->__devices, siblings) { > - if (sdev->sdev_state == SDEV_DEL) > - continue; > - if (sdev->channel == channel && sdev->id == id && > - sdev->lun ==lun) > + __sdev_for_each_get(sdev, &shost->__devices, siblings) { > + if (sdev->sdev_state != SDEV_DEL && > + sdev->channel == channel && sdev->id == id && > + sdev->lun ==lun) > return sdev; > + put_device(&sdev->sdev_gendev); > } > > return NULL; > @@ -761,14 +761,15 @@ EXPORT_SYMBOL(__scsi_device_lookup); > struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost, > uint channel, uint id, u64 lun) > { > - struct scsi_device *sdev; > + struct scsi_device *sdev, *sdev_copy; > unsigned long flags; > > spin_lock_irqsave(shost->host_lock, flags); > - sdev = __scsi_device_lookup(shost, channel, id, lun); > + sdev_copy = sdev = __scsi_device_lookup(shost, channel, id, lun); > + spin_unlock_irqrestore(shost->host_lock, flags); > if (sdev && scsi_device_get(sdev)) > sdev = NULL; > - spin_unlock_irqrestore(shost->host_lock, flags); > + put_device(&sdev_copy->sdev_gendev); > > return sdev; > } > diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c > index 40124648a07b..cddd5a93e962 100644 > --- a/drivers/scsi/scsi_scan.c > +++ b/drivers/scsi/scsi_scan.c > @@ -1870,11 +1870,14 @@ void scsi_forget_host(struct Scsi_Host *shost) > > restart: > spin_lock_irqsave(shost->host_lock, flags); > - list_for_each_entry(sdev, &shost->__devices, siblings) { > - if (sdev->sdev_state == SDEV_DEL) > + __sdev_for_each_get(sdev, &shost->__devices, siblings) { > + if (sdev->sdev_state == SDEV_DEL) { > + put_device(&sdev->sdev_gendev); > continue; > + } > spin_unlock_irqrestore(shost->host_lock, flags); > __scsi_remove_device(sdev); > + put_device(&sdev->sdev_gendev); > goto restart; > } > spin_unlock_irqrestore(shost->host_lock, flags); > diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c > index f796bd61f3f0..380404ec49cd 100644 > --- a/drivers/scsi/scsi_sysfs.c > +++ b/drivers/scsi/scsi_sysfs.c > @@ -1375,17 +1375,7 @@ static void __scsi_remove_target(struct scsi_target *starget) > > spin_lock_irqsave(shost->host_lock, flags); > restart: > - list_for_each_entry(sdev, &shost->__devices, siblings) { > - /* > - * We cannot call scsi_device_get() here, as > - * we might've been called from rmmod() causing > - * scsi_device_get() to fail the module_is_live() > - * check. > - */ > - if (sdev->channel != starget->channel || > - sdev->id != starget->id || > - !get_device(&sdev->sdev_gendev)) > - continue; > + __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) { > spin_unlock_irqrestore(shost->host_lock, flags); > scsi_remove_device(sdev); > put_device(&sdev->sdev_gendev); > diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h > index 571ddb49b926..2e4d48d8cd68 100644 > --- a/include/scsi/scsi_device.h > +++ b/include/scsi/scsi_device.h > @@ -380,6 +380,23 @@ extern struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *, > #define __shost_for_each_device(sdev, shost) \ > list_for_each_entry((sdev), &((shost)->__devices), siblings) > Seems that __shost_for_each_device() is still not safe. scsi device been deleted stays in the list and put_device() can be called anywhere out of the host lock. > +/** > + * __sdev_list_for_each_get - get a reference to each element > + * @sdev: the scsi device to use in the body > + * @head: the head of the list > + * @list: the element (sdev->list) containing list members > + * > + * Iterator that only executes the body if it can obtain a reference > + * to the element. This closes a race where the device release can > + * have been called, but the element is still on the lists. > + * > + * The lock protecting the list (the host lock) must be held before > + * calling this iterator > + */ > +#define __sdev_for_each_get(sdev, head, list) \ > + list_for_each_entry(sdev, head, list) \ > + if (kobject_get_unless_zero(&sdev->sdev_gendev.kobj)) > + > extern int scsi_change_queue_depth(struct scsi_device *, int); > extern int scsi_track_queue_full(struct scsi_device *, int); > > > > . >
We have another test case that demonstrates this issue involving duplicate invocations of scsi_device_dev_release() on the same device. This other test case involves repeated log in / log out of an iSCSI target. (The first test case I mentioned in an earlier mail was an oscillating FC port with a low dev_loss_tmo value.) The iSCSI test was not fixed by Jason Yan's patch, however adding Bart's change to use kobject_get_unless_zero() in get_device() as well seems to have resolved it. We are going to try with just Bart's change next. -Ewan
On Fri, 2017-12-01 at 16:40 +0800, Jason Yan wrote: > On 2017/12/1 7:56, James Bottomley wrote: > > b/include/scsi/scsi_device.h > > index 571ddb49b926..2e4d48d8cd68 100644 > > --- a/include/scsi/scsi_device.h > > +++ b/include/scsi/scsi_device.h > > @@ -380,6 +380,23 @@ extern struct scsi_device > > *__scsi_iterate_devices(struct Scsi_Host *, > > #define __shost_for_each_device(sdev, shost) \ > > list_for_each_entry((sdev), &((shost)->__devices), > > siblings) > > > > Seems that __shost_for_each_device() is still not safe. scsi device > been deleted stays in the list and put_device() can be called > anywhere out of the host lock. Not if it's used with scsi_get_device(). As I said, I only did a cursory inspectiont, so if I've missed a loop, please specify. The point was more a demonstration of how we could fix the problem if we don't change get_device(). James
On 2017/12/1 23:35, James Bottomley wrote: > On Fri, 2017-12-01 at 16:40 +0800, Jason Yan wrote: >> On 2017/12/1 7:56, James Bottomley wrote: >>> b/include/scsi/scsi_device.h >>> index 571ddb49b926..2e4d48d8cd68 100644 >>> --- a/include/scsi/scsi_device.h >>> +++ b/include/scsi/scsi_device.h >>> @@ -380,6 +380,23 @@ extern struct scsi_device >>> *__scsi_iterate_devices(struct Scsi_Host *, >>> #define __shost_for_each_device(sdev, shost) \ >>> list_for_each_entry((sdev), &((shost)->__devices), >>> siblings) >>> >> >> Seems that __shost_for_each_device() is still not safe. scsi device >> been deleted stays in the list and put_device() can be called >> anywhere out of the host lock. > > Not if it's used with scsi_get_device(). As I said, I only did a > cursory inspectiont, so if I've missed a loop, please specify. > > The point was more a demonstration of how we could fix the problem if > we don't change get_device(). > > James > Yes, it's OK now. __shost_for_each_device() is not used with scsi_get_device() yet. Another problem is that put_device() cannot be called while holding the host lock, so we need to remove all put_device() out of the lock. Some places like scsi_device_lookup() and scsi_device_lookup_by_target() need rework: @@ -765,12 +772,22 @@ struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost, unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); - sdev = __scsi_device_lookup(shost, channel, id, lun); - if (sdev && scsi_device_get(sdev)) - sdev = NULL; + __sdev_for_each_get(sdev, &shost->__devices, siblings) { + spin_unlock_irqrestore(shost->host_lock, flags); + if (sdev->sdev_state != SDEV_DEL && + sdev->channel == channel && sdev->id == id && + sdev->lun ==lun) { + if (!scsi_device_get(sdev)) { + put_device(&sdev->sdev_gendev); + return sdev; + } + } + put_device(&sdev->sdev_gendev); + spin_lock_irqsave(shost->host_lock, flags); + } spin_unlock_irqrestore(shost->host_lock, flags); - return sdev; + return NULL; } EXPORT_SYMBOL(scsi_device_lookup); > >
On Tue, 2017-12-05 at 20:37 +0800, Jason Yan wrote: > > On 2017/12/1 23:35, James Bottomley wrote: > > > > On Fri, 2017-12-01 at 16:40 +0800, Jason Yan wrote: > > > > > > On 2017/12/1 7:56, James Bottomley wrote: > > > > > > > > b/include/scsi/scsi_device.h > > > > index 571ddb49b926..2e4d48d8cd68 100644 > > > > --- a/include/scsi/scsi_device.h > > > > +++ b/include/scsi/scsi_device.h > > > > @@ -380,6 +380,23 @@ extern struct scsi_device > > > > *__scsi_iterate_devices(struct Scsi_Host *, > > > > #define __shost_for_each_device(sdev, shost) \ > > > > list_for_each_entry((sdev), &((shost)->__devices), > > > > siblings) > > > > > > > > > > Seems that __shost_for_each_device() is still not safe. scsi > > > device > > > been deleted stays in the list and put_device() can be called > > > anywhere out of the host lock. > > > > Not if it's used with scsi_get_device(). As I said, I only did a > > cursory inspectiont, so if I've missed a loop, please specify. > > > > The point was more a demonstration of how we could fix the problem > > if we don't change get_device(). > > > > James > > > > Yes, it's OK now. __shost_for_each_device() is not used with > scsi_get_device() yet. > > Another problem is that put_device() cannot be called while holding > the host lock, Yes it can. That's one of the design goals of the execute in process context: you can call it from interrupt context and you can call it with locks held and we'll return immediately and delay all the dangerous stuff until we have a process context. To get the process context to be acquired, the in_interrupt() test must pass (so the spin lock must be acquired irqsave) ; is that condition missing anywhere? James
On 2017/12/5 23:37, James Bottomley wrote: > On Tue, 2017-12-05 at 20:37 +0800, Jason Yan wrote: >> >> On 2017/12/1 23:35, James Bottomley wrote: >>> >>> On Fri, 2017-12-01 at 16:40 +0800, Jason Yan wrote: >>>> >>>> On 2017/12/1 7:56, James Bottomley wrote: >>>>> >>>>> b/include/scsi/scsi_device.h >>>>> index 571ddb49b926..2e4d48d8cd68 100644 >>>>> --- a/include/scsi/scsi_device.h >>>>> +++ b/include/scsi/scsi_device.h >>>>> @@ -380,6 +380,23 @@ extern struct scsi_device >>>>> *__scsi_iterate_devices(struct Scsi_Host *, >>>>> #define __shost_for_each_device(sdev, shost) \ >>>>> list_for_each_entry((sdev), &((shost)->__devices), >>>>> siblings) >>>>> >>>> >>>> Seems that __shost_for_each_device() is still not safe. scsi >>>> device >>>> been deleted stays in the list and put_device() can be called >>>> anywhere out of the host lock. >>> >>> Not if it's used with scsi_get_device(). As I said, I only did a >>> cursory inspectiont, so if I've missed a loop, please specify. >>> >>> The point was more a demonstration of how we could fix the problem >>> if we don't change get_device(). >>> >>> James >>> >> >> Yes, it's OK now. __shost_for_each_device() is not used with >> scsi_get_device() yet. >> >> Another problem is that put_device() cannot be called while holding >> the host lock, > > Yes it can. That's one of the design goals of the execute in process > context: you can call it from interrupt context and you can call it > with locks held and we'll return immediately and delay all the > dangerous stuff until we have a process context. > > To get the process context to be acquired, the in_interrupt() test must > pass (so the spin lock must be acquired irqsave) ; is that condition > missing anywhere? > > James > > Call it from interrupt context is ok. I'm talking about calling it from process context. Think about this in a process context: scsi_device_lookup() ->spin_lock_irqsave(shost->host_lock, flags); ->__scsi_device_lookup() ->iterate and kobject_get_unless_zero() ->put_device() ->scsi_device_dev_release() if the last put ->scsi_device_dev_release_usercontext() ->acquire the host lock = deadlock Jason > . >
On Wed, 2017-12-06 at 08:41 +0800, Jason Yan wrote: > On 2017/12/5 23:37, James Bottomley wrote: > > > > On Tue, 2017-12-05 at 20:37 +0800, Jason Yan wrote: > > > > > > > > > On 2017/12/1 23:35, James Bottomley wrote: > > > > > > > > > > > > On Fri, 2017-12-01 at 16:40 +0800, Jason Yan wrote: > > > > > > > > > > > > > > > On 2017/12/1 7:56, James Bottomley wrote: > > > > > > > > > > > > > > > > > > b/include/scsi/scsi_device.h > > > > > > index 571ddb49b926..2e4d48d8cd68 100644 > > > > > > --- a/include/scsi/scsi_device.h > > > > > > +++ b/include/scsi/scsi_device.h > > > > > > @@ -380,6 +380,23 @@ extern struct scsi_device > > > > > > *__scsi_iterate_devices(struct Scsi_Host *, > > > > > > #define __shost_for_each_device(sdev, shost) \ > > > > > > list_for_each_entry((sdev), &((shost)- > > > > > > >__devices), > > > > > > siblings) > > > > > > > > > > > > > > > > Seems that __shost_for_each_device() is still not safe. scsi > > > > > device > > > > > been deleted stays in the list and put_device() can be called > > > > > anywhere out of the host lock. > > > > > > > > Not if it's used with scsi_get_device(). As I said, I only did > > > > a cursory inspectiont, so if I've missed a loop, please > > > > specify. > > > > > > > > The point was more a demonstration of how we could fix the > > > > problem if we don't change get_device(). > > > > > > > > James > > > > > > > > > > Yes, it's OK now. __shost_for_each_device() is not used with > > > scsi_get_device() yet. > > > > > > Another problem is that put_device() cannot be called while > > > holding the host lock, > > > > Yes it can. That's one of the design goals of the execute in > > process context: you can call it from interrupt context and you can > > call it with locks held and we'll return immediately and delay all > > the dangerous stuff until we have a process context. > > > > To get the process context to be acquired, the in_interrupt() test > > must pass (so the spin lock must be acquired irqsave) ; is that > > condition missing anywhere? > > > > James > > > > > > Call it from interrupt context is ok. I'm talking about calling it > from process context. > > Think about this in a process context: > scsi_device_lookup() > ->spin_lock_irqsave(shost->host_lock, flags); > ->__scsi_device_lookup() > ->iterate and kobject_get_unless_zero() > ->put_device() > ->scsi_device_dev_release() if the last put > ->scsi_device_dev_release_usercontext() > ->acquire the host lock = deadlock execute_in_process_context() is supposed to produce us a context whenever the local context isn't available, and that's supposed to include when interrupts are disabled as in spin_lock_irqsave(). So let me ask this another way: have you seen this deadlock (which would mean we have a bug in execute_process_context())? James
> execute_in_process_context() is supposed to produce us a context > whenever the local context isn't available, and that's supposed to > include when interrupts are disabled as in spin_lock_irqsave(). > > So let me ask this another way: have you seen this deadlock (which > would mean we have a bug in execute_process_context())? > > James > > I havn't seen this dead lock but in_interrupt() do not check whether the interrupts are disabled. Please refer to the definition of in_interrupt(). #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK\ | NMI_MASK)) #define in_interrupt() (irq_count()) Jason > . >
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 6be77b3aa8a5..c3246f26c02c 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1169,6 +1169,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, } + put_device(&SDp->sdev_gendev); } else if(dsps == A_RESELECTED_DURING_SELECTION) { /* This section is full of debugging code because I've diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index c3fc34b9964d..7736f3fb2501 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -1198,6 +1198,7 @@ static int esp_reconnect(struct esp *esp) goto do_reset; } lp = dev->hostdata; + put_device(&dev->sdev_gendev); ent = lp->non_tagged_cmd; if (!ent) { diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index a7e4fba724b7..c96c11716152 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -677,11 +677,10 @@ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget, { struct scsi_device *sdev; - list_for_each_entry(sdev, &starget->devices, same_target_siblings) { - if (sdev->sdev_state == SDEV_DEL) - continue; - if (sdev->lun ==lun) + __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) { + if (sdev->sdev_state != SDEV_DEL && sdev->lun ==lun) return sdev; + put_device(&sdev->sdev_gendev); } return NULL; @@ -700,15 +699,16 @@ EXPORT_SYMBOL(__scsi_device_lookup_by_target); struct scsi_device *scsi_device_lookup_by_target(struct scsi_target *starget, u64 lun) { - struct scsi_device *sdev; + struct scsi_device *sdev, *sdev_copy; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); - sdev = __scsi_device_lookup_by_target(starget, lun); + sdev_copy = sdev = __scsi_device_lookup_by_target(starget, lun); + spin_unlock_irqrestore(shost->host_lock, flags); if (sdev && scsi_device_get(sdev)) sdev = NULL; - spin_unlock_irqrestore(shost->host_lock, flags); + put_device(&sdev_copy->sdev_gendev); return sdev; } @@ -735,12 +735,12 @@ struct scsi_device *__scsi_device_lookup(struct Scsi_Host *shost, { struct scsi_device *sdev; - list_for_each_entry(sdev, &shost->__devices, siblings) { - if (sdev->sdev_state == SDEV_DEL) - continue; - if (sdev->channel == channel && sdev->id == id && - sdev->lun ==lun) + __sdev_for_each_get(sdev, &shost->__devices, siblings) { + if (sdev->sdev_state != SDEV_DEL && + sdev->channel == channel && sdev->id == id && + sdev->lun ==lun) return sdev; + put_device(&sdev->sdev_gendev); } return NULL; @@ -761,14 +761,15 @@ EXPORT_SYMBOL(__scsi_device_lookup); struct scsi_device *scsi_device_lookup(struct Scsi_Host *shost, uint channel, uint id, u64 lun) { - struct scsi_device *sdev; + struct scsi_device *sdev, *sdev_copy; unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); - sdev = __scsi_device_lookup(shost, channel, id, lun); + sdev_copy = sdev = __scsi_device_lookup(shost, channel, id, lun); + spin_unlock_irqrestore(shost->host_lock, flags); if (sdev && scsi_device_get(sdev)) sdev = NULL; - spin_unlock_irqrestore(shost->host_lock, flags); + put_device(&sdev_copy->sdev_gendev); return sdev; } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 40124648a07b..cddd5a93e962 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1870,11 +1870,14 @@ void scsi_forget_host(struct Scsi_Host *shost) restart: spin_lock_irqsave(shost->host_lock, flags); - list_for_each_entry(sdev, &shost->__devices, siblings) { - if (sdev->sdev_state == SDEV_DEL) + __sdev_for_each_get(sdev, &shost->__devices, siblings) { + if (sdev->sdev_state == SDEV_DEL) { + put_device(&sdev->sdev_gendev); continue; + } spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_device(sdev); + put_device(&sdev->sdev_gendev); goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index f796bd61f3f0..380404ec49cd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1375,17 +1375,7 @@ static void __scsi_remove_target(struct scsi_target *starget) spin_lock_irqsave(shost->host_lock, flags); restart: - list_for_each_entry(sdev, &shost->__devices, siblings) { - /* - * We cannot call scsi_device_get() here, as - * we might've been called from rmmod() causing - * scsi_device_get() to fail the module_is_live() - * check. - */ - if (sdev->channel != starget->channel || - sdev->id != starget->id || - !get_device(&sdev->sdev_gendev)) - continue; + __sdev_for_each_get(sdev, &starget->devices, same_target_siblings) { spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); put_device(&sdev->sdev_gendev); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 571ddb49b926..2e4d48d8cd68 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -380,6 +380,23 @@ extern struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *, #define __shost_for_each_device(sdev, shost) \ list_for_each_entry((sdev), &((shost)->__devices), siblings) +/** + * __sdev_list_for_each_get - get a reference to each element + * @sdev: the scsi device to use in the body + * @head: the head of the list + * @list: the element (sdev->list) containing list members + * + * Iterator that only executes the body if it can obtain a reference + * to the element. This closes a race where the device release can + * have been called, but the element is still on the lists. + * + * The lock protecting the list (the host lock) must be held before + * calling this iterator + */ +#define __sdev_for_each_get(sdev, head, list) \ + list_for_each_entry(sdev, head, list) \ + if (kobject_get_unless_zero(&sdev->sdev_gendev.kobj)) + extern int scsi_change_queue_depth(struct scsi_device *, int); extern int scsi_track_queue_full(struct scsi_device *, int);