diff mbox

[v4,2/2] mpt2sas: Refcount fw_events and fix unsafe list usage

Message ID 5710d5e272a19c252db0171133a1092be00208f2.1439510240.git.calvinowens@fb.com (mailing list archive)
State New, archived
Headers show

Commit Message

Calvin Owens Aug. 14, 2015, 1:48 a.m. UTC
The fw_event_work struct is concurrently referenced at shutdown, so
add a refcount to protect it, and refactor the code to use it.

Additionally, refactor _scsih_fw_event_cleanup_queue() such that it
no longer iterates over the list without holding the lock, since
_firmware_event_work() concurrently deletes items from the list.

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Calvin Owens <calvinowens@fb.com>
---
Changes in v4: None

Changes in v3:
	* Add a break condition to the REMOVE_UNRESPONDING_DEVICES fw_event,
	  which can loop over a sleep forever (5m+ at least) at unloading. I
	  don't think anything prevented this before, but taking the fw_event
	  object off the list at the top of _firmware_event_work() seems to have
	  made it more likely to happen.

Changes in v2:
	* Squished patches 4-6 into one patch
	* Remove the fw_event from fw_event_list at the start of
	  _firmware_event_work()
	* Explicitly separate fw_event_list removal from fw_event freeing

drivers/scsi/mpt2sas/mpt2sas_scsih.c | 112 ++++++++++++++++++++++++++++-------
 1 file changed, 91 insertions(+), 21 deletions(-)

Comments

Nicholas A. Bellinger Aug. 25, 2015, 9:06 p.m. UTC | #1
On Thu, 2015-08-13 at 18:48 -0700, Calvin Owens wrote:
> The fw_event_work struct is concurrently referenced at shutdown, so
> add a refcount to protect it, and refactor the code to use it.
> 
> Additionally, refactor _scsih_fw_event_cleanup_queue() such that it
> no longer iterates over the list without holding the lock, since
> _firmware_event_work() concurrently deletes items from the list.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Calvin Owens <calvinowens@fb.com>
> ---
> Changes in v4: None
> 
> Changes in v3:
> 	* Add a break condition to the REMOVE_UNRESPONDING_DEVICES fw_event,
> 	  which can loop over a sleep forever (5m+ at least) at unloading. I
> 	  don't think anything prevented this before, but taking the fw_event
> 	  object off the list at the top of _firmware_event_work() seems to have
> 	  made it more likely to happen.
> 
> Changes in v2:
> 	* Squished patches 4-6 into one patch
> 	* Remove the fw_event from fw_event_list at the start of
> 	  _firmware_event_work()
> 	* Explicitly separate fw_event_list removal from fw_event freeing
> 
> drivers/scsi/mpt2sas/mpt2sas_scsih.c | 112 ++++++++++++++++++++++++++++-------
>  1 file changed, 91 insertions(+), 21 deletions(-)
> 

Looks good.

Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sreekanth Reddy Sept. 4, 2015, 2:35 p.m. UTC | #2
On Fri, Aug 14, 2015 at 7:18 AM, Calvin Owens <calvinowens@fb.com> wrote:
> The fw_event_work struct is concurrently referenced at shutdown, so
> add a refcount to protect it, and refactor the code to use it.
>
> Additionally, refactor _scsih_fw_event_cleanup_queue() such that it
> no longer iterates over the list without holding the lock, since
> _firmware_event_work() concurrently deletes items from the list.
>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Calvin Owens <calvinowens@fb.com>

Tested-by: Chaitra Basappa <chaitra.basappa@avagotech.com>
ACK-by: Sreekanth Reddy <sreekanth.reddy@avagotech.com>

> ---
> Changes in v4: None
>
> Changes in v3:
>         * Add a break condition to the REMOVE_UNRESPONDING_DEVICES fw_event,
>           which can loop over a sleep forever (5m+ at least) at unloading. I
>           don't think anything prevented this before, but taking the fw_event
>           object off the list at the top of _firmware_event_work() seems to have
>           made it more likely to happen.
>
> Changes in v2:
>         * Squished patches 4-6 into one patch
>         * Remove the fw_event from fw_event_list at the start of
>           _firmware_event_work()
>         * Explicitly separate fw_event_list removal from fw_event freeing
>
> drivers/scsi/mpt2sas/mpt2sas_scsih.c | 112 ++++++++++++++++++++++++++++-------
>  1 file changed, 91 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
> index 5eca3a4..c0ff55b 100644
> --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
> +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
> @@ -176,9 +176,37 @@ struct fw_event_work {
>         u8                      VP_ID;
>         u8                      ignore;
>         u16                     event;
> +       struct kref             refcount;
>         char                    event_data[0] __aligned(4);
>  };
>
> +static void fw_event_work_free(struct kref *r)
> +{
> +       kfree(container_of(r, struct fw_event_work, refcount));
> +}
> +
> +static void fw_event_work_get(struct fw_event_work *fw_work)
> +{
> +       kref_get(&fw_work->refcount);
> +}
> +
> +static void fw_event_work_put(struct fw_event_work *fw_work)
> +{
> +       kref_put(&fw_work->refcount, fw_event_work_free);
> +}
> +
> +static struct fw_event_work *alloc_fw_event_work(int len)
> +{
> +       struct fw_event_work *fw_event;
> +
> +       fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC);
> +       if (!fw_event)
> +               return NULL;
> +
> +       kref_init(&fw_event->refcount);
> +       return fw_event;
> +}
> +
>  /* raid transport support */
>  static struct raid_template *mpt2sas_raid_template;
>
> @@ -2872,36 +2900,39 @@ _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
>                 return;
>
>         spin_lock_irqsave(&ioc->fw_event_lock, flags);
> +       fw_event_work_get(fw_event);
>         list_add_tail(&fw_event->list, &ioc->fw_event_list);
>         INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work);
> +       fw_event_work_get(fw_event);
>         queue_delayed_work(ioc->firmware_event_thread,
>             &fw_event->delayed_work, 0);
>         spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
>  }
>
>  /**
> - * _scsih_fw_event_free - delete fw_event
> + * _scsih_fw_event_del_from_list - delete fw_event from the list
>   * @ioc: per adapter object
>   * @fw_event: object describing the event
>   * Context: This function will acquire ioc->fw_event_lock.
>   *
> - * This removes firmware event object from link list, frees associated memory.
> + * If the fw_event is on the fw_event_list, remove it and do a put.
>   *
>   * Return nothing.
>   */
>  static void
> -_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
> +_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
>      *fw_event)
>  {
>         unsigned long flags;
>
>         spin_lock_irqsave(&ioc->fw_event_lock, flags);
> -       list_del(&fw_event->list);
> -       kfree(fw_event);
> +       if (!list_empty(&fw_event->list)) {
> +               list_del_init(&fw_event->list);
> +               fw_event_work_put(fw_event);
> +       }
>         spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
>  }
>
> -
>  /**
>   * _scsih_error_recovery_delete_devices - remove devices not responding
>   * @ioc: per adapter object
> @@ -2916,13 +2947,14 @@ _scsih_error_recovery_delete_devices(struct MPT2SAS_ADAPTER *ioc)
>         if (ioc->is_driver_loading)
>                 return;
>
> -       fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
> +       fw_event = alloc_fw_event_work(0);
>         if (!fw_event)
>                 return;
>
>         fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES;
>         fw_event->ioc = ioc;
>         _scsih_fw_event_add(ioc, fw_event);
> +       fw_event_work_put(fw_event);
>  }
>
>  /**
> @@ -2936,12 +2968,29 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc)
>  {
>         struct fw_event_work *fw_event;
>
> -       fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
> +       fw_event = alloc_fw_event_work(0);
>         if (!fw_event)
>                 return;
>         fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE;
>         fw_event->ioc = ioc;
>         _scsih_fw_event_add(ioc, fw_event);
> +       fw_event_work_put(fw_event);
> +}
> +
> +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc)
> +{
> +       unsigned long flags;
> +       struct fw_event_work *fw_event = NULL;
> +
> +       spin_lock_irqsave(&ioc->fw_event_lock, flags);
> +       if (!list_empty(&ioc->fw_event_list)) {
> +               fw_event = list_first_entry(&ioc->fw_event_list,
> +                               struct fw_event_work, list);
> +               list_del_init(&fw_event->list);
> +       }
> +       spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
> +
> +       return fw_event;
>  }
>
>  /**
> @@ -2956,17 +3005,25 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc)
>  static void
>  _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc)
>  {
> -       struct fw_event_work *fw_event, *next;
> +       struct fw_event_work *fw_event;
>
>         if (list_empty(&ioc->fw_event_list) ||
>              !ioc->firmware_event_thread || in_interrupt())
>                 return;
>
> -       list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) {
> -               if (cancel_delayed_work_sync(&fw_event->delayed_work)) {
> -                       _scsih_fw_event_free(ioc, fw_event);
> -                       continue;
> -               }
> +       while ((fw_event = dequeue_next_fw_event(ioc))) {
> +               /*
> +                * Wait on the fw_event to complete. If this returns 1, then
> +                * the event was never executed, and we need a put for the
> +                * reference the delayed_work had on the fw_event.
> +                *
> +                * If it did execute, we wait for it to finish, and the put will
> +                * happen from _firmware_event_work()
> +                */
> +               if (cancel_delayed_work_sync(&fw_event->delayed_work))
> +                       fw_event_work_put(fw_event);
> +
> +               fw_event_work_put(fw_event);
>         }
>  }
>
> @@ -4447,13 +4504,14 @@ _scsih_send_event_to_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle)
>  {
>         struct fw_event_work *fw_event;
>
> -       fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
> +       fw_event = alloc_fw_event_work(0);
>         if (!fw_event)
>                 return;
>         fw_event->event = MPT2SAS_TURN_ON_PFA_LED;
>         fw_event->device_handle = handle;
>         fw_event->ioc = ioc;
>         _scsih_fw_event_add(ioc, fw_event);
> +       fw_event_work_put(fw_event);
>  }
>
>  /**
> @@ -7554,17 +7612,27 @@ _firmware_event_work(struct work_struct *work)
>             struct fw_event_work, delayed_work.work);
>         struct MPT2SAS_ADAPTER *ioc = fw_event->ioc;
>
> +       _scsih_fw_event_del_from_list(ioc, fw_event);
> +
>         /* the queue is being flushed so ignore this event */
> -       if (ioc->remove_host ||
> -           ioc->pci_error_recovery) {
> -               _scsih_fw_event_free(ioc, fw_event);
> +       if (ioc->remove_host || ioc->pci_error_recovery) {
> +               fw_event_work_put(fw_event);
>                 return;
>         }
>
>         switch (fw_event->event) {
>         case MPT2SAS_REMOVE_UNRESPONDING_DEVICES:
> -               while (scsi_host_in_recovery(ioc->shost) || ioc->shost_recovery)
> +               while (scsi_host_in_recovery(ioc->shost) ||
> +                               ioc->shost_recovery) {
> +                       /*
> +                        * If we're unloading, bail. Otherwise, this can become
> +                        * an infinite loop.
> +                        */
> +                       if (ioc->remove_host)
> +                               goto out;
> +
>                         ssleep(1);
> +               }
>                 _scsih_remove_unresponding_sas_devices(ioc);
>                 _scsih_scan_for_devices_after_reset(ioc);
>                 break;
> @@ -7613,7 +7681,8 @@ _firmware_event_work(struct work_struct *work)
>                 _scsih_sas_ir_operation_status_event(ioc, fw_event);
>                 break;
>         }
> -       _scsih_fw_event_free(ioc, fw_event);
> +out:
> +       fw_event_work_put(fw_event);
>  }
>
>  /**
> @@ -7751,7 +7820,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
>         }
>
>         sz = le16_to_cpu(mpi_reply->EventDataLength) * 4;
> -       fw_event = kzalloc(sizeof(*fw_event) + sz, GFP_ATOMIC);
> +       fw_event = alloc_fw_event_work(sz);
>         if (!fw_event) {
>                 printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
>                     ioc->name, __FILE__, __LINE__, __func__);
> @@ -7764,6 +7833,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
>         fw_event->VP_ID = mpi_reply->VP_ID;
>         fw_event->event = event;
>         _scsih_fw_event_add(ioc, fw_event);
> +       fw_event_work_put(fw_event);
>         return;
>  }
>
> --
> 2.5.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 5eca3a4..c0ff55b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -176,9 +176,37 @@  struct fw_event_work {
 	u8			VP_ID;
 	u8			ignore;
 	u16			event;
+	struct kref		refcount;
 	char			event_data[0] __aligned(4);
 };
 
+static void fw_event_work_free(struct kref *r)
+{
+	kfree(container_of(r, struct fw_event_work, refcount));
+}
+
+static void fw_event_work_get(struct fw_event_work *fw_work)
+{
+	kref_get(&fw_work->refcount);
+}
+
+static void fw_event_work_put(struct fw_event_work *fw_work)
+{
+	kref_put(&fw_work->refcount, fw_event_work_free);
+}
+
+static struct fw_event_work *alloc_fw_event_work(int len)
+{
+	struct fw_event_work *fw_event;
+
+	fw_event = kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC);
+	if (!fw_event)
+		return NULL;
+
+	kref_init(&fw_event->refcount);
+	return fw_event;
+}
+
 /* raid transport support */
 static struct raid_template *mpt2sas_raid_template;
 
@@ -2872,36 +2900,39 @@  _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
 		return;
 
 	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	fw_event_work_get(fw_event);
 	list_add_tail(&fw_event->list, &ioc->fw_event_list);
 	INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work);
+	fw_event_work_get(fw_event);
 	queue_delayed_work(ioc->firmware_event_thread,
 	    &fw_event->delayed_work, 0);
 	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
 }
 
 /**
- * _scsih_fw_event_free - delete fw_event
+ * _scsih_fw_event_del_from_list - delete fw_event from the list
  * @ioc: per adapter object
  * @fw_event: object describing the event
  * Context: This function will acquire ioc->fw_event_lock.
  *
- * This removes firmware event object from link list, frees associated memory.
+ * If the fw_event is on the fw_event_list, remove it and do a put.
  *
  * Return nothing.
  */
 static void
-_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
+_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work
     *fw_event)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioc->fw_event_lock, flags);
-	list_del(&fw_event->list);
-	kfree(fw_event);
+	if (!list_empty(&fw_event->list)) {
+		list_del_init(&fw_event->list);
+		fw_event_work_put(fw_event);
+	}
 	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
 }
 
-
 /**
  * _scsih_error_recovery_delete_devices - remove devices not responding
  * @ioc: per adapter object
@@ -2916,13 +2947,14 @@  _scsih_error_recovery_delete_devices(struct MPT2SAS_ADAPTER *ioc)
 	if (ioc->is_driver_loading)
 		return;
 
-	fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+	fw_event = alloc_fw_event_work(0);
 	if (!fw_event)
 		return;
 
 	fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES;
 	fw_event->ioc = ioc;
 	_scsih_fw_event_add(ioc, fw_event);
+	fw_event_work_put(fw_event);
 }
 
 /**
@@ -2936,12 +2968,29 @@  mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc)
 {
 	struct fw_event_work *fw_event;
 
-	fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+	fw_event = alloc_fw_event_work(0);
 	if (!fw_event)
 		return;
 	fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE;
 	fw_event->ioc = ioc;
 	_scsih_fw_event_add(ioc, fw_event);
+	fw_event_work_put(fw_event);
+}
+
+static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc)
+{
+	unsigned long flags;
+	struct fw_event_work *fw_event = NULL;
+
+	spin_lock_irqsave(&ioc->fw_event_lock, flags);
+	if (!list_empty(&ioc->fw_event_list)) {
+		fw_event = list_first_entry(&ioc->fw_event_list,
+				struct fw_event_work, list);
+		list_del_init(&fw_event->list);
+	}
+	spin_unlock_irqrestore(&ioc->fw_event_lock, flags);
+
+	return fw_event;
 }
 
 /**
@@ -2956,17 +3005,25 @@  mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc)
 static void
 _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc)
 {
-	struct fw_event_work *fw_event, *next;
+	struct fw_event_work *fw_event;
 
 	if (list_empty(&ioc->fw_event_list) ||
 	     !ioc->firmware_event_thread || in_interrupt())
 		return;
 
-	list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) {
-		if (cancel_delayed_work_sync(&fw_event->delayed_work)) {
-			_scsih_fw_event_free(ioc, fw_event);
-			continue;
-		}
+	while ((fw_event = dequeue_next_fw_event(ioc))) {
+		/*
+		 * Wait on the fw_event to complete. If this returns 1, then
+		 * the event was never executed, and we need a put for the
+		 * reference the delayed_work had on the fw_event.
+		 *
+		 * If it did execute, we wait for it to finish, and the put will
+		 * happen from _firmware_event_work()
+		 */
+		if (cancel_delayed_work_sync(&fw_event->delayed_work))
+			fw_event_work_put(fw_event);
+
+		fw_event_work_put(fw_event);
 	}
 }
 
@@ -4447,13 +4504,14 @@  _scsih_send_event_to_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle)
 {
 	struct fw_event_work *fw_event;
 
-	fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC);
+	fw_event = alloc_fw_event_work(0);
 	if (!fw_event)
 		return;
 	fw_event->event = MPT2SAS_TURN_ON_PFA_LED;
 	fw_event->device_handle = handle;
 	fw_event->ioc = ioc;
 	_scsih_fw_event_add(ioc, fw_event);
+	fw_event_work_put(fw_event);
 }
 
 /**
@@ -7554,17 +7612,27 @@  _firmware_event_work(struct work_struct *work)
 	    struct fw_event_work, delayed_work.work);
 	struct MPT2SAS_ADAPTER *ioc = fw_event->ioc;
 
+	_scsih_fw_event_del_from_list(ioc, fw_event);
+
 	/* the queue is being flushed so ignore this event */
-	if (ioc->remove_host ||
-	    ioc->pci_error_recovery) {
-		_scsih_fw_event_free(ioc, fw_event);
+	if (ioc->remove_host || ioc->pci_error_recovery) {
+		fw_event_work_put(fw_event);
 		return;
 	}
 
 	switch (fw_event->event) {
 	case MPT2SAS_REMOVE_UNRESPONDING_DEVICES:
-		while (scsi_host_in_recovery(ioc->shost) || ioc->shost_recovery)
+		while (scsi_host_in_recovery(ioc->shost) ||
+				ioc->shost_recovery) {
+			/*
+			 * If we're unloading, bail. Otherwise, this can become
+			 * an infinite loop.
+			 */
+			if (ioc->remove_host)
+				goto out;
+
 			ssleep(1);
+		}
 		_scsih_remove_unresponding_sas_devices(ioc);
 		_scsih_scan_for_devices_after_reset(ioc);
 		break;
@@ -7613,7 +7681,8 @@  _firmware_event_work(struct work_struct *work)
 		_scsih_sas_ir_operation_status_event(ioc, fw_event);
 		break;
 	}
-	_scsih_fw_event_free(ioc, fw_event);
+out:
+	fw_event_work_put(fw_event);
 }
 
 /**
@@ -7751,7 +7820,7 @@  mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
 	}
 
 	sz = le16_to_cpu(mpi_reply->EventDataLength) * 4;
-	fw_event = kzalloc(sizeof(*fw_event) + sz, GFP_ATOMIC);
+	fw_event = alloc_fw_event_work(sz);
 	if (!fw_event) {
 		printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
 		    ioc->name, __FILE__, __LINE__, __func__);
@@ -7764,6 +7833,7 @@  mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index,
 	fw_event->VP_ID = mpi_reply->VP_ID;
 	fw_event->event = event;
 	_scsih_fw_event_add(ioc, fw_event);
+	fw_event_work_put(fw_event);
 	return;
 }