@@ -1544,6 +1544,10 @@ enum FW_BOOT_CONTEXT {
#define MR_CAN_HANDLE_64_BIT_DMA_OFFSET (1 << 25)
+#define MEGASAS_WATCHDOG_THREAD_INTERVAL 1000
+#define MEGASAS_WAIT_FOR_NEXT_DMA_MSECS 20
+#define MEGASAS_WATCHDOG_WAIT_COUNT 50
+
enum MR_ADAPTER_TYPE {
MFI_SERIES = 1,
THUNDERBOLT_SERIES = 2,
@@ -2250,7 +2254,9 @@ struct megasas_instance {
struct megasas_instance_template *instancet;
struct tasklet_struct isr_tasklet;
struct work_struct work_init;
- struct work_struct crash_init;
+ struct delayed_work fw_fault_work;
+ struct workqueue_struct *fw_fault_work_q;
+ char fault_handler_work_q_name[48];
u8 flag;
u8 unload;
@@ -2539,7 +2545,6 @@ int megasas_get_target_prop(struct megasas_instance *instance,
int megasas_set_crash_dump_params(struct megasas_instance *instance,
u8 crash_buf_state);
void megasas_free_host_crash_buffer(struct megasas_instance *instance);
-void megasas_fusion_crash_dump_wq(struct work_struct *work);
void megasas_return_cmd_fusion(struct megasas_instance *instance,
struct megasas_cmd_fusion *cmd);
@@ -2560,6 +2565,9 @@ int megasas_reset_target_fusion(struct scsi_cmnd *scmd);
u32 mega_mod64(u64 dividend, u32 divisor);
int megasas_alloc_fusion_context(struct megasas_instance *instance);
void megasas_free_fusion_context(struct megasas_instance *instance);
+int megasas_fusion_start_watchdog(struct megasas_instance *instance);
+void megasas_fusion_stop_watchdog(struct megasas_instance *instance);
+
void megasas_set_dma_settings(struct megasas_instance *instance,
struct megasas_dcmd_frame *dcmd,
dma_addr_t dma_addr, u32 dma_len);
@@ -5582,8 +5582,20 @@ static int megasas_init_fw(struct megasas_instance *instance)
instance->skip_heartbeat_timer_del = 1;
}
+ /*
+ * Create and start watchdog thread which will monitor
+ * controller state every 1 sec and trigger OCR when
+ * it enters fault state
+ */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_get_ld_pd_list:
instance->instancet->disable_intr(instance);
fail_init_adapter:
@@ -6434,12 +6446,10 @@ static inline void megasas_init_ctrl_params(struct megasas_instance *instance)
instance->disableOnlineCtrlReset = 1;
instance->UnevenSpanSupport = 0;
- if (instance->adapter_type != MFI_SERIES) {
+ if (instance->adapter_type != MFI_SERIES)
INIT_WORK(&instance->work_init, megasas_fusion_ocr_wq);
- INIT_WORK(&instance->crash_init, megasas_fusion_crash_dump_wq);
- } else {
+ else
INIT_WORK(&instance->work_init, process_fw_state_change_wq);
- }
}
/**
@@ -6708,6 +6718,10 @@ megasas_suspend(struct pci_dev *pdev, pm_message_t state)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
megasas_flush_cache(instance);
megasas_shutdown_controller(instance, MR_DCMD_HIBERNATE_SHUTDOWN);
@@ -6843,8 +6857,16 @@ megasas_resume(struct pci_dev *pdev)
if (megasas_start_aen(instance))
dev_err(&instance->pdev->dev, "Start AEN failed\n");
+ /* Re-launch FW fault watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ if (megasas_fusion_start_watchdog(instance) != SUCCESS)
+ goto fail_start_watchdog;
+
return 0;
+fail_start_watchdog:
+ if (instance->requestorId && !instance->skip_heartbeat_timer_del)
+ del_timer_sync(&instance->sriov_heartbeat_timer);
fail_init_mfi:
megasas_free_ctrl_dma_buffers(instance);
megasas_free_ctrl_mem(instance);
@@ -6912,6 +6934,10 @@ static void megasas_detach_one(struct pci_dev *pdev)
if (instance->requestorId && !instance->skip_heartbeat_timer_del)
del_timer_sync(&instance->sriov_heartbeat_timer);
+ /* Stop the FW fault detection watchdog */
+ if (instance->adapter_type != MFI_SERIES)
+ megasas_fusion_stop_watchdog(instance);
+
if (instance->fw_crash_state != UNAVAILABLE)
megasas_free_host_crash_buffer(instance);
scsi_remove_host(instance->host);
@@ -48,6 +48,7 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -95,6 +96,7 @@ static void megasas_free_rdpq_fusion(struct megasas_instance *instance);
static void megasas_free_reply_fusion(struct megasas_instance *instance);
static inline
void megasas_configure_queue_sizes(struct megasas_instance *instance);
+static void megasas_fusion_crash_dump(struct megasas_instance *instance);
/**
* megasas_check_same_4gb_region - check if allocation
@@ -1759,6 +1761,90 @@ megasas_init_adapter_fusion(struct megasas_instance *instance)
return 1;
}
+/**
+ * megasas_fault_detect_work - Worker function of
+ * FW fault handling workqueue.
+ */
+static void
+megasas_fault_detect_work(struct work_struct *work)
+{
+ struct megasas_instance *instance =
+ container_of(work, struct megasas_instance,
+ fw_fault_work.work);
+ u32 fw_state, dma_state, status;
+
+ /* Check the fw state */
+ fw_state = instance->instancet->read_fw_status_reg(instance->reg_set) &
+ MFI_STATE_MASK;
+
+ if (fw_state == MFI_STATE_FAULT) {
+ dma_state = instance->instancet->read_fw_status_reg(
+ instance->reg_set) & MFI_STATE_DMADONE;
+ /* Start collecting crash, if DMA bit is done */
+ if (instance->crash_dump_drv_support &&
+ instance->crash_dump_app_support && dma_state) {
+ megasas_fusion_crash_dump(instance);
+ } else {
+ if (instance->unload == 0) {
+ status = megasas_reset_fusion(instance->host, 0);
+ if (status != SUCCESS) {
+ dev_err(&instance->pdev->dev,
+ "Failed from %s %d, do not re-arm timer\n",
+ __func__, __LINE__);
+ return;
+ }
+ }
+ }
+ }
+
+ if (instance->fw_fault_work_q)
+ queue_delayed_work(instance->fw_fault_work_q,
+ &instance->fw_fault_work,
+ msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL));
+}
+
+int
+megasas_fusion_start_watchdog(struct megasas_instance *instance)
+{
+ /* Check if the Fault WQ is already started */
+ if (instance->fw_fault_work_q)
+ return SUCCESS;
+
+ INIT_DELAYED_WORK(&instance->fw_fault_work, megasas_fault_detect_work);
+
+ snprintf(instance->fault_handler_work_q_name,
+ sizeof(instance->fault_handler_work_q_name),
+ "poll_megasas%d_status", instance->host->host_no);
+
+ instance->fw_fault_work_q =
+ create_singlethread_workqueue(instance->fault_handler_work_q_name);
+ if (!instance->fw_fault_work_q) {
+ dev_err(&instance->pdev->dev, "Failed from %s %d\n",
+ __func__, __LINE__);
+ return FAILED;
+ }
+
+ queue_delayed_work(instance->fw_fault_work_q,
+ &instance->fw_fault_work,
+ msecs_to_jiffies(MEGASAS_WATCHDOG_THREAD_INTERVAL));
+
+ return SUCCESS;
+}
+
+void
+megasas_fusion_stop_watchdog(struct megasas_instance *instance)
+{
+ struct workqueue_struct *wq;
+
+ if (instance->fw_fault_work_q) {
+ wq = instance->fw_fault_work_q;
+ instance->fw_fault_work_q = NULL;
+ if (!cancel_delayed_work_sync(&instance->fw_fault_work))
+ flush_workqueue(wq);
+ destroy_workqueue(wq);
+ }
+}
+
/**
* map_cmd_status - Maps FW cmd status to OS cmd status
* @cmd : Pointer to cmd
@@ -3525,7 +3611,7 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
{
struct megasas_irq_context *irq_context = devp;
struct megasas_instance *instance = irq_context->instance;
- u32 mfiStatus, fw_state, dma_state;
+ u32 mfiStatus;
if (instance->mask_interrupts)
return IRQ_NONE;
@@ -3542,31 +3628,7 @@ irqreturn_t megasas_isr_fusion(int irq, void *devp)
return IRQ_HANDLED;
}
- if (!complete_cmd_fusion(instance, irq_context->MSIxIndex)) {
- instance->instancet->clear_intr(instance->reg_set);
- /* If we didn't complete any commands, check for FW fault */
- fw_state = instance->instancet->read_fw_status_reg(
- instance->reg_set) & MFI_STATE_MASK;
- dma_state = instance->instancet->read_fw_status_reg
- (instance->reg_set) & MFI_STATE_DMADONE;
- if (instance->crash_dump_drv_support &&
- instance->crash_dump_app_support) {
- /* Start collecting crash, if DMA bit is done */
- if ((fw_state == MFI_STATE_FAULT) && dma_state)
- schedule_work(&instance->crash_init);
- else if (fw_state == MFI_STATE_FAULT) {
- if (instance->unload == 0)
- schedule_work(&instance->work_init);
- }
- } else if (fw_state == MFI_STATE_FAULT) {
- dev_warn(&instance->pdev->dev, "Iop2SysDoorbellInt"
- "for scsi%d\n", instance->host->host_no);
- if (instance->unload == 0)
- schedule_work(&instance->work_init);
- }
- }
-
- return IRQ_HANDLED;
+ return complete_cmd_fusion(instance, irq_context->MSIxIndex);
}
/**
@@ -4752,13 +4814,12 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason)
return retval;
}
-/* Fusion Crash dump collection work queue */
-void megasas_fusion_crash_dump_wq(struct work_struct *work)
+/* Fusion Crash dump collection */
+void megasas_fusion_crash_dump(struct megasas_instance *instance)
{
- struct megasas_instance *instance =
- container_of(work, struct megasas_instance, crash_init);
u32 status_reg;
u8 partial_copy = 0;
+ int wait = 0;
status_reg = instance->instancet->read_fw_status_reg(instance->reg_set);
@@ -4786,21 +4847,42 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work)
"allocated: %d\n", instance->drv_buf_alloc);
}
- /*
- * Driver has allocated max buffers, which can be allocated
- * and FW has more crash dump data, then driver will
- * ignore the data.
- */
- if (instance->drv_buf_index >= (instance->drv_buf_alloc)) {
- dev_info(&instance->pdev->dev, "Driver is done copying "
- "the buffer: %d\n", instance->drv_buf_alloc);
- status_reg |= MFI_STATE_CRASH_DUMP_DONE;
- partial_copy = 1;
- } else {
- memcpy(instance->crash_buf[instance->drv_buf_index],
- instance->crash_dump_buf, CRASH_DMA_BUF_SIZE);
- instance->drv_buf_index++;
- status_reg &= ~MFI_STATE_DMADONE;
+ while (!(status_reg & MFI_STATE_CRASH_DUMP_DONE) &&
+ (wait < MEGASAS_WATCHDOG_WAIT_COUNT)) {
+ if (!(status_reg & MFI_STATE_DMADONE)) {
+ /*
+ * Next crash dump buffer is not yet DMA'd by FW
+ * Check after 10ms. Wait for 1 second for FW to
+ * post the next buffer. If not bail out.
+ */
+ wait++;
+ msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS);
+ status_reg = instance->instancet->read_fw_status_reg(
+ instance->reg_set);
+ continue;
+ }
+
+ wait = 0;
+ if (instance->drv_buf_index >= instance->drv_buf_alloc) {
+ dev_info(&instance->pdev->dev,
+ "Driver is done copying the buffer: %d\n",
+ instance->drv_buf_alloc);
+ status_reg |= MFI_STATE_CRASH_DUMP_DONE;
+ partial_copy = 1;
+ break;
+ } else {
+ memcpy(instance->crash_buf[instance->drv_buf_index],
+ instance->crash_dump_buf, CRASH_DMA_BUF_SIZE);
+ instance->drv_buf_index++;
+ status_reg &= ~MFI_STATE_DMADONE;
+ }
+
+ writel(status_reg, &instance->reg_set->outbound_scratch_pad);
+ readl(&instance->reg_set->outbound_scratch_pad);
+
+ msleep(MEGASAS_WAIT_FOR_NEXT_DMA_MSECS);
+ status_reg = instance->instancet->read_fw_status_reg(
+ instance->reg_set);
}
if (status_reg & MFI_STATE_CRASH_DUMP_DONE) {
@@ -4813,9 +4895,6 @@ void megasas_fusion_crash_dump_wq(struct work_struct *work)
readl(&instance->reg_set->outbound_scratch_pad);
if (!partial_copy)
megasas_reset_fusion(instance->host, 0);
- } else {
- writel(status_reg, &instance->reg_set->outbound_scratch_pad);
- readl(&instance->reg_set->outbound_scratch_pad);
}
}