diff mbox

[v3,2/4] cxlflash: Base error recovery support

Message ID 1438576402-32935-1-git-send-email-mrochs@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Matthew R. Ochs Aug. 3, 2015, 4:33 a.m. UTC
Introduce support for enhanced I/O error handling.

Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
---
 drivers/scsi/cxlflash/Kconfig  |   2 +-
 drivers/scsi/cxlflash/common.h |  11 ++-
 drivers/scsi/cxlflash/main.c   | 151 ++++++++++++++++++++++++++++++++++++++---
 3 files changed, 152 insertions(+), 12 deletions(-)

Comments

Brian King Aug. 5, 2015, 4:04 p.m. UTC | #1
On 08/02/2015 11:33 PM, Matthew R. Ochs wrote:

> diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
> index ba070a5..3d6217a 100644
> --- a/drivers/scsi/cxlflash/common.h
> +++ b/drivers/scsi/cxlflash/common.h
> @@ -76,6 +76,12 @@ enum cxlflash_init_state {
>  	INIT_STATE_SCSI
>  };
> 
> +enum eeh_state {
> +	EEH_STATE_NONE,
> +	EEH_STATE_ACTIVE,
> +	EEH_STATE_FAILED
> +};

Can you use pdev->error_state and pci_channel_offline instead of duplicating this
state information in a private driver definition?

> +
>  /*
>   * Each context has its own set of resource handles that is visible
>   * only from that context.
> @@ -91,8 +97,6 @@ struct cxlflash_cfg {
> 
>  	ulong cxlflash_regs_pci;
> 
> -	wait_queue_head_t eeh_waitq;
> -
>  	struct work_struct work_q;
>  	enum cxlflash_init_state init_state;
>  	enum cxlflash_lr_state lr_state;
> @@ -105,7 +109,8 @@ struct cxlflash_cfg {
>  	wait_queue_head_t tmf_waitq;
>  	spinlock_t tmf_slock;
>  	bool tmf_active;
> -	u8 err_recovery_active:1;
> +	wait_queue_head_t eeh_waitq;
> +	enum eeh_state eeh_active;
>  };
> 
>  struct afu_cmd {
> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
> index ae2351a..9515525 100644
> --- a/drivers/scsi/cxlflash/main.c
> +++ b/drivers/scsi/cxlflash/main.c
> @@ -524,6 +524,18 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
>  	}
>  	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
> 
> +	switch (cfg->eeh_active) {
> +	case EEH_STATE_ACTIVE:
> +		pr_debug_ratelimited("%s: BUSY w/ EEH Recovery!\n", __func__);
> +		rc = SCSI_MLQUEUE_HOST_BUSY;
> +		goto out;
> +	case EEH_STATE_FAILED:
> +		pr_debug_ratelimited("%s: device has failed!\n", __func__);
> +		goto out;
> +	case EEH_STATE_NONE:
> +		break;
> +	}
> +
>  	cmd = cmd_checkout(afu);
>  	if (unlikely(!cmd)) {
>  		dev_err(dev, "%s: could not get a free command\n", __func__);
> @@ -1694,6 +1706,10 @@ static int init_afu(struct cxlflash_cfg *cfg)
>  	struct afu *afu = cfg->afu;
>  	struct device *dev = &cfg->dev->dev;
> 
> +#ifdef CONFIG_CXL_EEH
> +	cxl_perst_reloads_same_image(afu, val) 
> +#endif

I'd suggest moving this to a .h and defining the function as a noop there if appropriate, something
like:

#ifndef CONFIG_CXL_EEH
#define cxl_perst_reloads_same_image(cfg->cxl_afu, true) do { } while(0)
#endif


> +
>  	rc = init_mc(cfg);
>  	if (rc) {
>  		dev_err(dev, "%s: call to init_mc failed, rc=%d!\n",
> @@ -1748,6 +1764,12 @@ err1:
>   * the sync. This design point requires calling threads to not be on interrupt
>   * context due to the possibility of sleeping during concurrent sync operations.
>   *
> + * AFU sync operations should be gated during EEH recovery. When a recovery
> + * fails and an adapter is to be removed, sync requests can occur as part of
> + * cleaning up resources associated with an adapter prior to its removal. In
> + * this scenario, these requests are identified here and simply ignored (safe
> + * due to the AFU going away).
> + *
>   * Return:
>   *	0 on success
>   *	-1 on failure
> @@ -1762,6 +1784,11 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
>  	int retry_cnt = 0;
>  	static DEFINE_MUTEX(sync_active);
> 
> +	if (cfg->eeh_active == EEH_STATE_FAILED) {
> +		pr_debug("%s: Sync not required due to EEH state!\n", __func__);
> +		return 0;
> +	}
> +
>  	mutex_lock(&sync_active);
>  retry:
>  	cmd = cmd_checkout(afu);
> @@ -1857,9 +1884,18 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
> 
> -	rcr = send_tmf(afu, scp, TMF_LUN_RESET);
> -	if (unlikely(rcr))
> -		rc = FAILED;
> +	switch (cfg->eeh_active) {
> +	case EEH_STATE_NONE:
> +		rcr = send_tmf(afu, scp, TMF_LUN_RESET);
> +		if (unlikely(rcr))
> +			rc = FAILED;
> +		break;
> +	case EEH_STATE_ACTIVE:
> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
> +		break;
> +	case EEH_STATE_FAILED:
> +		break;
> +	}
> 
>  	pr_debug("%s: returning rc=%d\n", __func__, rc);
>  	return rc;
> @@ -1889,11 +1925,23 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
> 
> -	rcr = afu_reset(cfg);
> -	if (rcr == 0)
> -		rc = SUCCESS;
> -	else
> -		rc = FAILED;
> +	switch (cfg->eeh_active) {
> +	case EEH_STATE_NONE:
> +		cfg->eeh_active = EEH_STATE_FAILED;

Seems a little strange to be messing with the EEH state machine here when EEH isn't even at play.
If you can't switch to use the existing EEH state machine in the pdev struct, suggest renaming
this internal state machine to something more accurate and using the pdev EEH state machine where you can.
Same goes for the eeh_waitq...

> +		rcr = afu_reset(cfg);
> +		if (rcr == 0)
> +			rc = SUCCESS;
> +		else
> +			rc = FAILED;
> +		cfg->eeh_active = EEH_STATE_NONE;
> +		wake_up_all(&cfg->eeh_waitq);
> +		break;
> +	case EEH_STATE_ACTIVE:
> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
> +		break;
> +	case EEH_STATE_FAILED:
> +		break;
> +	}
> 
>  	pr_debug("%s: returning rc=%d\n", __func__, rc);
>  	return rc;
> @@ -2145,6 +2193,11 @@ static void cxlflash_worker_thread(struct work_struct *work)
>  	int port;
>  	ulong lock_flags;
> 
> +	/* Avoid MMIO if the device has failed */
> +
> +	if (cfg->eeh_active == EEH_STATE_FAILED)
> +		return;
> +
>  	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
> 
>  	if (cfg->lr_state == LINK_RESET_REQUIRED) {
> @@ -2226,6 +2279,8 @@ static int cxlflash_probe(struct pci_dev *pdev,
> 
>  	cfg->init_state = INIT_STATE_NONE;
>  	cfg->dev = pdev;
> +
> +	cfg->eeh_active = EEH_STATE_NONE;
>  	cfg->dev_id = (struct pci_device_id *)dev_id;
> 
> 
> @@ -2286,6 +2341,85 @@ out_remove:
>  	goto out;
>  }
> 
> +/**
> + * cxlflash_pci_error_detected() - called when a PCI error is detected
> + * @pdev:	PCI device struct.
> + * @state:	PCI channel state.
> + *
> + * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT
> + */
> +static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
> +						    pci_channel_state_t state)
> +{
> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
> +
> +	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
> +
> +	switch (state) {
> +	case pci_channel_io_frozen:
> +		cfg->eeh_active = EEH_STATE_ACTIVE;
> +		udelay(100);
> +

I think this udelay needs a comment...

I'd suggest calling scsi_block_requests here to stop your queuecommand function from being called.
Note that this won't stop EH commands from being sent, so you will still need to check this
in queuecommand, although the right thing to do may be to fix scsi_send_eh_cmnd to not call
queuecommand if the host is blocked.

You'd then need to call scsi_unblock_requests when EEH in the perm failure and resume cases.

> +		term_mc(cfg, UNDO_START);
> +		stop_afu(cfg);
> +
> +		return PCI_ERS_RESULT_CAN_RECOVER;
> +	case pci_channel_io_perm_failure:
> +		cfg->eeh_active = EEH_STATE_FAILED;
> +		wake_up_all(&cfg->eeh_waitq);
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	default:
> +		break;
> +	}
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +/**
> + * cxlflash_pci_slot_reset() - called when PCI slot has been reset
> + * @pdev:	PCI device struct.
> + *
> + * This routine is called by the pci error recovery code after the PCI
> + * slot has been reset, just before we should resume normal operations.
> + *
> + * Return: PCI_ERS_RESULT_RECOVERED or PCI_ERS_RESULT_DISCONNECT
> + */
> +static pci_ers_result_t cxlflash_pci_slot_reset(struct pci_dev *pdev)
> +{
> +	int rc = 0;
> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
> +	struct device *dev = &cfg->dev->dev;
> +
> +	pr_debug("%s: pdev=%p\n", __func__, pdev);
> +
> +	rc = init_afu(cfg);
> +	if (unlikely(rc)) {
> +		dev_err(dev, "%s: EEH recovery failed! (%d)\n", __func__, rc);
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +
> +	return PCI_ERS_RESULT_RECOVERED;
> +}
> +
> +/**
> + * cxlflash_pci_resume() - called when normal operation can resume
> + * @pdev:	PCI device struct
> + */
> +static void cxlflash_pci_resume(struct pci_dev *pdev)
> +{
> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
> +
> +	pr_debug("%s: pdev=%p\n", __func__, pdev);
> +
> +	cfg->eeh_active = EEH_STATE_NONE;
> +	wake_up_all(&cfg->eeh_waitq);
> +}
> +
> +static const struct pci_error_handlers cxlflash_err_handler = {
> +	.error_detected = cxlflash_pci_error_detected,
> +	.slot_reset = cxlflash_pci_slot_reset,
> +	.resume = cxlflash_pci_resume,
> +};
> +
>  /*
>   * PCI device structure
>   */
> @@ -2294,6 +2428,7 @@ static struct pci_driver cxlflash_driver = {
>  	.id_table = cxlflash_pci_table,
>  	.probe = cxlflash_probe,
>  	.remove = cxlflash_remove,
> +	.err_handler = &cxlflash_err_handler,
>  };
> 
>  /**
>
Matthew R. Ochs Aug. 5, 2015, 10:30 p.m. UTC | #2
Hi Brian,

Thanks for reviewing. Comments inline below.


-matt

> On Aug 5, 2015, at 11:04 AM, Brian King <brking@linux.vnet.ibm.com> wrote:
> 
> On 08/02/2015 11:33 PM, Matthew R. Ochs wrote:
> 
>> diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
>> index ba070a5..3d6217a 100644
>> --- a/drivers/scsi/cxlflash/common.h
>> +++ b/drivers/scsi/cxlflash/common.h
>> @@ -76,6 +76,12 @@ enum cxlflash_init_state {
>> 	INIT_STATE_SCSI
>> };
>> 
>> +enum eeh_state {
>> +	EEH_STATE_NONE,
>> +	EEH_STATE_ACTIVE,
>> +	EEH_STATE_FAILED
>> +};
> 
> Can you use pdev->error_state and pci_channel_offline instead of duplicating this
> state information in a private driver definition?

Makes sense, I’ll look into this.

>> 
>> +#ifdef CONFIG_CXL_EEH
>> +	cxl_perst_reloads_same_image(afu, val) 
>> +#endif
> 
> I'd suggest moving this to a .h and defining the function as a noop there if appropriate, something
> like:
> 
> #ifndef CONFIG_CXL_EEH
> #define cxl_perst_reloads_same_image(cfg->cxl_afu, true) do { } while(0)
> #endif

Done.

>> 
>> -	rcr = afu_reset(cfg);
>> -	if (rcr == 0)
>> -		rc = SUCCESS;
>> -	else
>> -		rc = FAILED;
>> +	switch (cfg->eeh_active) {
>> +	case EEH_STATE_NONE:
>> +		cfg->eeh_active = EEH_STATE_FAILED;
> 
> Seems a little strange to be messing with the EEH state machine here when EEH isn't even at play.
> If you can't switch to use the existing EEH state machine in the pdev struct, suggest renaming
> this internal state machine to something more accurate and using the pdev EEH state machine where you can.
> Same goes for the eeh_waitq…

I do agree that this is a bit strange. What we’re doing here is borrowing the framework we
put in place to quiesce user contexts and hold off new threads coming in during an EEH
event. I’ll look into how we can refactor this given that we’re going to move to using the
existing EEH state machine (pdev->error_state) and will no longer be able to toggle state.

>> +	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
>> +
>> +	switch (state) {
>> +	case pci_channel_io_frozen:
>> +		cfg->eeh_active = EEH_STATE_ACTIVE;
>> +		udelay(100);
>> +
> 
> I think this udelay needs a comment…

This may end up going away. I’ll add a comment if we keep it.

> I'd suggest calling scsi_block_requests here to stop your queuecommand function from being called.
> Note that this won't stop EH commands from being sent, so you will still need to check this
> in queuecommand, although the right thing to do may be to fix scsi_send_eh_cmnd to not call
> queuecommand if the host is blocked.
> 
> You’d then need to call scsi_unblock_requests when EEH in the perm failure and resume cases.

Good suggestion, we’ll look at adding this in.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Axtens Aug. 5, 2015, 10:38 p.m. UTC | #3
On Wed, 2015-08-05 at 17:30 -0500, Matthew R. Ochs wrote:
> Hi Brian,
> 
> Thanks for reviewing. Comments inline below.
> 
> 
> -matt
> 
> > On Aug 5, 2015, at 11:04 AM, Brian King <brking@linux.vnet.ibm.com> wrote:
> > 
> > On 08/02/2015 11:33 PM, Matthew R. Ochs wrote:
> > 
> >> diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
> >> index ba070a5..3d6217a 100644
> >> --- a/drivers/scsi/cxlflash/common.h
> >> +++ b/drivers/scsi/cxlflash/common.h
> >> @@ -76,6 +76,12 @@ enum cxlflash_init_state {
> >> 	INIT_STATE_SCSI
> >> };
> >> 
> >> +enum eeh_state {
> >> +	EEH_STATE_NONE,
> >> +	EEH_STATE_ACTIVE,
> >> +	EEH_STATE_FAILED
> >> +};
> > 
> > Can you use pdev->error_state and pci_channel_offline instead of duplicating this
> > state information in a private driver definition?
> 
> Makes sense, I’ll look into this.
> 
I don't think my vPHB code propagates error_state yet. I'll check, and
if necessary, push a patch and fold it into my v3.

Regards
Daniel

> >> 
> >> +#ifdef CONFIG_CXL_EEH
> >> +	cxl_perst_reloads_same_image(afu, val) 
> >> +#endif
> > 
> > I'd suggest moving this to a .h and defining the function as a noop there if appropriate, something
> > like:
> > 
> > #ifndef CONFIG_CXL_EEH
> > #define cxl_perst_reloads_same_image(cfg->cxl_afu, true) do { } while(0)
> > #endif
> 
> Done.
> 
> >> 
> >> -	rcr = afu_reset(cfg);
> >> -	if (rcr == 0)
> >> -		rc = SUCCESS;
> >> -	else
> >> -		rc = FAILED;
> >> +	switch (cfg->eeh_active) {
> >> +	case EEH_STATE_NONE:
> >> +		cfg->eeh_active = EEH_STATE_FAILED;
> > 
> > Seems a little strange to be messing with the EEH state machine here when EEH isn't even at play.
> > If you can't switch to use the existing EEH state machine in the pdev struct, suggest renaming
> > this internal state machine to something more accurate and using the pdev EEH state machine where you can.
> > Same goes for the eeh_waitq…
> 
> I do agree that this is a bit strange. What we’re doing here is borrowing the framework we
> put in place to quiesce user contexts and hold off new threads coming in during an EEH
> event. I’ll look into how we can refactor this given that we’re going to move to using the
> existing EEH state machine (pdev->error_state) and will no longer be able to toggle state.
> 
> >> +	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
> >> +
> >> +	switch (state) {
> >> +	case pci_channel_io_frozen:
> >> +		cfg->eeh_active = EEH_STATE_ACTIVE;
> >> +		udelay(100);
> >> +
> > 
> > I think this udelay needs a comment…
> 
> This may end up going away. I’ll add a comment if we keep it.
> 
> > I'd suggest calling scsi_block_requests here to stop your queuecommand function from being called.
> > Note that this won't stop EH commands from being sent, so you will still need to check this
> > in queuecommand, although the right thing to do may be to fix scsi_send_eh_cmnd to not call
> > queuecommand if the host is blocked.
> > 
> > You’d then need to call scsi_unblock_requests when EEH in the perm failure and resume cases.
> 
> Good suggestion, we’ll look at adding this in.


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Axtens Aug. 7, 2015, 5:12 a.m. UTC | #4
Hi, 

> @@ -1857,9 +1884,18 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>  
> -	rcr = send_tmf(afu, scp, TMF_LUN_RESET);
> -	if (unlikely(rcr))
> -		rc = FAILED;
> +	switch (cfg->eeh_active) {
> +	case EEH_STATE_NONE:
> +		rcr = send_tmf(afu, scp, TMF_LUN_RESET);
> +		if (unlikely(rcr))
> +			rc = FAILED;
> +		break;
> +	case EEH_STATE_ACTIVE:
> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
> +		break;
> +	case EEH_STATE_FAILED:
> +		break;
> +	}
>  

Looking at the context here, it looks like rc gets initalised to
SUCCESS. In that case, in the EEH failed case, you'll return SUCCESS.
I'm not particularly clear on the semantics here: does that make sense?

Likewise, in the EEH active state, when you finish the wait_event,
should you check if the EEH state went to NONE or FAILED before you
break?

There's a similar case below in host_reset.

>  	pr_debug("%s: returning rc=%d\n", __func__, rc);
>  	return rc;
> @@ -1889,11 +1925,23 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>  		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>  
> -	rcr = afu_reset(cfg);
> -	if (rcr == 0)
> -		rc = SUCCESS;
> -	else
> -		rc = FAILED;
> +	switch (cfg->eeh_active) {
> +	case EEH_STATE_NONE:
> +		cfg->eeh_active = EEH_STATE_FAILED;
> +		rcr = afu_reset(cfg);
> +		if (rcr == 0)
> +			rc = SUCCESS;
> +		else
> +			rc = FAILED;
> +		cfg->eeh_active = EEH_STATE_NONE;
> +		wake_up_all(&cfg->eeh_waitq);
> +		break;
> +	case EEH_STATE_ACTIVE:
> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
> +		break;
> +	case EEH_STATE_FAILED:
> +		break;
> +	}
>  
>  	pr_debug("%s: returning rc=%d\n", __func__, rc);
>  	return rc;
> @@ -2145,6 +2193,11 @@ static void cxlflash_worker_thread(struct work_struct *work)
>  	int port;
>  	ulong lock_flags;
>  
> +	/* Avoid MMIO if the device has failed */
> +
> +	if (cfg->eeh_active == EEH_STATE_FAILED)
> +		return;
> +
Should this check be != EEH_STATE_NONE? Or is this called within the
error recovery process?

>  	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
>  
>  	if (cfg->lr_state == LINK_RESET_REQUIRED) {
> @@ -2226,6 +2279,8 @@ static int cxlflash_probe(struct pci_dev *pdev,
>  
>  	cfg->init_state = INIT_STATE_NONE;
>  	cfg->dev = pdev;
> +
> +	cfg->eeh_active = EEH_STATE_NONE;
>  	cfg->dev_id = (struct pci_device_id *)dev_id;
>  
> 
> @@ -2286,6 +2341,85 @@ out_remove:
>  	goto out;
>  }
>  
> +/**
> + * cxlflash_pci_error_detected() - called when a PCI error is detected
> + * @pdev:	PCI device struct.
> + * @state:	PCI channel state.
> + *
> + * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT
> + */
> +static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
> +						    pci_channel_state_t state)
> +{
> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
> +
> +	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
> +
> +	switch (state) {
> +	case pci_channel_io_frozen:
> +		cfg->eeh_active = EEH_STATE_ACTIVE;
> +		udelay(100);
> +
> +		term_mc(cfg, UNDO_START);
> +		stop_afu(cfg);
> +
> +		return PCI_ERS_RESULT_CAN_RECOVER;

I think that should PCI_ERS_RESULT_NEED_RESET.

Apart from that, it's looking pretty good from an EEH perspective.
Matthew R. Ochs Aug. 7, 2015, 8:53 p.m. UTC | #5
Hi Daniel,

Thanks for reviewing. Comments inline below.


-matt

> On Aug 7, 2015, at 12:12 AM, Daniel Axtens <dja@axtens.net> wrote:
> 
> Hi, 
> 
>> @@ -1857,9 +1884,18 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
>> 		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>> 		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>> 
>> -	rcr = send_tmf(afu, scp, TMF_LUN_RESET);
>> -	if (unlikely(rcr))
>> -		rc = FAILED;
>> +	switch (cfg->eeh_active) {
>> +	case EEH_STATE_NONE:
>> +		rcr = send_tmf(afu, scp, TMF_LUN_RESET);
>> +		if (unlikely(rcr))
>> +			rc = FAILED;
>> +		break;
>> +	case EEH_STATE_ACTIVE:
>> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
>> +		break;
>> +	case EEH_STATE_FAILED:
>> +		break;
>> +	}
>> 
> 
> Looking at the context here, it looks like rc gets initalised to
> SUCCESS. In that case, in the EEH failed case, you'll return SUCCESS.
> I'm not particularly clear on the semantics here: does that make sense?
> 
> Likewise, in the EEH active state, when you finish the wait_event,
> should you check if the EEH state went to NONE or FAILED before you
> break?
> 
> There’s a similar case below in host_reset.

Good catch, this is a bug we had previously identified. Look for a fix in v4.

> 
>> 	pr_debug("%s: returning rc=%d\n", __func__, rc);
>> 	return rc;
>> @@ -1889,11 +1925,23 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
>> 		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
>> 		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>> 
>> -	rcr = afu_reset(cfg);
>> -	if (rcr == 0)
>> -		rc = SUCCESS;
>> -	else
>> -		rc = FAILED;
>> +	switch (cfg->eeh_active) {
>> +	case EEH_STATE_NONE:
>> +		cfg->eeh_active = EEH_STATE_FAILED;
>> +		rcr = afu_reset(cfg);
>> +		if (rcr == 0)
>> +			rc = SUCCESS;
>> +		else
>> +			rc = FAILED;
>> +		cfg->eeh_active = EEH_STATE_NONE;
>> +		wake_up_all(&cfg->eeh_waitq);
>> +		break;
>> +	case EEH_STATE_ACTIVE:
>> +		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
>> +		break;
>> +	case EEH_STATE_FAILED:
>> +		break;
>> +	}
>> 
>> 	pr_debug("%s: returning rc=%d\n", __func__, rc);
>> 	return rc;
>> @@ -2145,6 +2193,11 @@ static void cxlflash_worker_thread(struct work_struct *work)
>> 	int port;
>> 	ulong lock_flags;
>> 
>> +	/* Avoid MMIO if the device has failed */
>> +
>> +	if (cfg->eeh_active == EEH_STATE_FAILED)
>> +		return;
>> +
> Should this check be != EEH_STATE_NONE? Or is this called within the
> error recovery process?

Yes, we have already swapped the logic around as you pointed out. You’ll see it in v4.

> 
>> 	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
>> 
>> 	if (cfg->lr_state == LINK_RESET_REQUIRED) {
>> @@ -2226,6 +2279,8 @@ static int cxlflash_probe(struct pci_dev *pdev,
>> 
>> 	cfg->init_state = INIT_STATE_NONE;
>> 	cfg->dev = pdev;
>> +
>> +	cfg->eeh_active = EEH_STATE_NONE;
>> 	cfg->dev_id = (struct pci_device_id *)dev_id;
>> 
>> 
>> @@ -2286,6 +2341,85 @@ out_remove:
>> 	goto out;
>> }
>> 
>> +/**
>> + * cxlflash_pci_error_detected() - called when a PCI error is detected
>> + * @pdev:	PCI device struct.
>> + * @state:	PCI channel state.
>> + *
>> + * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT
>> + */
>> +static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
>> +						    pci_channel_state_t state)
>> +{
>> +	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
>> +
>> +	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
>> +
>> +	switch (state) {
>> +	case pci_channel_io_frozen:
>> +		cfg->eeh_active = EEH_STATE_ACTIVE;
>> +		udelay(100);
>> +
>> +		term_mc(cfg, UNDO_START);
>> +		stop_afu(cfg);
>> +
>> +		return PCI_ERS_RESULT_CAN_RECOVER;
> 
> I think that should PCI_ERS_RESULT_NEED_RESET.

Agreed. Will change for v4.

> 
> Apart from that, it’s looking pretty good from an EEH perspective.

Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig
index c707508..c052104 100644
--- a/drivers/scsi/cxlflash/Kconfig
+++ b/drivers/scsi/cxlflash/Kconfig
@@ -4,7 +4,7 @@ 
 
 config CXLFLASH
 	tristate "Support for IBM CAPI Flash"
-	depends on PCI && SCSI && CXL
+	depends on PCI && SCSI && CXL && EEH
 	default m
 	help
 	  Allows CAPI Accelerated IO to Flash
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index ba070a5..3d6217a 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -76,6 +76,12 @@  enum cxlflash_init_state {
 	INIT_STATE_SCSI
 };
 
+enum eeh_state {
+	EEH_STATE_NONE,
+	EEH_STATE_ACTIVE,
+	EEH_STATE_FAILED
+};
+
 /*
  * Each context has its own set of resource handles that is visible
  * only from that context.
@@ -91,8 +97,6 @@  struct cxlflash_cfg {
 
 	ulong cxlflash_regs_pci;
 
-	wait_queue_head_t eeh_waitq;
-
 	struct work_struct work_q;
 	enum cxlflash_init_state init_state;
 	enum cxlflash_lr_state lr_state;
@@ -105,7 +109,8 @@  struct cxlflash_cfg {
 	wait_queue_head_t tmf_waitq;
 	spinlock_t tmf_slock;
 	bool tmf_active;
-	u8 err_recovery_active:1;
+	wait_queue_head_t eeh_waitq;
+	enum eeh_state eeh_active;
 };
 
 struct afu_cmd {
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index ae2351a..9515525 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -524,6 +524,18 @@  static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
 	}
 	spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
+	switch (cfg->eeh_active) {
+	case EEH_STATE_ACTIVE:
+		pr_debug_ratelimited("%s: BUSY w/ EEH Recovery!\n", __func__);
+		rc = SCSI_MLQUEUE_HOST_BUSY;
+		goto out;
+	case EEH_STATE_FAILED:
+		pr_debug_ratelimited("%s: device has failed!\n", __func__);
+		goto out;
+	case EEH_STATE_NONE:
+		break;
+	}
+
 	cmd = cmd_checkout(afu);
 	if (unlikely(!cmd)) {
 		dev_err(dev, "%s: could not get a free command\n", __func__);
@@ -1694,6 +1706,10 @@  static int init_afu(struct cxlflash_cfg *cfg)
 	struct afu *afu = cfg->afu;
 	struct device *dev = &cfg->dev->dev;
 
+#ifdef CONFIG_CXL_EEH
+	cxl_perst_reloads_same_image(cfg->cxl_afu, true);
+#endif
+
 	rc = init_mc(cfg);
 	if (rc) {
 		dev_err(dev, "%s: call to init_mc failed, rc=%d!\n",
@@ -1748,6 +1764,12 @@  err1:
  * the sync. This design point requires calling threads to not be on interrupt
  * context due to the possibility of sleeping during concurrent sync operations.
  *
+ * AFU sync operations should be gated during EEH recovery. When a recovery
+ * fails and an adapter is to be removed, sync requests can occur as part of
+ * cleaning up resources associated with an adapter prior to its removal. In
+ * this scenario, these requests are identified here and simply ignored (safe
+ * due to the AFU going away).
+ *
  * Return:
  *	0 on success
  *	-1 on failure
@@ -1762,6 +1784,11 @@  int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
 	int retry_cnt = 0;
 	static DEFINE_MUTEX(sync_active);
 
+	if (cfg->eeh_active == EEH_STATE_FAILED) {
+		pr_debug("%s: Sync not required due to EEH state!\n", __func__);
+		return 0;
+	}
+
 	mutex_lock(&sync_active);
 retry:
 	cmd = cmd_checkout(afu);
@@ -1857,9 +1884,18 @@  static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
 		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
 		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
-	rcr = send_tmf(afu, scp, TMF_LUN_RESET);
-	if (unlikely(rcr))
-		rc = FAILED;
+	switch (cfg->eeh_active) {
+	case EEH_STATE_NONE:
+		rcr = send_tmf(afu, scp, TMF_LUN_RESET);
+		if (unlikely(rcr))
+			rc = FAILED;
+		break;
+	case EEH_STATE_ACTIVE:
+		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
+		break;
+	case EEH_STATE_FAILED:
+		break;
+	}
 
 	pr_debug("%s: returning rc=%d\n", __func__, rc);
 	return rc;
@@ -1889,11 +1925,23 @@  static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
 		 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
 		 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
-	rcr = afu_reset(cfg);
-	if (rcr == 0)
-		rc = SUCCESS;
-	else
-		rc = FAILED;
+	switch (cfg->eeh_active) {
+	case EEH_STATE_NONE:
+		cfg->eeh_active = EEH_STATE_FAILED;
+		rcr = afu_reset(cfg);
+		if (rcr == 0)
+			rc = SUCCESS;
+		else
+			rc = FAILED;
+		cfg->eeh_active = EEH_STATE_NONE;
+		wake_up_all(&cfg->eeh_waitq);
+		break;
+	case EEH_STATE_ACTIVE:
+		wait_event(cfg->eeh_waitq, cfg->eeh_active != EEH_STATE_ACTIVE);
+		break;
+	case EEH_STATE_FAILED:
+		break;
+	}
 
 	pr_debug("%s: returning rc=%d\n", __func__, rc);
 	return rc;
@@ -2145,6 +2193,11 @@  static void cxlflash_worker_thread(struct work_struct *work)
 	int port;
 	ulong lock_flags;
 
+	/* Avoid MMIO if the device has failed */
+
+	if (cfg->eeh_active == EEH_STATE_FAILED)
+		return;
+
 	spin_lock_irqsave(cfg->host->host_lock, lock_flags);
 
 	if (cfg->lr_state == LINK_RESET_REQUIRED) {
@@ -2226,6 +2279,8 @@  static int cxlflash_probe(struct pci_dev *pdev,
 
 	cfg->init_state = INIT_STATE_NONE;
 	cfg->dev = pdev;
+
+	cfg->eeh_active = EEH_STATE_NONE;
 	cfg->dev_id = (struct pci_device_id *)dev_id;
 
 
@@ -2286,6 +2341,85 @@  out_remove:
 	goto out;
 }
 
+/**
+ * cxlflash_pci_error_detected() - called when a PCI error is detected
+ * @pdev:	PCI device struct.
+ * @state:	PCI channel state.
+ *
+ * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT
+ */
+static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
+						    pci_channel_state_t state)
+{
+	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
+
+	pr_debug("%s: pdev=%p state=%u\n", __func__, pdev, state);
+
+	switch (state) {
+	case pci_channel_io_frozen:
+		cfg->eeh_active = EEH_STATE_ACTIVE;
+		udelay(100);
+
+		term_mc(cfg, UNDO_START);
+		stop_afu(cfg);
+
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_perm_failure:
+		cfg->eeh_active = EEH_STATE_FAILED;
+		wake_up_all(&cfg->eeh_waitq);
+		return PCI_ERS_RESULT_DISCONNECT;
+	default:
+		break;
+	}
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * cxlflash_pci_slot_reset() - called when PCI slot has been reset
+ * @pdev:	PCI device struct.
+ *
+ * This routine is called by the pci error recovery code after the PCI
+ * slot has been reset, just before we should resume normal operations.
+ *
+ * Return: PCI_ERS_RESULT_RECOVERED or PCI_ERS_RESULT_DISCONNECT
+ */
+static pci_ers_result_t cxlflash_pci_slot_reset(struct pci_dev *pdev)
+{
+	int rc = 0;
+	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
+	struct device *dev = &cfg->dev->dev;
+
+	pr_debug("%s: pdev=%p\n", __func__, pdev);
+
+	rc = init_afu(cfg);
+	if (unlikely(rc)) {
+		dev_err(dev, "%s: EEH recovery failed! (%d)\n", __func__, rc);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * cxlflash_pci_resume() - called when normal operation can resume
+ * @pdev:	PCI device struct
+ */
+static void cxlflash_pci_resume(struct pci_dev *pdev)
+{
+	struct cxlflash_cfg *cfg = pci_get_drvdata(pdev);
+
+	pr_debug("%s: pdev=%p\n", __func__, pdev);
+
+	cfg->eeh_active = EEH_STATE_NONE;
+	wake_up_all(&cfg->eeh_waitq);
+}
+
+static const struct pci_error_handlers cxlflash_err_handler = {
+	.error_detected = cxlflash_pci_error_detected,
+	.slot_reset = cxlflash_pci_slot_reset,
+	.resume = cxlflash_pci_resume,
+};
+
 /*
  * PCI device structure
  */
@@ -2294,6 +2428,7 @@  static struct pci_driver cxlflash_driver = {
 	.id_table = cxlflash_pci_table,
 	.probe = cxlflash_probe,
 	.remove = cxlflash_remove,
+	.err_handler = &cxlflash_err_handler,
 };
 
 /**