@@ -676,6 +676,11 @@ typedef struct srb {
* code.
*/
void (*put_fn)(struct kref *kref);
+ /*
+ * Report completion for asynchronous commands.
+ */
+ void (*async_done)(struct srb *sp, int res);
+ spinlock_t lock;
} srb_t;
#define GET_CMD_SP(sp) (sp->u.scmd.cmd)
@@ -313,6 +313,7 @@ extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
struct dsd64 *, uint16_t, struct qla_tgt_cmd *);
extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
+void qla2x00_sp_release(struct kref *kref);
/*
* Global Function Prototypes in qla_mbx.c source file.
@@ -529,7 +529,6 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc)
if (!e)
goto err2;
- del_timer(&sp->u.iocb_cmd.timer);
e->u.iosb.sp = sp;
qla2x00_post_work(vha, e);
return;
@@ -556,7 +555,7 @@ static void qla2x00_async_sns_sp_done(srb_t *sp, int rc)
sp->u.iocb_cmd.u.ctarg.rsp = NULL;
}
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
return;
}
@@ -2982,7 +2981,7 @@ void qla24xx_sp_unmap(scsi_qla_host_t *vha, srb_t *sp)
break;
}
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
void qla24xx_handle_gpnid_event(scsi_qla_host_t *vha, struct event_arg *ea)
@@ -3121,13 +3120,13 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res)
if (res) {
if (res == QLA_FUNCTION_TIMEOUT) {
qla24xx_post_gpnid_work(sp->vha, &ea.id);
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
return;
}
} else if (sp->gen1) {
/* There was another RSCN for this Nport ID */
qla24xx_post_gpnid_work(sp->vha, &ea.id);
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
return;
}
@@ -3148,7 +3147,7 @@ static void qla2x00_async_gpnid_sp_done(srb_t *sp, int res)
sp->u.iocb_cmd.u.ctarg.rsp_dma);
sp->u.iocb_cmd.u.ctarg.rsp = NULL;
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
return;
}
@@ -3739,7 +3738,6 @@ static void qla2x00_async_gpnft_gnnft_sp_done(srb_t *sp, int res)
"Async done-%s res %x FC4Type %x\n",
sp->name, res, sp->gen2);
- del_timer(&sp->u.iocb_cmd.timer);
sp->rc = res;
if (res) {
unsigned long flags;
@@ -4133,7 +4131,7 @@ static void qla2x00_async_gnnid_sp_done(srb_t *sp, int res)
qla24xx_handle_gnnid_event(vha, &ea);
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
int qla24xx_async_gnnid(scsi_qla_host_t *vha, fc_port_t *fcport)
@@ -4260,7 +4258,7 @@ static void qla2x00_async_gfpnid_sp_done(srb_t *sp, int res)
qla24xx_handle_gfpnid_event(vha, &ea);
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
int qla24xx_async_gfpnid(scsi_qla_host_t *vha, fc_port_t *fcport)
@@ -126,11 +126,14 @@ static void qla24xx_abort_iocb_timeout(void *data)
}
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
- if (sp->cmd_sp)
+ if (sp->cmd_sp) {
sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
+ }
abt->u.abt.comp_status = cpu_to_le16(CS_TIMEOUT);
sp->done(sp, QLA_OS_TIMER_EXPIRED);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
static void qla24xx_abort_sp_done(srb_t *sp, int res)
@@ -141,11 +144,17 @@ static void qla24xx_abort_sp_done(srb_t *sp, int res)
if (orig_sp)
qla_wait_nvme_release_cmd_kref(orig_sp);
- del_timer(&sp->u.iocb_cmd.timer);
+ if (sp->cmd_sp) {
+ sp->cmd_sp->done(sp->cmd_sp, QLA_OS_TIMER_EXPIRED);
+ kref_put_lock(&sp->cmd_sp->cmd_kref,
+ qla2x00_sp_release,
+ &sp->cmd_sp->lock);
+ }
+
if (sp->flags & SRB_WAKEUP_ON_COMP)
complete(&abt->u.abt.comp);
else
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
@@ -190,7 +199,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
wait_for_completion(&abt_iocb->u.abt.comp);
rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
QLA_SUCCESS : QLA_FUNCTION_FAILED;
- sp->free(sp);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
return rval;
@@ -237,6 +246,7 @@ qla2x00_async_iocb_timeout(void *data)
}
spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags);
sp->done(sp, QLA_FUNCTION_TIMEOUT);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
break;
case SRB_LOGOUT_CMD:
@@ -261,6 +271,7 @@ qla2x00_async_iocb_timeout(void *data)
}
spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags);
sp->done(sp, QLA_FUNCTION_TIMEOUT);
+ kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock);
}
break;
}
@@ -2597,12 +2597,36 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk)
}
}
+static void
+qla2x00_async_done(struct srb *sp, int res)
+{
+ if (del_timer(&sp->u.iocb_cmd.timer)) {
+ /* Successfully cancelled the timer, so the timeout handler will not run: drop the reference taken for it */
+ if (kref_put_lock(&sp->cmd_kref, qla2x00_sp_release, &sp->lock))
+ return; /* that was the last reference and sp has been released; do not touch it */
+ }
+
+ sp->async_done(sp, res); /* forward to the completion callback stored by qla2x00_init_async_sp() */
+}
+
+void
+qla2x00_sp_release(struct kref *kref)
+{
+ struct srb *sp = container_of(kref, struct srb, cmd_kref);
+
+ sp->free(sp); /* last reference gone: free the SRB embedding @kref. NOTE(review): when entered via kref_put_lock(), sp->lock is presumably still held here and is never unlocked -- confirm */
+}
+
void
qla2x00_init_async_sp(srb_t *sp, unsigned long tmo,
void (*done)(struct srb *sp, int res))
{
timer_setup(&sp->u.iocb_cmd.timer, qla2x00_sp_timeout, 0);
- sp->done = done;
+ kref_init(&sp->cmd_kref); /* normal control flow */
+ kref_get(&sp->cmd_kref); /* timeout control flow */
+ spin_lock_init(&sp->lock);
+ sp->done = qla2x00_async_done;
+ sp->async_done = done;
sp->free = qla2x00_sp_free;
sp->u.iocb_cmd.timeout = qla2x00_async_iocb_timeout;
sp->u.iocb_cmd.timer.expires = jiffies + tmo * HZ;
@@ -2889,7 +2913,6 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res)
sp->name, res, sp->handle, fcport->d_id.b24, fcport->port_name);
fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE);
- del_timer(&sp->u.iocb_cmd.timer);
if (sp->flags & SRB_WAKEUP_ON_COMP)
complete(&lio->u.els_plogi.comp);
The timeout handler and done function are racing. When qla2x00_async_iocb_timeout() starts to run it can be preempted by the normal response path (via the firmware?). qla24xx_async_gpsc_sp_done() releases the SRB unconditionally. When execution is scheduled back to qla2x00_async_iocb_timeout(), qla24xx_async_abort_cmd() will access a freed sp->qpair pointer: qla2xxx [0000:83:00.0]-2871:0: Async-gpsc timeout - hdl=63d portid=234500 50:06:0e:80:08:77:b6:21. qla2xxx [0000:83:00.0]-2853:0: Async done-gpsc res 0, WWPN 50:06:0e:80:08:77:b6:21 qla2xxx [0000:83:00.0]-2854:0: Async-gpsc OUT WWPN 20:45:00:27:f8:75:33:00 speeds=2c00 speed=0400. qla2xxx [0000:83:00.0]-28d8:0: qla24xx_handle_gpsc_event 50:06:0e:80:08:77:b6:21 DS 7 LS 6 rc 0 login 1|1 rscn 1|0 lid 5 BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: qla24xx_async_abort_cmd+0x1b/0x1c0 [qla2xxx] An obvious solution to this is to introduce a reference counter. One reference is taken for the normal code path (the 'good case') and one for the timeout path. As we always race between the normal good case and the timeout/abort handler, we need to serialize them. Also, we cannot assume any order between the handlers. Since this is a slow path we can use proper synchronization via locks. When we are able to cancel a timer (del_timer returns 1) we know there can't be any error handling in progress because the timeout handler hasn't run yet, thus we can safely decrement the refcounter by one. If we are not able to cancel the timer, we know an abort handler is running. We have to make sure we call sp->done() in the abort handlers before calling kref_put(). Signed-off-by: Daniel Wagner <dwagner@suse.de> --- drivers/scsi/qla2xxx/qla_def.h | 5 +++++ drivers/scsi/qla2xxx/qla_gbl.h | 1 + drivers/scsi/qla2xxx/qla_gs.c | 16 +++++++--------- drivers/scsi/qla2xxx/qla_init.c | 19 +++++++++++++++---- drivers/scsi/qla2xxx/qla_iocb.c | 27 +++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 15 deletions(-)