Message ID | 20190614221020.19173-2-hmadhani@marvell.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | qla2xxx: Fix crashes with FC-NVMe devices | expand |
See below. On Fri, 2019-06-14 at 15:10 -0700, Himanshu Madhani wrote: > From: Arun Easi <aeasi@marvell.com> > > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > PGD 800000084cf41067 PUD 84d288067 PMD 0 > Oops: 0000 [#1] SMP > Call Trace: > [<ffffffff98abcfdf>] process_one_work+0x17f/0x440 > [<ffffffff98abdca6>] worker_thread+0x126/0x3c0 > [<ffffffff98abdb80>] ? manage_workers.isra.26+0x2a0/0x2a0 > [<ffffffff98ac4f81>] kthread+0xd1/0xe0 > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > [<ffffffff9918ad37>] ret_from_fork_nospec_begin+0x21/0x21 > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > RIP [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > The crash is due to a bad entry in the nvme_rport_list. This list is not > protected, and when a remoteport_delete callback is called, driver > traverses the list and crashes. > > Actually, the list could be removed and driver could traverse the main > fcport list instead. Fix does exactly that. 
> > Signed-off-by: Arun Easi <aeasi@marvell.com> > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 1 - > drivers/scsi/qla2xxx/qla_nvme.c | 52 ++++++++++++++++++++--------------------- > drivers/scsi/qla2xxx/qla_nvme.h | 1 - > drivers/scsi/qla2xxx/qla_os.c | 1 - > 4 files changed, 25 insertions(+), 30 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > index 1a4095c56eee..602ed24bb806 100644 > --- a/drivers/scsi/qla2xxx/qla_def.h > +++ b/drivers/scsi/qla2xxx/qla_def.h > @@ -4376,7 +4376,6 @@ typedef struct scsi_qla_host { > > struct nvme_fc_local_port *nvme_local_port; > struct completion nvme_del_done; > - struct list_head nvme_rport_list; > > uint16_t fcoe_vlan_id; > uint16_t fcoe_fcf_idx; > diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c > index 22e3fba28e51..99220a3cf734 100644 > --- a/drivers/scsi/qla2xxx/qla_nvme.c > +++ b/drivers/scsi/qla2xxx/qla_nvme.c > @@ -14,6 +14,18 @@ static struct nvme_fc_port_template qla_nvme_fc_transport; > > static void qla_nvme_unregister_remote_port(struct work_struct *); > > +static inline > +int qla_is_active_nvme_fcport(struct fc_port *fcport) > +{ > + return fcport->nvme_flag & NVME_FLAG_REGISTERED; > +} > + Nitpick: "qla_is_active_nvme_fcport", the qualifier "active" does not match any of the flag definitions and this is only checking the REGISTERED flag. Maybe "qla_is_registered_nvme_fcport" instead? Would help to understand vs. DELETING and RESETTING case. > +#define qla_list_for_each_nvme_fcport(_fcport, _vha) \ > +{ \ > + list_for_each_entry(_fcport, &_vha->vp_fcports, list) \ > + if (qla_is_active_nvme_fcport(_fcport)) \ > +} > + Like here, "if (qla_is_registered_nvme_fcport(" would be clearer. 
> int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) > { > struct qla_nvme_rport *rport; > @@ -74,7 +86,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) > > rport = fcport->nvme_remote_port->private; > rport->fcport = fcport; > - list_add_tail(&rport->list, &vha->nvme_rport_list); > > fcport->nvme_flag |= NVME_FLAG_REGISTERED; > return 0; > @@ -542,19 +553,12 @@ static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport) > static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) > { > fc_port_t *fcport; > - struct qla_nvme_rport *qla_rport = rport->private, *trport; > + struct qla_nvme_rport *qla_rport = rport->private; > > fcport = qla_rport->fcport; > fcport->nvme_remote_port = NULL; > fcport->nvme_flag &= ~NVME_FLAG_REGISTERED; > > - list_for_each_entry_safe(qla_rport, trport, > - &fcport->vha->nvme_rport_list, list) { > - if (qla_rport->fcport == fcport) { > - list_del(&qla_rport->list); > - break; > - } > - } > complete(&fcport->nvme_del_done); > > if (!test_bit(UNLOADING, &fcport->vha->dpc_flags)) { > @@ -590,31 +594,25 @@ static void qla_nvme_unregister_remote_port(struct work_struct *work) > { > struct fc_port *fcport = container_of(work, struct fc_port, > nvme_del_work); > - struct qla_nvme_rport *qla_rport, *trport; > + int ret; > > if (!IS_ENABLED(CONFIG_NVME_FC)) > return; > > + if (!qla_is_active_nvme_fcport(fcport)) > + return; > + This looks like it is prone to a race because the REGISTERED flag is cleared in the callback from the NVMe transport? The flag was already checked in qlt_unreg_sess(), then the work item runs here, then later qla_nvme_remoteport_delete() runs. What is happening here that the check is preventing? 
-Ewan > ql_log(ql_log_warn, NULL, 0x2112, > "%s: unregister remoteport on %p\n",__func__, fcport); > > - list_for_each_entry_safe(qla_rport, trport, > - &fcport->vha->nvme_rport_list, list) { > - if (qla_rport->fcport == fcport) { > - ql_log(ql_log_info, fcport->vha, 0x2113, > - "%s: fcport=%p\n", __func__, fcport); > - nvme_fc_set_remoteport_devloss > - (fcport->nvme_remote_port, 0); > - init_completion(&fcport->nvme_del_done); > - if (nvme_fc_unregister_remoteport > - (fcport->nvme_remote_port)) > - ql_log(ql_log_info, fcport->vha, 0x2114, > - "%s: Failed to unregister nvme_remote_port\n", > - __func__); > - wait_for_completion(&fcport->nvme_del_done); > - break; > - } > - } > + nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0); > + init_completion(&fcport->nvme_del_done); > + ret = nvme_fc_unregister_remoteport(fcport->nvme_remote_port); > + if (ret) > + ql_log(ql_log_info, fcport->vha, 0x2114, > + "%s: Failed to unregister nvme_remote_port (%d)\n", > + __func__, ret); > + wait_for_completion(&fcport->nvme_del_done); > } > > void qla_nvme_delete(struct scsi_qla_host *vha) > diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h > index d3b8a6440113..2d088add7011 100644 > --- a/drivers/scsi/qla2xxx/qla_nvme.h > +++ b/drivers/scsi/qla2xxx/qla_nvme.h > @@ -37,7 +37,6 @@ struct nvme_private { > }; > > struct qla_nvme_rport { > - struct list_head list; > struct fc_port *fcport; > }; > > diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c > index 00fee5bf4de1..ae93ae2b6090 100644 > --- a/drivers/scsi/qla2xxx/qla_os.c > +++ b/drivers/scsi/qla2xxx/qla_os.c > @@ -4789,7 +4789,6 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, > INIT_LIST_HEAD(&vha->plogi_ack_list); > INIT_LIST_HEAD(&vha->qp_list); > INIT_LIST_HEAD(&vha->gnl.fcports); > - INIT_LIST_HEAD(&vha->nvme_rport_list); > INIT_LIST_HEAD(&vha->gpnid_list); > INIT_WORK(&vha->iocb_work, qla2x00_iocb_work_fn); >
Thanks for the review, Ewan. My response inline.. On Mon, 17 Jun 2019, 11:38am, Ewan D. Milne wrote: > See below. > > On Fri, 2019-06-14 at 15:10 -0700, Himanshu Madhani wrote: > > From: Arun Easi <aeasi@marvell.com> > > > > BUG: unable to handle kernel NULL pointer dereference at (null) > > IP: [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > PGD 800000084cf41067 PUD 84d288067 PMD 0 > > Oops: 0000 [#1] SMP > > Call Trace: > > [<ffffffff98abcfdf>] process_one_work+0x17f/0x440 > > [<ffffffff98abdca6>] worker_thread+0x126/0x3c0 > > [<ffffffff98abdb80>] ? manage_workers.isra.26+0x2a0/0x2a0 > > [<ffffffff98ac4f81>] kthread+0xd1/0xe0 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > [<ffffffff9918ad37>] ret_from_fork_nospec_begin+0x21/0x21 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > RIP [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > > > The crash is due to a bad entry in the nvme_rport_list. This list is not > > protected, and when a remoteport_delete callback is called, driver > > traverses the list and crashes. > > > > Actually, the list could be removed and driver could traverse the main > > fcport list instead. Fix does exactly that. 
> > > > Signed-off-by: Arun Easi <aeasi@marvell.com> > > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > > --- > > drivers/scsi/qla2xxx/qla_def.h | 1 - > > drivers/scsi/qla2xxx/qla_nvme.c | 52 ++++++++++++++++++++--------------------- > > drivers/scsi/qla2xxx/qla_nvme.h | 1 - > > drivers/scsi/qla2xxx/qla_os.c | 1 - > > 4 files changed, 25 insertions(+), 30 deletions(-) > > > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > > index 1a4095c56eee..602ed24bb806 100644 > > --- a/drivers/scsi/qla2xxx/qla_def.h > > +++ b/drivers/scsi/qla2xxx/qla_def.h > > @@ -4376,7 +4376,6 @@ typedef struct scsi_qla_host { > > > > struct nvme_fc_local_port *nvme_local_port; > > struct completion nvme_del_done; > > - struct list_head nvme_rport_list; > > > > uint16_t fcoe_vlan_id; > > uint16_t fcoe_fcf_idx; > > diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c > > index 22e3fba28e51..99220a3cf734 100644 > > --- a/drivers/scsi/qla2xxx/qla_nvme.c > > +++ b/drivers/scsi/qla2xxx/qla_nvme.c > > @@ -14,6 +14,18 @@ static struct nvme_fc_port_template qla_nvme_fc_transport; > > > > static void qla_nvme_unregister_remote_port(struct work_struct *); > > > > +static inline > > +int qla_is_active_nvme_fcport(struct fc_port *fcport) > > +{ > > + return fcport->nvme_flag & NVME_FLAG_REGISTERED; > > +} > > + > > Nitpick: "qla_is_active_nvme_fcport", the qualifier "active" > does not match any of the flag definitions and this is only > checking the REGISTERED flag. Maybe "qla_is_registered_nvme_fcport" > instead? Would help to understand vs. DELETING and RESETTING case. Sure, this sounds better, will do in v2. > > > +#define qla_list_for_each_nvme_fcport(_fcport, _vha) \ > > +{ \ > > + list_for_each_entry(_fcport, &_vha->vp_fcports, list) \ > > + if (qla_is_active_nvme_fcport(_fcport)) \ > > +} > > + > > Like here, "if (qla_is_registered_nvme_fcport(" would be clearer. 
> > > int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) > > { > > struct qla_nvme_rport *rport; > > @@ -74,7 +86,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) > > > > rport = fcport->nvme_remote_port->private; > > rport->fcport = fcport; > > - list_add_tail(&rport->list, &vha->nvme_rport_list); > > > > fcport->nvme_flag |= NVME_FLAG_REGISTERED; > > return 0; > > @@ -542,19 +553,12 @@ static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport) > > static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) > > { > > fc_port_t *fcport; > > - struct qla_nvme_rport *qla_rport = rport->private, *trport; > > + struct qla_nvme_rport *qla_rport = rport->private; > > > > fcport = qla_rport->fcport; > > fcport->nvme_remote_port = NULL; > > fcport->nvme_flag &= ~NVME_FLAG_REGISTERED; > > > > - list_for_each_entry_safe(qla_rport, trport, > > - &fcport->vha->nvme_rport_list, list) { > > - if (qla_rport->fcport == fcport) { > > - list_del(&qla_rport->list); > > - break; > > - } > > - } > > complete(&fcport->nvme_del_done); > > > > if (!test_bit(UNLOADING, &fcport->vha->dpc_flags)) { > > @@ -590,31 +594,25 @@ static void qla_nvme_unregister_remote_port(struct work_struct *work) > > { > > struct fc_port *fcport = container_of(work, struct fc_port, > > nvme_del_work); > > - struct qla_nvme_rport *qla_rport, *trport; > > + int ret; > > > > if (!IS_ENABLED(CONFIG_NVME_FC)) > > return; > > > > + if (!qla_is_active_nvme_fcport(fcport)) > > + return; > > + > > This looks like it is prone to a race because the REGISTERED flag > is cleared in the callback from the NVMe transport? The remoteport_delete callback gets invoked only after a nvme_fc_unregister_remoteport() which has not yet happened, so I am not sure where the race is. > The flag was already checked in qlt_unreg_sess(), then the work item > runs here, then later qla_nvme_remoteport_delete() runs. 
What is > happening here that the check is preventing? This was merely a defensive check that, I guess, got stuck in my mind somehow. In any case, you are right: it is not needed and can be removed. -- arun > > -Ewan > > > ql_log(ql_log_warn, NULL, 0x2112, > > "%s: unregister remoteport on %p\n",__func__, fcport); > > > > - list_for_each_entry_safe(qla_rport, trport, > > - &fcport->vha->nvme_rport_list, list) { > > - if (qla_rport->fcport == fcport) { > > - ql_log(ql_log_info, fcport->vha, 0x2113, > > - "%s: fcport=%p\n", __func__, fcport); > > - nvme_fc_set_remoteport_devloss > > - (fcport->nvme_remote_port, 0); > > - init_completion(&fcport->nvme_del_done); > > - if (nvme_fc_unregister_remoteport > > - (fcport->nvme_remote_port)) > > - ql_log(ql_log_info, fcport->vha, 0x2114, > > - "%s: Failed to unregister nvme_remote_port\n", > > - __func__); > > - wait_for_completion(&fcport->nvme_del_done); > > - break; > > - } > > - } > > + nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0); > > + init_completion(&fcport->nvme_del_done); > > + ret = nvme_fc_unregister_remoteport(fcport->nvme_remote_port); > > + if (ret) > > + ql_log(ql_log_info, fcport->vha, 0x2114, > > + "%s: Failed to unregister nvme_remote_port (%d)\n", > > + __func__, ret); > > + wait_for_completion(&fcport->nvme_del_done); > > } > > > > void qla_nvme_delete(struct scsi_qla_host *vha) > > diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h > > index d3b8a6440113..2d088add7011 100644 > > --- a/drivers/scsi/qla2xxx/qla_nvme.h > > +++ b/drivers/scsi/qla2xxx/qla_nvme.h > > @@ -37,7 +37,6 @@ struct nvme_private { > > }; > > > > struct qla_nvme_rport { > > - struct list_head list; > > struct fc_port *fcport; > > }; > > > > diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c > > index 00fee5bf4de1..ae93ae2b6090 100644 > > --- a/drivers/scsi/qla2xxx/qla_os.c > > +++ b/drivers/scsi/qla2xxx/qla_os.c > > @@ -4789,7 +4789,6 @@ struct scsi_qla_host 
*qla2x00_create_host(struct scsi_host_template *sht, > > INIT_LIST_HEAD(&vha->plogi_ack_list); > > INIT_LIST_HEAD(&vha->qp_list); > > INIT_LIST_HEAD(&vha->gnl.fcports); > > - INIT_LIST_HEAD(&vha->nvme_rport_list); > > INIT_LIST_HEAD(&vha->gpnid_list); > > INIT_WORK(&vha->iocb_work, qla2x00_iocb_work_fn); > > >
On 6/15/19 12:10 AM, Himanshu Madhani wrote: > From: Arun Easi <aeasi@marvell.com> > > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > PGD 800000084cf41067 PUD 84d288067 PMD 0 > Oops: 0000 [#1] SMP > Call Trace: > [<ffffffff98abcfdf>] process_one_work+0x17f/0x440 > [<ffffffff98abdca6>] worker_thread+0x126/0x3c0 > [<ffffffff98abdb80>] ? manage_workers.isra.26+0x2a0/0x2a0 > [<ffffffff98ac4f81>] kthread+0xd1/0xe0 > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > [<ffffffff9918ad37>] ret_from_fork_nospec_begin+0x21/0x21 > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > RIP [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > The crash is due to a bad entry in the nvme_rport_list. This list is not > protected, and when a remoteport_delete callback is called, driver > traverses the list and crashes. > > Actually, the list could be removed and driver could traverse the main > fcport list instead. Fix does exactly that. > > Signed-off-by: Arun Easi <aeasi@marvell.com> > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 1 - > drivers/scsi/qla2xxx/qla_nvme.c | 52 ++++++++++++++++++++--------------------- > drivers/scsi/qla2xxx/qla_nvme.h | 1 - > drivers/scsi/qla2xxx/qla_os.c | 1 - > 4 files changed, 25 insertions(+), 30 deletions(-) > [ .. ] > diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h > index d3b8a6440113..2d088add7011 100644 > --- a/drivers/scsi/qla2xxx/qla_nvme.h > +++ b/drivers/scsi/qla2xxx/qla_nvme.h > @@ -37,7 +37,6 @@ struct nvme_private { > }; > > struct qla_nvme_rport { > - struct list_head list; > struct fc_port *fcport; > }; > Where is the point of this structure now? Please drop it, and use fc_port directly. Cheers, Hannes
On Tue, 2019-06-18 at 12:51 +0200, Hannes Reinecke wrote: > On 6/15/19 12:10 AM, Himanshu Madhani wrote: > > From: Arun Easi <aeasi@marvell.com> > > > > BUG: unable to handle kernel NULL pointer dereference at (null) > > IP: [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > PGD 800000084cf41067 PUD 84d288067 PMD 0 > > Oops: 0000 [#1] SMP > > Call Trace: > > [<ffffffff98abcfdf>] process_one_work+0x17f/0x440 > > [<ffffffff98abdca6>] worker_thread+0x126/0x3c0 > > [<ffffffff98abdb80>] ? manage_workers.isra.26+0x2a0/0x2a0 > > [<ffffffff98ac4f81>] kthread+0xd1/0xe0 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > [<ffffffff9918ad37>] ret_from_fork_nospec_begin+0x21/0x21 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > RIP [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > > > The crash is due to a bad entry in the nvme_rport_list. This list is not > > protected, and when a remoteport_delete callback is called, driver > > traverses the list and crashes. > > > > Actually, the list could be removed and driver could traverse the main > > fcport list instead. Fix does exactly that. > > > > Signed-off-by: Arun Easi <aeasi@marvell.com> > > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > > --- > > drivers/scsi/qla2xxx/qla_def.h | 1 - > > drivers/scsi/qla2xxx/qla_nvme.c | 52 ++++++++++++++++++++--------------------- > > drivers/scsi/qla2xxx/qla_nvme.h | 1 - > > drivers/scsi/qla2xxx/qla_os.c | 1 - > > 4 files changed, 25 insertions(+), 30 deletions(-) > > > > [ .. ] > > diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h > > index d3b8a6440113..2d088add7011 100644 > > --- a/drivers/scsi/qla2xxx/qla_nvme.h > > +++ b/drivers/scsi/qla2xxx/qla_nvme.h > > @@ -37,7 +37,6 @@ struct nvme_private { > > }; > > > > struct qla_nvme_rport { > > - struct list_head list; > > struct fc_port *fcport; > > }; > > > > Where is the point of this structure now? 
> Please drop it, and use fc_port directly. I thought about mentioning that, but the nvme_fc_remote_port's ->private area is allocated by the transport inside nvme_fc_register_remoteport(), sized by the template's .remote_priv_sz, so I don't see a clean way to just set ->private to the fc_port. And if a driver-specific field needs to be added later, the structure would have to be put back. -Ewan > > Cheers, > > Hannes
On Tue, 18 Jun 2019, 3:51am, Hannes Reinecke wrote: > On 6/15/19 12:10 AM, Himanshu Madhani wrote: > > From: Arun Easi <aeasi@marvell.com> > > > > BUG: unable to handle kernel NULL pointer dereference at (null) > > IP: [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > PGD 800000084cf41067 PUD 84d288067 PMD 0 > > Oops: 0000 [#1] SMP > > Call Trace: > > [<ffffffff98abcfdf>] process_one_work+0x17f/0x440 > > [<ffffffff98abdca6>] worker_thread+0x126/0x3c0 > > [<ffffffff98abdb80>] ? manage_workers.isra.26+0x2a0/0x2a0 > > [<ffffffff98ac4f81>] kthread+0xd1/0xe0 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > [<ffffffff9918ad37>] ret_from_fork_nospec_begin+0x21/0x21 > > [<ffffffff98ac4eb0>] ? insert_kthread_work+0x40/0x40 > > RIP [<ffffffffc050d10c>] qla_nvme_unregister_remote_port+0x6c/0xf0 [qla2xxx] > > > > The crash is due to a bad entry in the nvme_rport_list. This list is not > > protected, and when a remoteport_delete callback is called, driver > > traverses the list and crashes. > > > > Actually, the list could be removed and driver could traverse the main > > fcport list instead. Fix does exactly that. > > > > Signed-off-by: Arun Easi <aeasi@marvell.com> > > Signed-off-by: Himanshu Madhani <hmadhani@marvell.com> > > --- > > drivers/scsi/qla2xxx/qla_def.h | 1 - > > drivers/scsi/qla2xxx/qla_nvme.c | 52 ++++++++++++++++++++--------------------- > > drivers/scsi/qla2xxx/qla_nvme.h | 1 - > > drivers/scsi/qla2xxx/qla_os.c | 1 - > > 4 files changed, 25 insertions(+), 30 deletions(-) > > > [ .. ] > > diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h > > index d3b8a6440113..2d088add7011 100644 > > --- a/drivers/scsi/qla2xxx/qla_nvme.h > > +++ b/drivers/scsi/qla2xxx/qla_nvme.h > > @@ -37,7 +37,6 @@ struct nvme_private { > > }; > > > > struct qla_nvme_rport { > > - struct list_head list; > > struct fc_port *fcport; > > }; > > > Where is the point of this structure now? 
> Please drop it, and use fc_port directly. > It could be removed, but it was kept so that fields can be added in the future if needed. I don't have a strong preference, so one more nudge and I will remove it. We plan to post v2 soon, so please let me know if you would like this changed. -- arun
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 1a4095c56eee..602ed24bb806 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4376,7 +4376,6 @@ typedef struct scsi_qla_host { struct nvme_fc_local_port *nvme_local_port; struct completion nvme_del_done; - struct list_head nvme_rport_list; uint16_t fcoe_vlan_id; uint16_t fcoe_fcf_idx; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 22e3fba28e51..99220a3cf734 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -14,6 +14,18 @@ static struct nvme_fc_port_template qla_nvme_fc_transport; static void qla_nvme_unregister_remote_port(struct work_struct *); +static inline +int qla_is_active_nvme_fcport(struct fc_port *fcport) +{ + return fcport->nvme_flag & NVME_FLAG_REGISTERED; +} + +#define qla_list_for_each_nvme_fcport(_fcport, _vha) \ +{ \ + list_for_each_entry(_fcport, &_vha->vp_fcports, list) \ + if (qla_is_active_nvme_fcport(_fcport)) \ +} + int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) { struct qla_nvme_rport *rport; @@ -74,7 +86,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) rport = fcport->nvme_remote_port->private; rport->fcport = fcport; - list_add_tail(&rport->list, &vha->nvme_rport_list); fcport->nvme_flag |= NVME_FLAG_REGISTERED; return 0; @@ -542,19 +553,12 @@ static void qla_nvme_localport_delete(struct nvme_fc_local_port *lport) static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) { fc_port_t *fcport; - struct qla_nvme_rport *qla_rport = rport->private, *trport; + struct qla_nvme_rport *qla_rport = rport->private; fcport = qla_rport->fcport; fcport->nvme_remote_port = NULL; fcport->nvme_flag &= ~NVME_FLAG_REGISTERED; - list_for_each_entry_safe(qla_rport, trport, - &fcport->vha->nvme_rport_list, list) { - if (qla_rport->fcport == fcport) { - list_del(&qla_rport->list); - break; - } 
- } complete(&fcport->nvme_del_done); if (!test_bit(UNLOADING, &fcport->vha->dpc_flags)) { @@ -590,31 +594,25 @@ static void qla_nvme_unregister_remote_port(struct work_struct *work) { struct fc_port *fcport = container_of(work, struct fc_port, nvme_del_work); - struct qla_nvme_rport *qla_rport, *trport; + int ret; if (!IS_ENABLED(CONFIG_NVME_FC)) return; + if (!qla_is_active_nvme_fcport(fcport)) + return; + ql_log(ql_log_warn, NULL, 0x2112, "%s: unregister remoteport on %p\n",__func__, fcport); - list_for_each_entry_safe(qla_rport, trport, - &fcport->vha->nvme_rport_list, list) { - if (qla_rport->fcport == fcport) { - ql_log(ql_log_info, fcport->vha, 0x2113, - "%s: fcport=%p\n", __func__, fcport); - nvme_fc_set_remoteport_devloss - (fcport->nvme_remote_port, 0); - init_completion(&fcport->nvme_del_done); - if (nvme_fc_unregister_remoteport - (fcport->nvme_remote_port)) - ql_log(ql_log_info, fcport->vha, 0x2114, - "%s: Failed to unregister nvme_remote_port\n", - __func__); - wait_for_completion(&fcport->nvme_del_done); - break; - } - } + nvme_fc_set_remoteport_devloss(fcport->nvme_remote_port, 0); + init_completion(&fcport->nvme_del_done); + ret = nvme_fc_unregister_remoteport(fcport->nvme_remote_port); + if (ret) + ql_log(ql_log_info, fcport->vha, 0x2114, + "%s: Failed to unregister nvme_remote_port (%d)\n", + __func__, ret); + wait_for_completion(&fcport->nvme_del_done); } void qla_nvme_delete(struct scsi_qla_host *vha) diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h index d3b8a6440113..2d088add7011 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.h +++ b/drivers/scsi/qla2xxx/qla_nvme.h @@ -37,7 +37,6 @@ struct nvme_private { }; struct qla_nvme_rport { - struct list_head list; struct fc_port *fcport; }; diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 00fee5bf4de1..ae93ae2b6090 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4789,7 +4789,6 @@ struct scsi_qla_host 
*qla2x00_create_host(struct scsi_host_template *sht, INIT_LIST_HEAD(&vha->plogi_ack_list); INIT_LIST_HEAD(&vha->qp_list); INIT_LIST_HEAD(&vha->gnl.fcports); - INIT_LIST_HEAD(&vha->nvme_rport_list); INIT_LIST_HEAD(&vha->gpnid_list); INIT_WORK(&vha->iocb_work, qla2x00_iocb_work_fn);