diff mbox series

[for-next,3/3] IB/hfi1: Close race condition on user context disable and close

Message ID 20190226164530.31476.26502.stgit@scvm10.sc.intel.com (mailing list archive)
State Mainlined
Commit bc5add09764c123f58942a37c8335247e683d234
Delegated to: Jason Gunthorpe
Headers show
Series Driver fixes | expand

Commit Message

Dennis Dalessandro Feb. 26, 2019, 4:45 p.m. UTC
From: Michael J. Ruhl <michael.j.ruhl@intel.com>

When disabling and removing a receive context, it is possible for an
asynchronous event (i.e IRQ) to occur.  Because of this, there is a
race between cleaning up the context, and the context being used by
the asynchronous event.

cpu 0  (context cleanup)
    rc->ref_count-- (ref_count == 0)
    hfi1_rcd_free()
cpu 1  (IRQ (with rcd index))
	rcd_get_by_index()
	lock
	ref_count+++     <-- reference count race (WARNING)
	return rcd
	unlock
cpu 0
    hfi1_free_ctxtdata() <-- incorrect free location
    lock
    remove rcd from array
    unlock
    free rcd

This race will cause the following WARNING trace:

WARNING: CPU: 0 PID: 175027 at include/linux/kref.h:52 hfi1_rcd_get_by_index+0x84/0xa0 [hfi1]
CPU: 0 PID: 175027 Comm: IMB-MPI1 Kdump: loaded Tainted: G OE ------------ 3.10.0-957.el7.x86_64 #1
Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015
Call Trace:
  dump_stack+0x19/0x1b
  __warn+0xd8/0x100
  warn_slowpath_null+0x1d/0x20
  hfi1_rcd_get_by_index+0x84/0xa0 [hfi1]
  is_rcv_urgent_int+0x24/0x90 [hfi1]
  general_interrupt+0x1b6/0x210 [hfi1]
  __handle_irq_event_percpu+0x44/0x1c0
  handle_irq_event_percpu+0x32/0x80
  handle_irq_event+0x3c/0x60
  handle_edge_irq+0x7f/0x150
  handle_irq+0xe4/0x1a0
  do_IRQ+0x4d/0xf0
  common_interrupt+0x162/0x162

The race can also lead to a use after free which could be similar
to:

general protection fault: 0000 1 SMP
CPU: 71 PID: 177147 Comm: IMB-MPI1 Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.el7.x86_64 #1
Hardware name: Intel Corporation S2600KP/S2600KP, BIOS SE5C610.86B.11.01.0076.C4.111920150602 11/19/2015
task: ffff9962a8098000 ti: ffff99717a508000 task.ti: ffff99717a508000 __kmalloc+0x94/0x230
Call Trace:
  ? hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1]
  hfi1_user_sdma_process_request+0x9c8/0x1250 [hfi1]
  hfi1_aio_write+0xba/0x110 [hfi1]
  do_sync_readv_writev+0x7b/0xd0
  do_readv_writev+0xce/0x260
  ? handle_mm_fault+0x39d/0x9b0
  ? pick_next_task_fair+0x5f/0x1b0
  ? sched_clock_cpu+0x85/0xc0
  ? __schedule+0x13a/0x890
  vfs_writev+0x35/0x60
  SyS_writev+0x7f/0x110
  system_call_fastpath+0x22/0x27

Use the appropriate kref API to verify access.

Reorder context cleanup to ensure context removal before cleanup
occurs correctly.

Cc: stable@vger.kernel.org # v4.14.0+
Fixes: f683c80ca68e ("IB/hfi1: Resolve kernel panics by reference counting receive contexts")
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>

---
Changes since v1:
	adjust to use kref_get_unless_zero() instead of a flag
---
 drivers/infiniband/hw/hfi1/hfi.h  |    2 +-
 drivers/infiniband/hw/hfi1/init.c |   14 +++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

Comments

Dennis Dalessandro Feb. 26, 2019, 5:15 p.m. UTC | #1
On 2/26/2019 11:45 AM, Dennis Dalessandro wrote:
>   struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
> diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
> index 7841a0a..2cc5164 100644
> --- a/drivers/infiniband/hw/hfi1/init.c
> +++ b/drivers/infiniband/hw/hfi1/init.c
> @@ -214,12 +214,12 @@ static void hfi1_rcd_free(struct kref *kref)
>   	struct hfi1_ctxtdata *rcd =
>   		container_of(kref, struct hfi1_ctxtdata, kref);
>   
> -	hfi1_free_ctxtdata(rcd->dd, rcd);
> -
>   	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
>   	rcd->dd->rcd[rcd->ctxt] = NULL;
>   	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
>   
> +	hfi1_free_ctxtdata(rcd->dd, rcd);
> +
>   	kfree(rcd);
>   }
>   
Whoops, hold off on pulling this one just yet. We need to take a closer 
look at the above hunk.

-Denny
Jason Gunthorpe March 4, 2019, 8:41 p.m. UTC | #2
On Tue, Feb 26, 2019 at 12:15:59PM -0500, Dennis Dalessandro wrote:
> On 2/26/2019 11:45 AM, Dennis Dalessandro wrote:
> >   struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
> > diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
> > index 7841a0a..2cc5164 100644
> > +++ b/drivers/infiniband/hw/hfi1/init.c
> > @@ -214,12 +214,12 @@ static void hfi1_rcd_free(struct kref *kref)
> >   	struct hfi1_ctxtdata *rcd =
> >   		container_of(kref, struct hfi1_ctxtdata, kref);
> > -	hfi1_free_ctxtdata(rcd->dd, rcd);
> > -
> >   	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
> >   	rcd->dd->rcd[rcd->ctxt] = NULL;
> >   	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
> > +	hfi1_free_ctxtdata(rcd->dd, rcd);
> > +
> >   	kfree(rcd);
> >   }
> Whoops, hold off on pulling this one just yet. We need to take a closer look
> at the above hunk.

Oh, okay.. Please resend it

Jason
Dennis Dalessandro March 5, 2019, 3:30 p.m. UTC | #3
On 3/4/2019 3:41 PM, Jason Gunthorpe wrote:
> On Tue, Feb 26, 2019 at 12:15:59PM -0500, Dennis Dalessandro wrote:
>> On 2/26/2019 11:45 AM, Dennis Dalessandro wrote:
>>>    struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
>>> diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
>>> index 7841a0a..2cc5164 100644
>>> +++ b/drivers/infiniband/hw/hfi1/init.c
>>> @@ -214,12 +214,12 @@ static void hfi1_rcd_free(struct kref *kref)
>>>    	struct hfi1_ctxtdata *rcd =
>>>    		container_of(kref, struct hfi1_ctxtdata, kref);
>>> -	hfi1_free_ctxtdata(rcd->dd, rcd);
>>> -
>>>    	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
>>>    	rcd->dd->rcd[rcd->ctxt] = NULL;
>>>    	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
>>> +	hfi1_free_ctxtdata(rcd->dd, rcd);
>>> +
>>>    	kfree(rcd);
>>>    }
>> Whoops, hold off on pulling this one just yet. We need to take a closer look
>> at the above hunk.
> 
> Oh, okay.. Please resend it

We've talked it over, it is OK to take after all. Just needed to be sure 
and folks were on vacation last week. If you still have the patch it's 
good to go, if not I'll tack it on my next submit as well.

-Denny
Jason Gunthorpe March 6, 2019, 6:47 p.m. UTC | #4
On Tue, Mar 05, 2019 at 10:30:39AM -0500, Dennis Dalessandro wrote:
> On 3/4/2019 3:41 PM, Jason Gunthorpe wrote:
> > On Tue, Feb 26, 2019 at 12:15:59PM -0500, Dennis Dalessandro wrote:
> > > On 2/26/2019 11:45 AM, Dennis Dalessandro wrote:
> > > >    struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
> > > > diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
> > > > index 7841a0a..2cc5164 100644
> > > > +++ b/drivers/infiniband/hw/hfi1/init.c
> > > > @@ -214,12 +214,12 @@ static void hfi1_rcd_free(struct kref *kref)
> > > >    	struct hfi1_ctxtdata *rcd =
> > > >    		container_of(kref, struct hfi1_ctxtdata, kref);
> > > > -	hfi1_free_ctxtdata(rcd->dd, rcd);
> > > > -
> > > >    	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
> > > >    	rcd->dd->rcd[rcd->ctxt] = NULL;
> > > >    	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
> > > > +	hfi1_free_ctxtdata(rcd->dd, rcd);
> > > > +
> > > >    	kfree(rcd);
> > > >    }
> > > Whoops, hold off on pulling this one just yet. We need to take a closer look
> > > at the above hunk.
> > 
> > Oh, okay.. Please resend it
> 
> We've talked it over, it is OK to take after all. Just needed to be sure and
> folks were on vacation last week. If you still have the patch it's good to
> go, if not I'll tack it on my next submit as well.

Okay I grabbed the old one

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 6582184..048b5d7 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1455,7 +1455,7 @@  void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
 			 struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
 void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
 int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
-void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
 struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
 						 u16 ctxt);
 struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 7841a0a..2cc5164 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -214,12 +214,12 @@  static void hfi1_rcd_free(struct kref *kref)
 	struct hfi1_ctxtdata *rcd =
 		container_of(kref, struct hfi1_ctxtdata, kref);
 
-	hfi1_free_ctxtdata(rcd->dd, rcd);
-
 	spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
 	rcd->dd->rcd[rcd->ctxt] = NULL;
 	spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
 
+	hfi1_free_ctxtdata(rcd->dd, rcd);
+
 	kfree(rcd);
 }
 
@@ -242,10 +242,13 @@  int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
  * @rcd: pointer to an initialized rcd data structure
  *
  * Use this to get a reference after the init.
+ *
+ * Return : reflect kref_get_unless_zero(), which returns non-zero on
+ * increment, otherwise 0.
  */
-void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
 {
-	kref_get(&rcd->kref);
+	return kref_get_unless_zero(&rcd->kref);
 }
 
 /**
@@ -325,7 +328,8 @@  struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
 	spin_lock_irqsave(&dd->uctxt_lock, flags);
 	if (dd->rcd[ctxt]) {
 		rcd = dd->rcd[ctxt];
-		hfi1_rcd_get(rcd);
+		if (!hfi1_rcd_get(rcd))
+			rcd = NULL;
 	}
 	spin_unlock_irqrestore(&dd->uctxt_lock, flags);