diff mbox series

cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime

Message ID 165896020625.3546860.12390103413706292760.stgit@dwillia2-xfh.jf.intel.com
State Accepted
Commit 4d5c42a80bd17d1979dbcd40c5c44ff3c93e1476
Headers show
Series cxl/hdm: Fix DPA reservation vs cxl_endpoint_decoder lifetime | expand

Commit Message

Dan Williams July 27, 2022, 10:16 p.m. UTC
After adding support for emulating platform firmware established DPA
reservations, the cxl-topology.sh [1] unit test started crashing with
the following signature:

 general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bc3: 0000 [#1] PREEMPT SMP
 [..]
 RIP: 0010:to_cxl_port+0x8/0x60 [cxl_core]
 [..]
 Call Trace:
  <TASK>
  __cxl_dpa_release+0x1b/0xd0 [cxl_core]
  cxl_dpa_release+0x1d/0x30 [cxl_core]
  release_nodes+0x63/0x90
  devres_release_all+0x88/0xc0

...i.e. a use after free of a 'struct cxl_endpoint_decoder' object. This
results from the ordering of init_hdm_decoder() before add_hdm_decoder()
where, at release time, the decoder is unregistered and released before
the DPA reservation.

Fix this by extending the life of the object until all DPA reservations
have been released which also preserves platform decoder settings being
settled by the time the decoder is published in sysfs (KOBJ_ADD time).

Note that the @len == 0 case in __cxl_dpa_reserve() is avoided in
practice as this function is only called for committed decoders and new
non-zero DPA allocations.

Link: https://github.com/pmem/ndctl/blob/pending/test/cxl-topology.sh [1]
Fixes: 9c57cde0dcbd ("cxl/hdm: Enumerate allocated DPA")
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/hdm.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

Comments

Verma, Vishal L Aug. 1, 2022, 7:18 p.m. UTC | #1
On Wed, 2022-07-27 at 15:16 -0700, Dan Williams wrote:
> After adding support for emulating platform firmware established DPA
> reservations, the cxl-topology.sh [1] unit test started crashing with
> the following signature:
> 
>  general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6bc3: 0000 [#1] PREEMPT SMP
>  [..]
>  RIP: 0010:to_cxl_port+0x8/0x60 [cxl_core]
>  [..]
>  Call Trace:
>   <TASK>
>   __cxl_dpa_release+0x1b/0xd0 [cxl_core]
>   cxl_dpa_release+0x1d/0x30 [cxl_core]
>   release_nodes+0x63/0x90
>   devres_release_all+0x88/0xc0
> 
> ...i.e. a use after free of a 'struct cxl_endpoint_decoder' object. This
> results from the ordering of init_hdm_decoder() before add_hdm_decoder()
> where, at release time, the decoder is unregistered and released before
> the DPA reservation.
> 
> Fix this by extending the life of the object until all DPA reservations
> have been released which also preserves platform decoder settings being
> settled by the time the decoder is published in sysfs (KOBJ_ADD time).
> 
> Note that the @len == 0 case in __cxl_dpa_reserve() is avoided in
> practice as this function is only called for committed decoders and new
> non-zero DPA allocations.
> 
> Link: https://github.com/pmem/ndctl/blob/pending/test/cxl-topology.sh [1]
> Fixes: 9c57cde0dcbd ("cxl/hdm: Enumerate allocated DPA")
> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/hdm.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)

Looks good,

Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>

> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index ee53e8ac5c96..8143e2615957 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -178,6 +178,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
>                 __release_region(&cxlds->dpa_res, skip_start, cxled->skip);
>         cxled->skip = 0;
>         cxled->dpa_res = NULL;
> +       put_device(&cxled->cxld.dev);
>         port->hdm_end--;
>  }
>  
> @@ -214,7 +215,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>         lockdep_assert_held_write(&cxl_dpa_rwsem);
>  
>         if (!len)
> -               return 0;
> +               goto success;
>  
>         if (cxled->dpa_res) {
>                 dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
> @@ -266,8 +267,10 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
>                         cxled->cxld.id, cxled->dpa_res);
>                 cxled->mode = CXL_DECODER_MIXED;
>         }
> -       port->hdm_end++;
>  
> +success:
> +       port->hdm_end++;
> +       get_device(&cxled->cxld.dev);
>         return 0;
>  }
>  
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index ee53e8ac5c96..8143e2615957 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -178,6 +178,7 @@  static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
 		__release_region(&cxlds->dpa_res, skip_start, cxled->skip);
 	cxled->skip = 0;
 	cxled->dpa_res = NULL;
+	put_device(&cxled->cxld.dev);
 	port->hdm_end--;
 }
 
@@ -214,7 +215,7 @@  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 	lockdep_assert_held_write(&cxl_dpa_rwsem);
 
 	if (!len)
-		return 0;
+		goto success;
 
 	if (cxled->dpa_res) {
 		dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
@@ -266,8 +267,10 @@  static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			cxled->cxld.id, cxled->dpa_res);
 		cxled->mode = CXL_DECODER_MIXED;
 	}
-	port->hdm_end++;
 
+success:
+	port->hdm_end++;
+	get_device(&cxled->cxld.dev);
 	return 0;
 }