diff mbox series

[v2,2/2] cxl/core/region: check interleave capability

Message ID 20240403021747.17260-3-yaoxt.fnst@fujitsu.com
State New, archived
Headers show
Series cxl: add interleave capability check | expand

Commit Message

Xingtao Yao (Fujitsu) April 3, 2024, 2:17 a.m. UTC
Since interleave capability is not checked, a target can be attached to
a region successfully even if it does not support the region's interleave
ways or interleave granularity.

When the memory is accessed, unexpected behavior occurs because the HPA
is converted to an incorrect DPA:
$ numactl -m 2 ls
Segmentation fault (core dumped)

Link: https://lore.kernel.org/qemu-devel/20240327014653.26623-1-yaoxt.fnst@fujitsu.com
Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
---
 drivers/cxl/core/hdm.c    |  4 ++++
 drivers/cxl/core/region.c | 41 +++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |  2 ++
 drivers/cxl/cxlmem.h      |  1 +
 4 files changed, 48 insertions(+)

Comments

Jonathan Cameron April 3, 2024, 2:27 p.m. UTC | #1
On Tue,  2 Apr 2024 22:17:47 -0400
Yao Xingtao <yaoxt.fnst@fujitsu.com> wrote:

> Since interleave capability is not checked, a target can be attached to
> region successfully even it does not support the interleave ways or
> interleave granularity.
> 
> When accessing the memory, unexpected behavior occurs due to converting
> HPA to an error DPA:
> $ numactl -m 2 ls
> Segmentation fault (core dumped)
> 
> Link: https://lore.kernel.org/qemu-devel/20240327014653.26623-1-yaoxt.fnst@fujitsu.com
> Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>

I argued on the CXL opensource sync call last night that we'd get an
hdm commit fail (on working hardware - unlike current qemu) if this check
wasn't present.  Having thought more I think I was wrong and this is a
necessary fix because a device that doesn't support one of these ways
treats the HDM Decoder n Control Register / Interleave Ways (IW) values
as 'reserved'. Is it guaranteed to not just do that by fixing the higher
bits to zero?

If that's a possible implementation and the decoder was set to 6-way (0x9)
maybe the device would interpret that as 2-way (0x1) and give us the wrong
decode.

With that in mind I think this is a fix and needs a Fixes tag.


> ---
>  drivers/cxl/core/hdm.c    |  4 ++++
>  drivers/cxl/core/region.c | 41 +++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h         |  2 ++
>  drivers/cxl/cxlmem.h      |  1 +
>  4 files changed, 48 insertions(+)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 9bb6a256cc6f..1a99b138dbec 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -79,6 +79,10 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
>  		cxlhdm->ig_cap_mask |= GENMASK(11, 8);
>  	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
>  		cxlhdm->ig_cap_mask |= GENMASK(14, 12);
> +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
> +		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
> +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
> +		cxlhdm->iw_cap_mask |= BIT(16);

Whilst it doesn't matter as such (because they are always valid) I think
we should also set the bits for 1,2,4,8 so that all relevant bits are
enabled.

>  }
>  
>  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 5c186e0a39b9..25d178e14ed1 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -1786,6 +1786,36 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
>  	return rc;
>  }
>  
> +static int
> +check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> +{
> +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> +	u8 eiw;
> +	u16 eig;
> +	int rc;
> +
> +	rc = ways_to_eiw(iw, &eiw);
> +	if (rc)
> +		return rc;
> +
> +	if (eiw > 3 && !(cxlhdm->iw_cap_mask & BIT(iw))) {
If you have all the bits set then you can avoid using eiw > 3 just
to check we aren't 1,2,4,8

> +		dev_dbg(&cxld->dev, "iw: %d is not supported\n", iw);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	rc = granularity_to_eig(ig, &eig);
> +	if (rc)
> +		return rc;
> +
> +	if (!(BIT(eig + 8) & cxlhdm->ig_cap_mask)) {

This seems too simple.  Need to look at the calculations in
IMPLEMENTATION NOTE: CXL Host Bridge and Upstream Switch Port Decode Flow.

For a decode of more than 2 ways you need more bits to be supported.

> +		dev_dbg(&cxld->dev, "ig: %d is not supported\n", ig);
> +		return -EOPNOTSUPP;
> +	}
> +
> +	return 0;
> +}
> +
>  static int cxl_region_attach(struct cxl_region *cxlr,
>  			     struct cxl_endpoint_decoder *cxled, int pos)
>  {
> @@ -1796,6 +1826,17 @@ static int cxl_region_attach(struct cxl_region *cxlr,
>  	struct cxl_dport *dport;
>  	int rc = -ENXIO;
>  
> +	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
> +				  p->interleave_granularity);
> +	if (rc) {
> +		dev_dbg(&cxlr->dev,
> +			"%s with region iw: %d, ig: %d is not supported\n",
> +			dev_name(&cxled->cxld.dev),
> +			p->interleave_ways,
> +			p->interleave_granularity);
> +		return rc;
> +	}
> +
>  	if (cxled->mode != cxlr->mode) {
>  		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
>  			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 534e25e2f0a4..da8a487ededa 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -45,6 +45,8 @@
>  #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
>  #define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
>  #define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
> +#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
> +#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
>  #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
>  #define   CXL_HDM_DECODER_ENABLE BIT(1)
>  #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index b53f7ae0fdd6..979c22955246 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -853,6 +853,7 @@ struct cxl_hdm {
>  	unsigned int decoder_count;
>  	unsigned int target_count;
>  	unsigned int ig_cap_mask;
> +	unsigned int iw_cap_mask;
>  	struct cxl_port *port;
>  };
>
Xingtao Yao (Fujitsu) April 8, 2024, 2:50 a.m. UTC | #2
> -----Original Message-----
> From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Sent: Wednesday, April 3, 2024 10:28 PM
> To: Yao, Xingtao/姚 幸涛 <yaoxt.fnst@fujitsu.com>
> Cc: dave@stgolabs.net; dave.jiang@intel.com; alison.schofield@intel.com;
> vishal.l.verma@intel.com; ira.weiny@intel.com; dan.j.williams@intel.com;
> jim.harris@samsung.com; linux-cxl@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] cxl/core/region: check interleave capability
> 
> On Tue,  2 Apr 2024 22:17:47 -0400
> Yao Xingtao <yaoxt.fnst@fujitsu.com> wrote:
> 
> > Since interleave capability is not checked, a target can be attached to
> > region successfully even it does not support the interleave ways or
> > interleave granularity.
> >
> > When accessing the memory, unexpected behavior occurs due to converting
> > HPA to an error DPA:
> > $ numactl -m 2 ls
> > Segmentation fault (core dumped)
> >
> > Link: https://lore.kernel.org/qemu-devel/20240327014653.26623-1-yaoxt.fnst@fujitsu.com
> > Signed-off-by: Yao Xingtao <yaoxt.fnst@fujitsu.com>
> 
> I argued on the CXL opensource sync call last night that we'd get an
> hdm commit fail (on working hardware - unlike current qemu) if this check
> wasn't present.  Having thought more I think I was wrong and this is a
> necessary fix because a device that doesn't support one of these ways
> treats the HDM Decoder n Control Register / Interleave Ways (IW) values
> as 'reserved'. Is it guaranteed to not just do that by fixing the higher
> bits to zero?
> 
> If that's a possible implementation and the decoder was set to 6-way (0x9)
> maybe the device would interpret that as 2-way (0x1) and give us the wrong
> decode.
> 
> With that in mind I think this is a fix and needs a Fixes tag.
> 
> 
> > ---
> >  drivers/cxl/core/hdm.c    |  4 ++++
> >  drivers/cxl/core/region.c | 41 +++++++++++++++++++++++++++++++++++++++
> >  drivers/cxl/cxl.h         |  2 ++
> >  drivers/cxl/cxlmem.h      |  1 +
> >  4 files changed, 48 insertions(+)
> >
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 9bb6a256cc6f..1a99b138dbec 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -79,6 +79,10 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
> >  		cxlhdm->ig_cap_mask |= GENMASK(11, 8);
> >  	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
> >  		cxlhdm->ig_cap_mask |= GENMASK(14, 12);
> > +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
> > +		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
> > +	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
> > +		cxlhdm->iw_cap_mask |= BIT(16);
> 
> Whilst it doesn't matter as such (because they are always valid) I think
> we should also set the bits for 1,2,4,8 so that all relevant bits are
> enabled.
good idea!

> 
> >  }
> >
> >  static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index 5c186e0a39b9..25d178e14ed1 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -1786,6 +1786,36 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr)
> >  	return rc;
> >  }
> >
> > +static int
> > +check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
> > +{
> > +	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
> > +	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
> > +	u8 eiw;
> > +	u16 eig;
> > +	int rc;
> > +
> > +	rc = ways_to_eiw(iw, &eiw);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (eiw > 3 && !(cxlhdm->iw_cap_mask & BIT(iw))) {
> If you have all the bits set then you can avoid using eiw > 3 just
> to check we aren't 1,2,4,8
> 
> > +		dev_dbg(&cxld->dev, "iw: %d is not supported\n", iw);
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	rc = granularity_to_eig(ig, &eig);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (!(BIT(eig + 8) & cxlhdm->ig_cap_mask)) {
> 
> This seems too simple.  Need to look at the calculations in
> IMPLEMENTATION NOTE: CXL Host Bridge and Upstream Switch Port Decode
> Flow.
> 
> For a decode of more than 2 ways you need more bits to be supported.
This was my misunderstanding of the interleave bits; I will change this logic.

> 
> > +		dev_dbg(&cxld->dev, "ig: %d is not supported\n", ig);
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static int cxl_region_attach(struct cxl_region *cxlr,
> >  			     struct cxl_endpoint_decoder *cxled, int pos)
> >  {
> > @@ -1796,6 +1826,17 @@ static int cxl_region_attach(struct cxl_region *cxlr,
> >  	struct cxl_dport *dport;
> >  	int rc = -ENXIO;
> >
> > +	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
> > +				  p->interleave_granularity);
> > +	if (rc) {
> > +		dev_dbg(&cxlr->dev,
> > +			"%s with region iw: %d, ig: %d is not supported\n",
> > +			dev_name(&cxled->cxld.dev),
> > +			p->interleave_ways,
> > +			p->interleave_granularity);
> > +		return rc;
> > +	}
> > +
> >  	if (cxled->mode != cxlr->mode) {
> >  		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
> >  			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index 534e25e2f0a4..da8a487ededa 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -45,6 +45,8 @@
> >  #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
> >  #define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
> >  #define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
> > +#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
> > +#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
> >  #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
> >  #define   CXL_HDM_DECODER_ENABLE BIT(1)
> >  #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index b53f7ae0fdd6..979c22955246 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -853,6 +853,7 @@ struct cxl_hdm {
> >  	unsigned int decoder_count;
> >  	unsigned int target_count;
> >  	unsigned int ig_cap_mask;
> > +	unsigned int iw_cap_mask;
> >  	struct cxl_port *port;
> >  };
> >
Dan Williams April 8, 2024, 11:10 p.m. UTC | #3
Xingtao Yao (Fujitsu) wrote:
[..]
> > > +		dev_dbg(&cxld->dev, "iw: %d is not supported\n", iw);
> > > +		return -EOPNOTSUPP;
> > > +	}
> > > +
> > > +	rc = granularity_to_eig(ig, &eig);
> > > +	if (rc)
> > > +		return rc;
> > > +
> > > +	if (!(BIT(eig + 8) & cxlhdm->ig_cap_mask)) {
> > 
> > This seems too simple.  Need to look at the calculations in
> > IMPLEMENTATION NOTE: CXL Host Bridge and Upstream Switch Port Decode
> > Flow.
> > 
> > For a decode of more than 2 ways you need more bits to be supported.
> This was my misunderstanding of the interleave bits; I will change this logic.

My expectation is that interleave_mask is an address mask and can only
be validated by multiplying it against interleave_ways. For example, for
power-of-2 interleave_ways:

	region_interleave_mask = GENMASK(eiw - 1 + eig + 8, eig + 8);

...that is the address bits required to decode the interleave, and if
that mask is not fully supported by the device-address mask then the
device can not be attached to that region.

However, the above is untested; please double-check my math.
diff mbox series

Patch

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 9bb6a256cc6f..1a99b138dbec 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -79,6 +79,10 @@  static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
 		cxlhdm->ig_cap_mask |= GENMASK(11, 8);
 	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
 		cxlhdm->ig_cap_mask |= GENMASK(14, 12);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY, hdm_cap))
+		cxlhdm->iw_cap_mask |= BIT(3) | BIT(6) | BIT(12);
+	if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_16_WAY, hdm_cap))
+		cxlhdm->iw_cap_mask |= BIT(16);
 }
 
 static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5c186e0a39b9..25d178e14ed1 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1786,6 +1786,36 @@  static int cxl_region_sort_targets(struct cxl_region *cxlr)
 	return rc;
 }
 
+static int
+check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
+{
+	struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+	u8 eiw;
+	u16 eig;
+	int rc;
+
+	rc = ways_to_eiw(iw, &eiw);
+	if (rc)
+		return rc;
+
+	if (eiw > 3 && !(cxlhdm->iw_cap_mask & BIT(iw))) {
+		dev_dbg(&cxld->dev, "iw: %d is not supported\n", iw);
+		return -EOPNOTSUPP;
+	}
+
+	rc = granularity_to_eig(ig, &eig);
+	if (rc)
+		return rc;
+
+	if (!(BIT(eig + 8) & cxlhdm->ig_cap_mask)) {
+		dev_dbg(&cxld->dev, "ig: %d is not supported\n", ig);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static int cxl_region_attach(struct cxl_region *cxlr,
 			     struct cxl_endpoint_decoder *cxled, int pos)
 {
@@ -1796,6 +1826,17 @@  static int cxl_region_attach(struct cxl_region *cxlr,
 	struct cxl_dport *dport;
 	int rc = -ENXIO;
 
+	rc = check_interleave_cap(&cxled->cxld, p->interleave_ways,
+				  p->interleave_granularity);
+	if (rc) {
+		dev_dbg(&cxlr->dev,
+			"%s with region iw: %d, ig: %d is not supported\n",
+			dev_name(&cxled->cxld.dev),
+			p->interleave_ways,
+			p->interleave_granularity);
+		return rc;
+	}
+
 	if (cxled->mode != cxlr->mode) {
 		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
 			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 534e25e2f0a4..da8a487ededa 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -45,6 +45,8 @@ 
 #define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
 #define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
 #define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
 #define CXL_HDM_DECODER_CTRL_OFFSET 0x4
 #define   CXL_HDM_DECODER_ENABLE BIT(1)
 #define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index b53f7ae0fdd6..979c22955246 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -853,6 +853,7 @@  struct cxl_hdm {
 	unsigned int decoder_count;
 	unsigned int target_count;
 	unsigned int ig_cap_mask;
+	unsigned int iw_cap_mask;
 	struct cxl_port *port;
 };