diff mbox series

[2/5] cxl/hdm: Use 4-byte reads to retrieve HDM decoder base+limit

Message ID 168149844056.792294.8224490474529733736.stgit@dwillia2-xfh.jf.intel.com
State Accepted
Commit 1423885c84a5b3a53b79bcf241b18124d0d7cba6
Headers show
Series cxl/hdm: Decoder enumeration fixes | expand

Commit Message

Dan Williams April 14, 2023, 6:54 p.m. UTC
The CXL specification mandates that 4-byte registers must be accessed
with 4-byte access cycles. CXL 3.0 8.2.3 "Component Register Layout and
Definition" states that the behavior is undefined if (2) 32-bit
registers are accessed as an 8-byte quantity. It turns out that at least
one hardware implementation is sensitive to this in practice. The @size
variable results in zero with:

    size = readq(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));

...and the correct size with:

    lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
    hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
    size = (hi << 32) + lo;

Fixes: d17d0540a0db ("cxl/core/hdm: Add CXL standard decoder enumeration to the core")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/hdm.c |   20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

Comments

Alison Schofield April 14, 2023, 8:32 p.m. UTC | #1
On Fri, Apr 14, 2023 at 11:54:00AM -0700, Dan Williams wrote:
> The CXL specification mandates that 4-byte registers must be accessed
> with 4-byte access cycles. CXL 3.0 8.2.3 "Component Register Layout and
> Definition" states that the behavior is undefined if (2) 32-bit
> registers are accessed as an 8-byte quantity. It turns out that at least
> one hardware implementation is sensitive to this in practice. The @size
> variable results in zero with:
> 
>     size = readq(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> 
> ...and the correct size with:
> 
>     lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
>     hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
>     size = (hi << 32) + lo;
> 
> Fixes: d17d0540a0db ("cxl/core/hdm: Add CXL standard decoder enumeration to the core")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

I see you got rid of ioread64_hi_lo(), so this can't be
happening anywhere else. Are all the other readl, writel
usages known to be OK, or do you need review help against
the spec?

Reviewed-by: Alison Schofield <alison.schofield@intel.com>


> ---
>  drivers/cxl/core/hdm.c |   20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 35b338b716fe..6fdf7981ddc7 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -1,6 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> -#include <linux/io-64-nonatomic-hi-lo.h>
>  #include <linux/seq_file.h>
>  #include <linux/device.h>
>  #include <linux/delay.h>
> @@ -785,8 +784,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  			    int *target_map, void __iomem *hdm, int which,
>  			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
>  {
> +	u64 size, base, skip, dpa_size, lo, hi;
>  	struct cxl_endpoint_decoder *cxled;
> -	u64 size, base, skip, dpa_size;
>  	bool committed;
>  	u32 remainder;
>  	int i, rc;
> @@ -801,8 +800,12 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  							which, info);
>  
>  	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
> -	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> -	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
> +	base = (hi << 32) + lo;
> +	lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
> +	size = (hi << 32) + lo;
>  	committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
>  	cxld->commit = cxl_decoder_commit;
>  	cxld->reset = cxl_decoder_reset;
> @@ -865,8 +868,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		return rc;
>  
>  	if (!info) {
> -		target_list.value =
> -			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
> +		target_list.value = (hi << 32) + lo;
>  		for (i = 0; i < cxld->interleave_ways; i++)
>  			target_map[i] = target_list.target_id[i];
>  
> @@ -883,7 +887,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  			port->id, cxld->id, size, cxld->interleave_ways);
>  		return -ENXIO;
>  	}
> -	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
> +	skip = (hi << 32) + lo;
>  	cxled = to_cxl_endpoint_decoder(&cxld->dev);
>  	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
>  	if (rc) {
>
Dan Williams April 14, 2023, 8:44 p.m. UTC | #2
Alison Schofield wrote:
> On Fri, Apr 14, 2023 at 11:54:00AM -0700, Dan Williams wrote:
> > The CXL specification mandates that 4-byte registers must be accessed
> > with 4-byte access cycles. CXL 3.0 8.2.3 "Component Register Layout and
> > Definition" states that the behavior is undefined if (2) 32-bit
> > registers are accessed as an 8-byte quantity. It turns out that at least
> > one hardware implementation is sensitive to this in practice. The @size
> > variable results in zero with:
> > 
> >     size = readq(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> > 
> > ...and the correct size with:
> > 
> >     lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> >     hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
> >     size = (hi << 32) + lo;
> > 
> > Fixes: d17d0540a0db ("cxl/core/hdm: Add CXL standard decoder enumeration to the core")
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> 
> I see you got rid of ioread64_hi_lo(), so this can't be
> happening anywhere else. Are all the other readl, writel
> usages known to be OK, or do you need review help against
> the spec?

Good question. That's what I looked to answer in patch3. As far as I can
see all the other readq() usage in the driver is for registers defined
as 64-bit, so that patch ended up only being a deletion of unneeded
includes.
Dave Jiang April 14, 2023, 9:22 p.m. UTC | #3
On 4/14/23 11:54 AM, Dan Williams wrote:
> The CXL specification mandates that 4-byte registers must be accessed
> with 4-byte access cycles. CXL 3.0 8.2.3 "Component Register Layout and
> Definition" states that the behavior is undefined if (2) 32-bit
> registers are accessed as an 8-byte quantity. It turns out that at least
> one hardware implementation is sensitive to this in practice. The @size
> variable results in zero with:
> 
>      size = readq(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> 
> ...and the correct size with:
> 
>      lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
>      hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
>      size = (hi << 32) + lo;
> 
> Fixes: d17d0540a0db ("cxl/core/hdm: Add CXL standard decoder enumeration to the core")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/core/hdm.c |   20 +++++++++++++-------
>   1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 35b338b716fe..6fdf7981ddc7 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -1,6 +1,5 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> -#include <linux/io-64-nonatomic-hi-lo.h>
>   #include <linux/seq_file.h>
>   #include <linux/device.h>
>   #include <linux/delay.h>
> @@ -785,8 +784,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   			    int *target_map, void __iomem *hdm, int which,
>   			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
>   {
> +	u64 size, base, skip, dpa_size, lo, hi;
>   	struct cxl_endpoint_decoder *cxled;
> -	u64 size, base, skip, dpa_size;
>   	bool committed;
>   	u32 remainder;
>   	int i, rc;
> @@ -801,8 +800,12 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   							which, info);
>   
>   	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
> -	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> -	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
> +	base = (hi << 32) + lo;
> +	lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
> +	size = (hi << 32) + lo;
>   	committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
>   	cxld->commit = cxl_decoder_commit;
>   	cxld->reset = cxl_decoder_reset;
> @@ -865,8 +868,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   		return rc;
>   
>   	if (!info) {
> -		target_list.value =
> -			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
> +		target_list.value = (hi << 32) + lo;
>   		for (i = 0; i < cxld->interleave_ways; i++)
>   			target_map[i] = target_list.target_id[i];
>   
> @@ -883,7 +887,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>   			port->id, cxld->id, size, cxld->interleave_ways);
>   		return -ENXIO;
>   	}
> -	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
> +	skip = (hi << 32) + lo;
>   	cxled = to_cxl_endpoint_decoder(&cxld->dev);
>   	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
>   	if (rc) {
>
Jonathan Cameron May 15, 2023, 10:46 a.m. UTC | #4
On Fri, 14 Apr 2023 11:54:00 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> The CXL specification mandates that 4-byte registers must be accessed
> with 4-byte access cycles. CXL 3.0 8.2.3 "Component Register Layout and
> Definition" states that the behavior is undefined if (2) 32-bit
> registers are accessed as an 8-byte quantity. It turns out that at least
> one hardware implementation is sensitive to this in practice. The @size
> variable results in zero with:
> 
>     size = readq(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> 
> ...and the correct size with:
> 
>     lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
>     hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
>     size = (hi << 32) + lo;

Hmm. Annoying that there isn't an always present split version of the
ioread64_hi_lo like there effectively is for hi_low_readq()

Mind you, why was this using the ioread64_hi_lo() variant rather
than hi_lo_readq()?  Far as I can tell that wouldn't have suffered
from this problem in the first place.

There is at least one other direct user of that function, so maybe
we should just use it here as well?

Jonathan

> 
> Fixes: d17d0540a0db ("cxl/core/hdm: Add CXL standard decoder enumeration to the core")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/hdm.c |   20 +++++++++++++-------
>  1 file changed, 13 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 35b338b716fe..6fdf7981ddc7 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -1,6 +1,5 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> -#include <linux/io-64-nonatomic-hi-lo.h>
>  #include <linux/seq_file.h>
>  #include <linux/device.h>
>  #include <linux/delay.h>
> @@ -785,8 +784,8 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  			    int *target_map, void __iomem *hdm, int which,
>  			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
>  {
> +	u64 size, base, skip, dpa_size, lo, hi;
>  	struct cxl_endpoint_decoder *cxled;
> -	u64 size, base, skip, dpa_size;
>  	bool committed;
>  	u32 remainder;
>  	int i, rc;
> @@ -801,8 +800,12 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  							which, info);
>  
>  	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
> -	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> -	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
> +	base = (hi << 32) + lo;
> +	lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
> +	size = (hi << 32) + lo;
>  	committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
>  	cxld->commit = cxl_decoder_commit;
>  	cxld->reset = cxl_decoder_reset;
> @@ -865,8 +868,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  		return rc;
>  
>  	if (!info) {
> -		target_list.value =
> -			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
> +		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
> +		target_list.value = (hi << 32) + lo;
>  		for (i = 0; i < cxld->interleave_ways; i++)
>  			target_map[i] = target_list.target_id[i];
>  
> @@ -883,7 +887,9 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
>  			port->id, cxld->id, size, cxld->interleave_ways);
>  		return -ENXIO;
>  	}
> -	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
> +	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
> +	skip = (hi << 32) + lo;
>  	cxled = to_cxl_endpoint_decoder(&cxld->dev);
>  	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
>  	if (rc) {
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 35b338b716fe..6fdf7981ddc7 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -1,6 +1,5 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-hi-lo.h>
 #include <linux/seq_file.h>
 #include <linux/device.h>
 #include <linux/delay.h>
@@ -785,8 +784,8 @@  static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			    int *target_map, void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
+	u64 size, base, skip, dpa_size, lo, hi;
 	struct cxl_endpoint_decoder *cxled;
-	u64 size, base, skip, dpa_size;
 	bool committed;
 	u32 remainder;
 	int i, rc;
@@ -801,8 +800,12 @@  static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 							which, info);
 
 	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
-	base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
-	size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+	lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
+	hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(which));
+	base = (hi << 32) + lo;
+	lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+	hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(which));
+	size = (hi << 32) + lo;
 	committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
 	cxld->commit = cxl_decoder_commit;
 	cxld->reset = cxl_decoder_reset;
@@ -865,8 +868,9 @@  static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		return rc;
 
 	if (!info) {
-		target_list.value =
-			ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+		lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
+		target_list.value = (hi << 32) + lo;
 		for (i = 0; i < cxld->interleave_ways; i++)
 			target_map[i] = target_list.target_id[i];
 
@@ -883,7 +887,9 @@  static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 			port->id, cxld->id, size, cxld->interleave_ways);
 		return -ENXIO;
 	}
-	skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+	lo = readl(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+	hi = readl(hdm + CXL_HDM_DECODER0_SKIP_HIGH(which));
+	skip = (hi << 32) + lo;
 	cxled = to_cxl_endpoint_decoder(&cxld->dev);
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
 	if (rc) {