diff mbox series

[V11,7/8] cxl/port: Retry reading CDAT on failure

Message ID 20220610202259.3544623-8-ira.weiny@intel.com
State Superseded
Headers show
Series CXL: Read CDAT and DSMAS data | expand

Commit Message

Ira Weiny June 10, 2022, 8:22 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

The CDAT read may fail for a number of reasons, but mainly because it is
possible to read parts of the table from different valid states.  The
checksum in the CDAT table protects against this.

Now that the cdat data is validated, issue a retry if the CDAT read
fails.  For now 5 retries are implemented.

Reviewed-by: Ben Widawsky <bwidawsk@kernel.org>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from V10
	Pick up review tag and fix commit message

Changes from V9
	Alison Schofield/Davidlohr Bueso
		Print debug on each iteration and error only after failure

Changes from V8
	Move code to cxl/core/pci.c

Changes from V6
	Move to pci.c
	Fix retries count
	Change to 5 retries

Changes from V5:
	New patch -- easy to push off or drop.
---
 drivers/cxl/core/pci.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

Comments

Alison Schofield June 28, 2022, 3:32 a.m. UTC | #1
On Fri, Jun 10, 2022 at 01:22:58PM -0700, Ira Weiny wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> The CDAT read may fail for a number of reasons but mainly it is possible
> to get different parts of a valid state.  The checksum in the CDAT table
> protects against this.
> 
> Now that the cdat data is validated, issue a retry if the CDAT read
> fails.  For now 5 retries are implemented.
> 
> Reviewed-by: Ben Widawsky <bwidawsk@kernel.org>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>

Reviewed-by: Alison Schofield <alison.schofield@intel.com>


> ---
> Changes from V10
> 	Pick up review tag and fix commit message
> 
> Changes from V9
> 	Alison Schofield/Davidlohr Bueso
> 		Print debug on each iteration and error only after failure
> 
> Changes from V8
> 	Move code to cxl/core/pci.c
> 
> Changes from V6
> 	Move to pci.c
> 	Fix retries count
> 	Change to 5 retries
> 
> Changes from V5:
> 	New patch -- easy to push off or drop.
> ---
>  drivers/cxl/core/pci.c | 40 +++++++++++++++++++++++++++++++---------
>  1 file changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index cb70287e2984..fd02bc7c0d97 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -617,19 +617,13 @@ static int cxl_cdat_read_table(struct device *dev,
>  	return rc;
>  }
>  
> -/**
> - * read_cdat_data - Read the CDAT data on this port
> - * @port: Port to read data from
> - *
> - * This call will sleep waiting for responses from the DOE mailbox.
> - */
> -void read_cdat_data(struct cxl_port *port)
> +static int __read_cdat_data(struct cxl_port *port)
>  {
>  	static struct pci_doe_mb *cdat_mb;
>  	struct device *dev = &port->dev;
>  	struct device *uport = port->uport;
>  	size_t cdat_length;
> -	int ret;
> +	int ret = 0;
>  
>  	/*
>  	 * Ensure a reference on the underlying uport device which has the
> @@ -640,17 +634,21 @@ void read_cdat_data(struct cxl_port *port)
>  	cdat_mb = find_cdat_mb(uport);
>  	if (!cdat_mb) {
>  		dev_dbg(dev, "No CDAT mailbox\n");
> +		ret = -EIO;
>  		goto out;
>  	}
>  
>  	if (cxl_cdat_get_length(dev, cdat_mb, &cdat_length)) {
>  		dev_dbg(dev, "No CDAT length\n");
> +		ret = -EIO;
>  		goto out;
>  	}
>  
>  	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
> -	if (!port->cdat.table)
> +	if (!port->cdat.table) {
> +		ret = -ENOMEM;
>  		goto out;
> +	}
>  
>  	port->cdat.length = cdat_length;
>  	ret = cxl_cdat_read_table(dev, cdat_mb, &port->cdat);
> @@ -664,5 +662,29 @@ void read_cdat_data(struct cxl_port *port)
>  
>  out:
>  	put_device(uport);
> +	return ret;
> +}
> +
> +/**
> + * read_cdat_data - Read the CDAT data on this port
> + * @port: Port to read data from
> + *
> + * This call will sleep waiting for responses from the DOE mailbox.
> + */
> +void read_cdat_data(struct cxl_port *port)
> +{
> +	int retries = 5;
> +	int rc;
> +
> +	while (retries--) {
> +		rc = __read_cdat_data(port);
> +		if (!rc)
> +			return;
> +		dev_dbg(&port->dev,
> +			"CDAT data read error rc=%d (retries %d)\n",
> +			rc, retries);
> +	}
> +	dev_err(&port->dev, "CDAT data read failed after %d retries\n",
> +		retries);
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
> -- 
> 2.35.1
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index cb70287e2984..fd02bc7c0d97 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -617,19 +617,13 @@  static int cxl_cdat_read_table(struct device *dev,
 	return rc;
 }
 
-/**
- * read_cdat_data - Read the CDAT data on this port
- * @port: Port to read data from
- *
- * This call will sleep waiting for responses from the DOE mailbox.
- */
-void read_cdat_data(struct cxl_port *port)
+static int __read_cdat_data(struct cxl_port *port)
 {
 	static struct pci_doe_mb *cdat_mb;
 	struct device *dev = &port->dev;
 	struct device *uport = port->uport;
 	size_t cdat_length;
-	int ret;
+	int ret = 0;
 
 	/*
 	 * Ensure a reference on the underlying uport device which has the
@@ -640,17 +634,21 @@  void read_cdat_data(struct cxl_port *port)
 	cdat_mb = find_cdat_mb(uport);
 	if (!cdat_mb) {
 		dev_dbg(dev, "No CDAT mailbox\n");
+		ret = -EIO;
 		goto out;
 	}
 
 	if (cxl_cdat_get_length(dev, cdat_mb, &cdat_length)) {
 		dev_dbg(dev, "No CDAT length\n");
+		ret = -EIO;
 		goto out;
 	}
 
 	port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
-	if (!port->cdat.table)
+	if (!port->cdat.table) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
 	port->cdat.length = cdat_length;
 	ret = cxl_cdat_read_table(dev, cdat_mb, &port->cdat);
@@ -664,5 +662,29 @@  void read_cdat_data(struct cxl_port *port)
 
 out:
 	put_device(uport);
+	return ret;
+}
+
+/**
+ * read_cdat_data - Read the CDAT data on this port
+ * @port: Port to read data from
+ *
+ * The read is attempted up to 5 times; an error is logged only if all fail.
+ * This call will sleep waiting for responses from the DOE mailbox.
+ */
+void read_cdat_data(struct cxl_port *port)
+{
+	const int max_tries = 5;	/* a post-decremented counter would log -1 */
+	int tries, rc;
+
+	for (tries = 1; tries <= max_tries; tries++) {
+		rc = __read_cdat_data(port);
+		if (!rc)
+			return;
+		dev_dbg(&port->dev, "CDAT data read error rc=%d (retries %d)\n",
+			rc, max_tries - tries);
+	}
+	dev_err(&port->dev, "CDAT data read failed after %d retries\n",
+		max_tries);
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);