diff mbox series

[1/3] hwmon: tmp421: handle I2C errors

Message ID 20210922134154.20766-1-fercerpav@gmail.com (mailing list archive)
State Superseded
Headers show
Series [1/3] hwmon: tmp421: handle I2C errors | expand

Commit Message

Paul Fertser Sept. 22, 2021, 1:41 p.m. UTC
The function i2c_smbus_read_byte_data() can return a negative error number
instead of the data read if the I2C transaction fails for whatever reason.

I consider this fix to be stable material, as the lack of error checking
here leads to serious issues on production hardware. Errors treated as
temperatures produce spurious critical temperature-crossed-threshold
errors in BMC logs for OCP server hardware. The patch was tested with a
Mellanox OCP Mezzanine card emulating the TMP421 protocol for temperature
sensing, which sometimes leads to I2C protocol errors during the early
boot-up stage.

Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
---
 drivers/hwmon/tmp421.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

Comments

Guenter Roeck Sept. 22, 2021, 3:53 p.m. UTC | #1
On Wed, Sep 22, 2021 at 04:41:52PM +0300, Paul Fertser wrote:
> Function i2c_smbus_read_byte_data() can return a negative error number
> instead of the data read if I2C transaction failed for whatever reason.
> 
> I consider this fix to be stable material as lack of error checking here
> leads to serious issues on production hardware. Errors treated as
> temperatures produce spurious critical temperature-crossed-threshold
> errors in BMC logs for OCP server hardware. The patch was tested with
> Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature
> sensing which sometimes leads to I2C protocol error during early boot up
> stage.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Paul Fertser <fercerpav@gmail.com>
> ---
>  drivers/hwmon/tmp421.c | 31 ++++++++++++++++++++++---------
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
> index ede66ea6a730..6175ed4b10bd 100644
> --- a/drivers/hwmon/tmp421.c
> +++ b/drivers/hwmon/tmp421.c
> @@ -93,7 +93,7 @@ struct tmp421_data {
>  	struct hwmon_channel_info temp_info;
>  	const struct hwmon_channel_info *info[2];
>  	struct hwmon_chip_info chip;
> -	char valid;
> +	int last_errno;
>  	unsigned long last_updated;
>  	unsigned long channels;
>  	u8 config;
> @@ -128,20 +128,30 @@ static struct tmp421_data *tmp421_update_device(struct device *dev)
>  	mutex_lock(&data->update_lock);
>  
>  	if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
> -	    !data->valid) {
> -		data->config = i2c_smbus_read_byte_data(client,
> -			TMP421_CONFIG_REG_1);
> +	    data->last_errno) {
> +		data->last_errno = i2c_smbus_read_byte_data(client,
> +							    TMP421_CONFIG_REG_1);

No. The function should return an ERR_PTR after an error.
Something like
	int ret = 0;
	...
	ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1);
	if (ret < 0)
		goto exit;
	data->config = ret;
	...
exit:
	mutex_unlock(...);
	return ret < 0 ? ERR_PTR(ret) : data;

Or, even better, let tmp421_update_device() return an error code instead
of data, and let the caller get the data pointer.

int tmp421_update_device(struct tmp421_data *data)
{
	struct i2c_client *client = data->client;
	int ret = 0;

	...
	return ret < 0 ? ret : 0;
}
...

	struct tmp421_data *data = dev_get_drvdata(dev);

	ret = tmp421_update_device(data);
	if (ret)
		return ret;


Guenter

> +		if (data->last_errno < 0)
> +			goto exit;
> +		data->config =  data->last_errno;
>  
>  		for (i = 0; i < data->channels; i++) {
> -			data->temp[i] = i2c_smbus_read_byte_data(client,
> -				TMP421_TEMP_MSB[i]) << 8;
> -			data->temp[i] |= i2c_smbus_read_byte_data(client,
> -				TMP421_TEMP_LSB[i]);
> +			data->last_errno = i2c_smbus_read_byte_data(client,
> +								    TMP421_TEMP_MSB[i]);
> +			if (data->last_errno < 0)
> +				goto exit;
> +			data->temp[i] = data->last_errno << 8;
> +			data->last_errno = i2c_smbus_read_byte_data(client,
> +								    TMP421_TEMP_LSB[i]);
> +			if (data->last_errno < 0)
> +				goto exit;
> +			data->temp[i] |= data->last_errno;
>  		}
>  		data->last_updated = jiffies;
> -		data->valid = 1;
> +		data->last_errno = 0;
>  	}
>  
> +exit:
>  	mutex_unlock(&data->update_lock);
>  
>  	return data;
> @@ -152,6 +162,9 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
>  {
>  	struct tmp421_data *tmp421 = tmp421_update_device(dev);
>  
> +	if (tmp421->last_errno)
> +		return tmp421->last_errno;
> +
>  	switch (attr) {
>  	case hwmon_temp_input:
>  		if (tmp421->config & TMP421_CONFIG_RANGE)
> -- 
> 2.17.1
>
diff mbox series

Patch

diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index ede66ea6a730..6175ed4b10bd 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -93,7 +93,7 @@  struct tmp421_data {
 	struct hwmon_channel_info temp_info;
 	const struct hwmon_channel_info *info[2];
 	struct hwmon_chip_info chip;
-	char valid;
+	int last_errno;
 	unsigned long last_updated;
 	unsigned long channels;
 	u8 config;
@@ -128,20 +128,30 @@  static struct tmp421_data *tmp421_update_device(struct device *dev)
 	mutex_lock(&data->update_lock);
 
 	if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
-	    !data->valid) {
-		data->config = i2c_smbus_read_byte_data(client,
-			TMP421_CONFIG_REG_1);
+	    data->last_errno) {
+		data->last_errno = i2c_smbus_read_byte_data(client,
+							    TMP421_CONFIG_REG_1);
+		if (data->last_errno < 0)
+			goto exit;
+		data->config =  data->last_errno;
 
 		for (i = 0; i < data->channels; i++) {
-			data->temp[i] = i2c_smbus_read_byte_data(client,
-				TMP421_TEMP_MSB[i]) << 8;
-			data->temp[i] |= i2c_smbus_read_byte_data(client,
-				TMP421_TEMP_LSB[i]);
+			data->last_errno = i2c_smbus_read_byte_data(client,
+								    TMP421_TEMP_MSB[i]);
+			if (data->last_errno < 0)
+				goto exit;
+			data->temp[i] = data->last_errno << 8;
+			data->last_errno = i2c_smbus_read_byte_data(client,
+								    TMP421_TEMP_LSB[i]);
+			if (data->last_errno < 0)
+				goto exit;
+			data->temp[i] |= data->last_errno;
 		}
 		data->last_updated = jiffies;
-		data->valid = 1;
+		data->last_errno = 0;
 	}
 
+exit:
 	mutex_unlock(&data->update_lock);
 
 	return data;
@@ -152,6 +162,9 @@  static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
 {
 	struct tmp421_data *tmp421 = tmp421_update_device(dev);
 
+	if (tmp421->last_errno)
+		return tmp421->last_errno;
+
 	switch (attr) {
 	case hwmon_temp_input:
 		if (tmp421->config & TMP421_CONFIG_RANGE)