Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
I consider this fix to be stable material as lack of error checking here leads to serious issues on production hardware. Errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
---
drivers/hwmon/tmp421.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..6175ed4b10bd 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -93,7 +93,7 @@ struct tmp421_data { struct hwmon_channel_info temp_info; const struct hwmon_channel_info *info[2]; struct hwmon_chip_info chip; - char valid; + int last_errno; unsigned long last_updated; unsigned long channels; u8 config; @@ -128,20 +128,30 @@ static struct tmp421_data *tmp421_update_device(struct device *dev) mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) || - !data->valid) { - data->config = i2c_smbus_read_byte_data(client, - TMP421_CONFIG_REG_1); + data->last_errno) { + data->last_errno = i2c_smbus_read_byte_data(client, + TMP421_CONFIG_REG_1); + if (data->last_errno < 0) + goto exit; + data->config = data->last_errno;
for (i = 0; i < data->channels; i++) { - data->temp[i] = i2c_smbus_read_byte_data(client, - TMP421_TEMP_MSB[i]) << 8; - data->temp[i] |= i2c_smbus_read_byte_data(client, - TMP421_TEMP_LSB[i]); + data->last_errno = i2c_smbus_read_byte_data(client, + TMP421_TEMP_MSB[i]); + if (data->last_errno < 0) + goto exit; + data->temp[i] = data->last_errno << 8; + data->last_errno = i2c_smbus_read_byte_data(client, + TMP421_TEMP_LSB[i]); + if (data->last_errno < 0) + goto exit; + data->temp[i] |= data->last_errno; } data->last_updated = jiffies; - data->valid = 1; + data->last_errno = 0; }
+exit: mutex_unlock(&data->update_lock);
return data; @@ -152,6 +162,9 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, { struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ if (tmp421->last_errno) + return tmp421->last_errno; + switch (attr) { case hwmon_temp_input: if (tmp421->config & TMP421_CONFIG_RANGE)
On Wed, Sep 22, 2021 at 04:41:52PM +0300, Paul Fertser wrote:
Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
I consider this fix to be stable material as lack of error checking here leads to serious issues on production hardware. Errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
drivers/hwmon/tmp421.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..6175ed4b10bd 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -93,7 +93,7 @@ struct tmp421_data { struct hwmon_channel_info temp_info; const struct hwmon_channel_info *info[2]; struct hwmon_chip_info chip;
- char valid;
+ int last_errno;
unsigned long last_updated; unsigned long channels; u8 config;
@@ -128,20 +128,30 @@ static struct tmp421_data *tmp421_update_device(struct device *dev) mutex_lock(&data->update_lock); if (time_after(jiffies, data->last_updated + (HZ / 2)) ||
- !data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ data->last_errno) {
+ data->last_errno = i2c_smbus_read_byte_data(client,
+ TMP421_CONFIG_REG_1);
No. The function should return an ERR_PTR after an error. Something like int ret = 0; ... ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1); if (ret < 0) goto exit; data->config = ret; ... exit: mutex_unlock(...); return ret < 0 ? ERR_PTR(ret) : data;
Or, even better, let tmp421_update_device() return an error code instead of data, and let the caller get the data pointer.
int tmp421_update_device(struct tmp421_data *data) { struct i2c_client *client = data->client; int ret = 0;
... return ret < 0 ? ret : 0; } ...
struct tmp421_data *data = dev_get_drvdata(dev);
ret = tmp421_update_device(data); if (ret) return ret;
Guenter
+ if (data->last_errno < 0)
+ goto exit;
+ data->config = data->last_errno;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ data->last_errno = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_MSB[i]);
+ if (data->last_errno < 0)
+ goto exit;
+ data->temp[i] = data->last_errno << 8;
+ data->last_errno = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_LSB[i]);
+ if (data->last_errno < 0)
+ goto exit;
+ data->temp[i] |= data->last_errno;
}
data->last_updated = jiffies;
- data->valid = 1;
+ data->last_errno = 0;
}
+exit: mutex_unlock(&data->update_lock); return data; @@ -152,6 +162,9 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, { struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ if (tmp421->last_errno)
+ return tmp421->last_errno;
+
switch (attr) { case hwmon_temp_input: if (tmp421->config & TMP421_CONFIG_RANGE)
-- 2.17.1
Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
I consider this fix to be stable material as lack of error checking here leads to serious issues on production hardware. Errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
---
Changes from v1: - Reorganise code following excellent suggestion by Guenter Roeck to use tagged errors consistently
drivers/hwmon/tmp421.c | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..63cb6badb478 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -93,7 +93,7 @@ struct tmp421_data { struct hwmon_channel_info temp_info; const struct hwmon_channel_info *info[2]; struct hwmon_chip_info chip; - char valid; + bool valid; unsigned long last_updated; unsigned long channels; u8 config; @@ -119,38 +119,59 @@ static int temp_from_u16(u16 reg) return (temp * 1000 + 128) / 256; }
-static struct tmp421_data *tmp421_update_device(struct device *dev) +static int tmp421_update_device(struct tmp421_data *data) { - struct tmp421_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; + int ret = 0; int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) || !data->valid) { - data->config = i2c_smbus_read_byte_data(client, - TMP421_CONFIG_REG_1); + ret = i2c_smbus_read_byte_data(client, + TMP421_CONFIG_REG_1); + if (ret < 0) + goto exit; + data->config = ret;
for (i = 0; i < data->channels; i++) { - data->temp[i] = i2c_smbus_read_byte_data(client, - TMP421_TEMP_MSB[i]) << 8; - data->temp[i] |= i2c_smbus_read_byte_data(client, - TMP421_TEMP_LSB[i]); + ret = i2c_smbus_read_byte_data(client, + TMP421_TEMP_MSB[i]); + if (ret < 0) + goto exit; + data->temp[i] = ret << 8; + + ret = i2c_smbus_read_byte_data(client, + TMP421_TEMP_LSB[i]); + if (ret < 0) + goto exit; + data->temp[i] |= ret; } data->last_updated = jiffies; - data->valid = 1; + data->valid = true; }
+exit: mutex_unlock(&data->update_lock);
- return data; + if (ret < 0) { + data->valid = false; + return ret; + } + + return 0; }
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct tmp421_data *tmp421 = tmp421_update_device(dev); + struct tmp421_data *tmp421 = dev_get_drvdata(dev); + int ret = 0; + + ret = tmp421_update_device(tmp421); + if (ret) + return ret;
switch (attr) { case hwmon_temp_input:
On Thu, Sep 23, 2021 at 12:47:59PM +0300, Paul Fertser wrote:
Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
I consider this fix to be stable material as lack of error checking here leads to serious issues on production hardware. Errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Possibly, but this doesn't belong in the commit log; it belongs after the "---" line, as written. I'd suggest to leave the "I consider this fix to be stable material" off, rephrase to "Lack of error checking can lead to serious..." which does belong in the commit description, and add a Fixes: tag which marks the patch for stable releases. Also, drop the char -> bool change from the bug fix; it is unrelated and should be a separate cleanup patch.
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Changes from v1:
- Reorganise code following excellent suggestion by Guenter Roeck to use tagged errors consistently
drivers/hwmon/tmp421.c | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..63cb6badb478 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -93,7 +93,7 @@ struct tmp421_data { struct hwmon_channel_info temp_info; const struct hwmon_channel_info *info[2]; struct hwmon_chip_info chip;
- char valid;
+ bool valid;
This is a cleanup, not a bug fix, and it isn't even mentioned in the patch description.
unsigned long last_updated; unsigned long channels; u8 config; @@ -119,38 +119,59 @@ static int temp_from_u16(u16 reg) return (temp * 1000 + 128) / 256; } -static struct tmp421_data *tmp421_update_device(struct device *dev) +static int tmp421_update_device(struct tmp421_data *data) {
- struct tmp421_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock); if (time_after(jiffies, data->last_updated + (HZ / 2)) || !data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
}
data->last_updated = jiffies;
- data->valid = 1;
+ data->valid = true;
}
+exit: mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = false;
+ return ret;
+ }
+
+ return 0;
} static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) {
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) { case hwmon_temp_input:
Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production hardware, e.g. errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
---
Changes from v2: - Do not change data->valid type as that's an unrelated cleanup - Add Fixes: tag - Remove clutter from the commit message
Changes from v1: - Reorganise code following excellent suggestion by Guenter Roeck to use tagged errors consistently
drivers/hwmon/tmp421.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..e6b2b31d17c8 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -119,38 +119,59 @@ static int temp_from_u16(u16 reg) return (temp * 1000 + 128) / 256; }
-static struct tmp421_data *tmp421_update_device(struct device *dev) +static int tmp421_update_device(struct tmp421_data *data) { - struct tmp421_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; + int ret = 0; int i;
mutex_lock(&data->update_lock);
if (time_after(jiffies, data->last_updated + (HZ / 2)) || !data->valid) { - data->config = i2c_smbus_read_byte_data(client, - TMP421_CONFIG_REG_1); + ret = i2c_smbus_read_byte_data(client, + TMP421_CONFIG_REG_1); + if (ret < 0) + goto exit; + data->config = ret;
for (i = 0; i < data->channels; i++) { - data->temp[i] = i2c_smbus_read_byte_data(client, - TMP421_TEMP_MSB[i]) << 8; - data->temp[i] |= i2c_smbus_read_byte_data(client, - TMP421_TEMP_LSB[i]); + ret = i2c_smbus_read_byte_data(client, + TMP421_TEMP_MSB[i]); + if (ret < 0) + goto exit; + data->temp[i] = ret << 8; + + ret = i2c_smbus_read_byte_data(client, + TMP421_TEMP_LSB[i]); + if (ret < 0) + goto exit; + data->temp[i] |= ret; } data->last_updated = jiffies; data->valid = 1; }
+exit: mutex_unlock(&data->update_lock);
- return data; + if (ret < 0) { + data->valid = 0; + return ret; + } + + return 0; }
static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct tmp421_data *tmp421 = tmp421_update_device(dev); + struct tmp421_data *tmp421 = dev_get_drvdata(dev); + int ret = 0; + + ret = tmp421_update_device(tmp421); + if (ret) + return ret;
switch (attr) { case hwmon_temp_input:
On Fri, Sep 24, 2021 at 12:30:09PM +0300, Paul Fertser wrote:
Function i2c_smbus_read_byte_data() can return a negative error number instead of the data read if I2C transaction failed for whatever reason.
Lack of error checking can lead to serious issues on production hardware, e.g. errors treated as temperatures produce spurious critical temperature-crossed-threshold errors in BMC logs for OCP server hardware. The patch was tested with Mellanox OCP Mezzanine card emulating TMP421 protocol for temperature sensing which sometimes leads to I2C protocol error during early boot up stage.
Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips")
Cc: stable@vger.kernel.org
Signed-off-by: Paul Fertser <fercerpav@gmail.com>
Applied.
Thanks, Guenter
Changes from v2:
- Do not change data->valid type as that's an unrelated cleanup
- Add Fixes: tag
- Remove clutter from the commit message
Changes from v1:
- Reorganise code following excellent suggestion by Guenter Roeck to use tagged errors consistently
drivers/hwmon/tmp421.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730..e6b2b31d17c8 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -119,38 +119,59 @@ static int temp_from_u16(u16 reg) return (temp * 1000 + 128) / 256; } -static struct tmp421_data *tmp421_update_device(struct device *dev) +static int tmp421_update_device(struct tmp421_data *data) {
- struct tmp421_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client;
+ int ret = 0;
int i;
mutex_lock(&data->update_lock); if (time_after(jiffies, data->last_updated + (HZ / 2)) || !data->valid) {
- data->config = i2c_smbus_read_byte_data(client,
- TMP421_CONFIG_REG_1);
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_CONFIG_REG_1);
+ if (ret < 0)
+ goto exit;
+ data->config = ret;
for (i = 0; i < data->channels; i++) {
- data->temp[i] = i2c_smbus_read_byte_data(client,
- TMP421_TEMP_MSB[i]) << 8;
- data->temp[i] |= i2c_smbus_read_byte_data(client,
- TMP421_TEMP_LSB[i]);
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_MSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] = ret << 8;
+
+ ret = i2c_smbus_read_byte_data(client,
+ TMP421_TEMP_LSB[i]);
+ if (ret < 0)
+ goto exit;
+ data->temp[i] |= ret;
} data->last_updated = jiffies; data->valid = 1; }
+exit: mutex_unlock(&data->update_lock);
- return data;
+ if (ret < 0) {
+ data->valid = 0;
+ return ret;
+ }
+
+ return 0;
} static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) {
- struct tmp421_data *tmp421 = tmp421_update_device(dev);
+ struct tmp421_data *tmp421 = dev_get_drvdata(dev);
+ int ret = 0;
+
+ ret = tmp421_update_device(tmp421);
+ if (ret)
+ return ret;
switch (attr) { case hwmon_temp_input:
linux-stable-mirror@lists.linaro.org