From: Eddie James eajames@linux.ibm.com
[ Upstream commit dbed963ed62c4c2b8870a02c8b7dcb0c2af3ee0b ]
Due to the OCC communication design with a shared SRAM area, checkum errors are expected due to corrupted buffer from OCC communications with other system components. Therefore, retry the command twice in the event of a checksum failure.
Signed-off-by: Eddie James eajames@linux.ibm.com Acked-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20220426154956.27205-3-eajames@linux.ibm.com Signed-off-by: Joel Stanley joel@jms.id.au Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hwmon/occ/p9_sbe.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c index c1e0a1d96cd4..f3791a589b01 100644 --- a/drivers/hwmon/occ/p9_sbe.c +++ b/drivers/hwmon/occ/p9_sbe.c @@ -14,6 +14,8 @@
#include "common.h"
+#define OCC_CHECKSUM_RETRIES 3 + struct p9_sbe_occ { struct occ occ; bool sbe_error; @@ -80,18 +82,23 @@ static bool p9_sbe_occ_save_ffdc(struct p9_sbe_occ *ctx, const void *resp, static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len, void *resp, size_t resp_len) { + size_t original_resp_len = resp_len; struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ); - int rc; + int rc, i;
- rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); - if (rc < 0) { + for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) { + rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len); + if (rc >= 0) + break; if (resp_len) { if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len)) sysfs_notify(&occ->bus_dev->kobj, NULL, bin_attr_ffdc.attr.name); + return rc; } - - return rc; + if (rc != -EBADE) + return rc; + resp_len = original_resp_len; }
switch (((struct occ_response *)resp)->return_status) {