Skip to content

Commit

Permalink
[patch] mlxsw: i2c: Prevent transaction execution for special chip st…
Browse files Browse the repository at this point in the history
…ates

This patch fixes an issue encountered during SONiC warm-reboot: when
ISSU starts, the kernel reports that a CPU stall has been detected:

```
INFO: rcu_sched detected stalls on CPUs/tasks
```

This patch won't be upstreamed due to the upstream driver not supporting
ISSU.

Signed-off-by: Stepan Blyschak <stepanb@nvidia.com>
  • Loading branch information
stepanblyschak committed Jun 17, 2022
1 parent c5775b7 commit 18c351e
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
126 changes: 126 additions & 0 deletions patch/0080-TMP-mlxsw-i2c-Prevent-transaction-execution-for-spec.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
From 392127480c91a26dae2449288229d4957213dcc1 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Tue, 31 May 2022 10:27:38 +0300
Subject: [PATCH v4.9 ISSU WA 1/1] TMP: mlxsw: i2c: Prevent transaction
execution for special chip states

Do not run transactions when the chip is in reset or in an in-service
update (ISSU) state.
In such a case the firmware is not accessible and will reject the
transaction with the relevant status, "RUNNING_RESET" or "FW_ISSU_ONGOING".
If a transaction fails due to one of these reasons, stop sending
transactions. In such a case the driver is about to be removed, since it
cannot continue running after a reset or in-service update; it is
re-probed after the reset or in-service update has completed.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
---
drivers/net/ethernet/mellanox/mlxsw/cmd.h | 4 ++++
drivers/net/ethernet/mellanox/mlxsw/i2c.c | 29 ++++++++++++++++++++---
2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 0772e4339b33..6329ebb09523 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -149,6 +149,8 @@ enum mlxsw_cmd_status {
MLXSW_CMD_STATUS_BAD_NVMEM = 0x0B,
/* Device is currently running reset */
MLXSW_CMD_STATUS_RUNNING_RESET = 0x26,
+ /* FW ISSU ongoing. */
+ MLXSW_CMD_STATUS_FW_ISSU = 0x27,
/* Bad management packet (silently discarded). */
MLXSW_CMD_STATUS_BAD_PKT = 0x30,
};
@@ -180,6 +182,8 @@ static inline const char *mlxsw_cmd_status_str(u8 status)
return "BAD_NVMEM";
case MLXSW_CMD_STATUS_RUNNING_RESET:
return "RUNNING_RESET";
+ case MLXSW_CMD_STATUS_FW_ISSU:
+ return "FW_ISSU_ONGOING";
case MLXSW_CMD_STATUS_BAD_PKT:
return "BAD_PKT";
default:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index e04d521d9376..1205c6111b0b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -62,6 +62,7 @@
* @core: switch core pointer;
* @bus_info: bus info block;
* @block_size: maximum block size allowed to pass to under layer;
+ * @status: status to indicate chip reset or in-service update;
*/
struct mlxsw_i2c {
struct {
@@ -75,6 +76,7 @@ struct mlxsw_i2c {
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
u16 block_size;
+ u8 status;
};

#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -221,6 +223,19 @@ static int mlxsw_i2c_write_cmd(struct i2c_client *client,
return 0;
}

+static bool
+mlxsw_i2c_cmd_status_verify(struct device *dev, struct mlxsw_i2c *mlxsw_i2c,
+ u8 status)
+{
+ if (status == MLXSW_CMD_STATUS_FW_ISSU ||
+ status == MLXSW_CMD_STATUS_RUNNING_RESET) {
+ mlxsw_i2c->status = status;
+ dev_info(dev, "FW status=%x(%s))\n", status, mlxsw_cmd_status_str(status));
+ return true;
+ }
+ return false;
+}
+
/* Routine posts initialization command to ASIC through mail box. */
static int
mlxsw_i2c_write_init_cmd(struct i2c_client *client,
@@ -404,6 +419,10 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,

WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));

+ /* Do not run transaction if chip is in reset or in-service update state. */
+ if (mlxsw_i2c->status)
+ return 0;
+
if (in_mbox) {
reg_size = mlxsw_i2c_get_reg_size(in_mbox);
num = reg_size / mlxsw_i2c->block_size;
@@ -478,6 +497,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,

cmd_fail:
mutex_unlock(&mlxsw_i2c->cmd.lock);
+ if (mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, *status))
+ err = 0;
return err;
}

@@ -607,14 +628,16 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
/* Wait until go bit is cleared. */
err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
if (err) {
- dev_err(&client->dev, "HW semaphore is not released");
+ if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
+ dev_err(&client->dev, "HW semaphore is not released");
goto errout;
}

/* Validate transaction completion status. */
if (status) {
- dev_err(&client->dev, "Bad transaction completion status %x\n",
- status);
+ if (!mlxsw_i2c_cmd_status_verify(&client->dev, mlxsw_i2c, status))
+ dev_err(&client->dev, "Bad transaction completion status %x\n",
+ status);
err = -EIO;
goto errout;
}
--
2.20.1

1 change: 1 addition & 0 deletions patch/series
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ linux-4.13-thermal-intel_pch_thermal-Fix-enable-check-on.patch
0077-mlxsw-core-Increase-critical-threshold-for-ASIC-ther.patch
0078-mlxsw-core-Add-validation-of-transceiver-temperature.patch
0079-mlxsw-core-Remove-critical-trip-point-from-thermal-z.patch
0080-TMP-mlxsw-i2c-Prevent-transaction-execution-for-spec.patch
linux-4.16-firmware-dmi-handle-missing-DMI-data-gracefully.patch
mellanox-backport-introduce-psample-a-new-genetlink-channel.patch
mellanox-backport-introduce-tc-sample-action.patch
Expand Down

0 comments on commit 18c351e

Please sign in to comment.