Skip to content

Commit

Permalink
net/mlx5: Fix a race when moving command interface to events mode
Browse files Browse the repository at this point in the history
After driver creates (via FW command) an EQ for commands, the driver will
be informed on new commands completion by EQE. However, due to a race in
driver's internal command mode metadata update, some new commands will
still be miss-handled by driver as if we are in polling mode. Such commands
can get two non forced completion, leading to already freed command entry
access.

CREATE_EQ command, that maps EQ to the command queue must be posted to the
command queue while it is empty and no other command should be posted.

Add SW mechanism that once the CREATE_EQ command is about to be executed,
all other commands will return error without being sent to the FW. Allow
sending other commands only after successfully changing the driver's
internal command mode metadata.
We can safely return error to all other commands while creating the command
EQ, as all other commands might be sent from the user/application during
driver load. Application can rerun them later after driver's load was
finished.

Fixes: e126ba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Eran Ben Elisha authored and Saeed Mahameed committed May 23, 2020
1 parent 17d00e8 commit d43b700
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 4 deletions.
35 changes: 31 additions & 4 deletions drivers/net/ethernet/mellanox/mlx5/core/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,14 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
struct mlx5_cmd_msg *msg);

static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
{
if (cmd->allowed_opcode == CMD_ALLOWED_OPCODE_ALL)
return true;

return cmd->allowed_opcode == opcode;
}

static void cmd_work_handler(struct work_struct *work)
{
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
Expand Down Expand Up @@ -914,7 +922,8 @@ static void cmd_work_handler(struct work_struct *work)

/* Skip sending command to fw if internal error */
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
!opcode_allowed(&dev->cmd, ent->op)) {
u8 status = 0;
u32 drv_synd;

Expand Down Expand Up @@ -1405,6 +1414,22 @@ static void create_debugfs_files(struct mlx5_core_dev *dev)
mlx5_cmdif_debugfs_init(dev);
}

void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode)
{
struct mlx5_cmd *cmd = &dev->cmd;
int i;

for (i = 0; i < cmd->max_reg_cmds; i++)
down(&cmd->sem);
down(&cmd->pages_sem);

cmd->allowed_opcode = opcode;

up(&cmd->pages_sem);
for (i = 0; i < cmd->max_reg_cmds; i++)
up(&cmd->sem);
}

static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
{
struct mlx5_cmd *cmd = &dev->cmd;
Expand Down Expand Up @@ -1681,12 +1706,13 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int err;
u8 status = 0;
u32 drv_synd;
u16 opcode;
u8 token;

opcode = MLX5_GET(mbox_in, in, opcode);
if (pci_channel_offline(dev->pdev) ||
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
u16 opcode = MLX5_GET(mbox_in, in, opcode);

dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
!opcode_allowed(&dev->cmd, opcode)) {
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
MLX5_SET(mbox_out, out, status, status);
MLX5_SET(mbox_out, out, syndrome, drv_synd);
Expand Down Expand Up @@ -1988,6 +2014,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));

cmd->mode = CMD_MODE_POLLING;
cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL;

create_msg_cache(dev);

Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -611,11 +611,13 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
.nent = MLX5_NUM_CMD_EQE,
.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_CREATE_EQ);
err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
if (err)
goto err1;

mlx5_cmd_use_events(dev);
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);

param = (struct mlx5_eq_param) {
.irq_index = 0,
Expand Down Expand Up @@ -645,6 +647,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
mlx5_cmd_use_polling(dev);
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
Expand Down
6 changes: 6 additions & 0 deletions include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ struct mlx5_cmd {
struct semaphore sem;
struct semaphore pages_sem;
int mode;
u16 allowed_opcode;
struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
struct dma_pool *pool;
struct mlx5_cmd_debug dbg;
Expand Down Expand Up @@ -875,10 +876,15 @@ mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
}

enum {
CMD_ALLOWED_OPCODE_ALL,
};

int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
void mlx5_cmd_allowed_opcode(struct mlx5_core_dev *dev, u16 opcode);

struct mlx5_async_ctx {
struct mlx5_core_dev *dev;
Expand Down

0 comments on commit d43b700

Please sign in to comment.