Skip to content

Commit

Permalink
net/mlx5: Add command entry handling completion
Browse files Browse the repository at this point in the history
When FW response to commands is very slow and all command entries in
use are waiting for completion we can have a race where commands can get
timeout before they get out of the queue and handled. Timeout
completion on uninitialized command will cause releasing command's
buffers before accessing it for initialization and then we will get NULL
pointer exception while trying access it. It may also cause releasing
buffers of another command since we may have timeout completion before
even allocating entry index for this command.
Add entry handling completion to avoid this race.

Fixes: e126ba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Moshe Shemesh authored and Saeed Mahameed committed May 23, 2020
1 parent 5a73015 commit 17d00e8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
14 changes: 14 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,7 @@ static void cmd_work_handler(struct work_struct *work)
int alloc_ret;
int cmd_mode;

complete(&ent->handling);
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem);
if (!ent->page_queue) {
Expand Down Expand Up @@ -978,19 +979,29 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
struct mlx5_cmd *cmd = &dev->cmd;
int err;

if (!wait_for_completion_timeout(&ent->handling, timeout) &&
cancel_work_sync(&ent->work)) {
ent->ret = -ECANCELED;
goto out_err;
}
if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
wait_for_completion(&ent->done);
} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
ent->ret = -ETIMEDOUT;
mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
}

out_err:
err = ent->ret;

if (err == -ETIMEDOUT) {
mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
} else if (err == -ECANCELED) {
mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n",
mlx5_command_str(msg_to_opcode(ent->in)),
msg_to_opcode(ent->in));
}
mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n",
err, deliv_status_to_str(ent->status), ent->status);
Expand Down Expand Up @@ -1026,6 +1037,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
ent->token = token;
ent->polling = force_polling;

init_completion(&ent->handling);
if (!callback)
init_completion(&ent->done);

Expand All @@ -1045,6 +1057,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
err = wait_func(dev, ent);
if (err == -ETIMEDOUT)
goto out;
if (err == -ECANCELED)
goto out_free;

ds = ent->ts2 - ent->ts1;
op = MLX5_GET(mbox_in, in->first.data, opcode);
Expand Down
1 change: 1 addition & 0 deletions include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,7 @@ struct mlx5_cmd_work_ent {
struct delayed_work cb_timeout_work;
void *context;
int idx;
struct completion handling;
struct completion done;
struct mlx5_cmd *cmd;
struct work_struct work;
Expand Down

0 comments on commit 17d00e8

Please sign in to comment.