Skip to content

Commit

Permalink
Merge tag 'mlx5-updates-2021-10-18' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/saeed/linux

Saeed Mahameed says:

mlx5-updates-2021-10-18

Maor Maor Gottlieb says:
========================
Use hash to select the affinity port in VF LAG

Current VF LAG architecture is based on QP association with a port.
QP must be created after LAG is enabled to allow association with non-native port.
VM Packets going on slow-path to eSwicth manager (SW path or hairpin) will be transmitted
through a different QP than the VM. This means that Different packets of the same flow might
egress from different physical ports.

This patch-set solves this issue by moving the port selection to be based on the hash function
defined by the bond.

When the device is moved to VF LAG mode, the driver creates TTC (traffic type classifier) flow
tables in order to classify the packet and steer it to the relevant hash function. Similar to what
is done in the mlx5 RSS implementation.

Each rule in the TTC table, forwards the packet to port selection flow table which has one hash
split flow group which contains two "catch all" flow table entries. Each entry point to the
relative uplink port. As shown below:

		-------------------
		| FT              |
TTC rule ->	|     ----------- |
		|   FG|   FTE --|-|-----> uplink of port #1
		|     |   FTE --|-|-----> uplink of port #2
		|     ----------- |
		-------------------

Hash split flow group is flow group that created as type of HASH_SPLIT and associated with match definer.
The match definer define the fields which included in the hash calculation.

The driver creates the match definer according to the xmit hash policy of the bond driver.

Patches overview:
========================

Minor E-Switch updates:
- Patch #12, dynamic  allocation of dest array
- Patch #13, increase number of forward destinations to 32

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Oct 19, 2021
2 parents 4a6c396 + d40bfed commit aaa5570
Show file tree
Hide file tree
Showing 22 changed files with 1,239 additions and 64 deletions.
4 changes: 2 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o lib/tout.o
Expand All @@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
en_rep.o en/rep/bond.o en/mod_hdr.o \
en/mapping.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,9 @@ const char *parse_fs_dst(struct trace_seq *p,
const char *ret = trace_seq_buffer_ptr(p);

switch (dst->type) {
case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
trace_seq_printf(p, "uplink\n");
break;
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
trace_seq_printf(p, "vport=%u\n", dst->vport.num);
break;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ enum mlx5_flow_match_level {
};

/* current maximum for flow based vport multicasting */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
#define MLX5_MAX_FLOW_FWD_VPORTS 32

enum {
MLX5_ESW_DEST_ENCAP = BIT(0),
Expand Down
16 changes: 14 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,19 +482,23 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_fs_chains *chains = esw_chains(esw);
bool split = !!(esw_attr->split_count);
struct mlx5_vport_tbl_attr fwd_attr;
struct mlx5_flow_destination *dest;
struct mlx5_flow_handle *rule;
struct mlx5_flow_table *fdb;
int i = 0;

if (esw->mode != MLX5_ESWITCH_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);

dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
if (!dest)
return ERR_PTR(-ENOMEM);

flow_act.action = attr->action;
/* if per flow vlan pop/push is emulated, don't set that into the firmware */
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
Expand Down Expand Up @@ -574,6 +578,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
else
atomic64_inc(&esw->offloads.num_flows);

kfree(dest);
return rule;

err_add_rule:
Expand All @@ -584,6 +589,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
err_esw_get:
esw_cleanup_dests(esw, attr);
err_create_goto_table:
kfree(dest);
return rule;
}

Expand All @@ -592,16 +598,20 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
struct mlx5_fs_chains *chains = esw_chains(esw);
struct mlx5_vport_tbl_attr fwd_attr;
struct mlx5_flow_destination *dest;
struct mlx5_flow_table *fast_fdb;
struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
int i, err = 0;

dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
if (!dest)
return ERR_PTR(-ENOMEM);

fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
if (IS_ERR(fast_fdb)) {
rule = ERR_CAST(fast_fdb);
Expand Down Expand Up @@ -654,13 +664,15 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,

atomic64_inc(&esw->offloads.num_flows);

kfree(dest);
return rule;
err_chain_src_rewrite:
esw_put_dest_tables_loop(esw, attr, 0, i);
mlx5_esw_vporttbl_put(esw, &fwd_attr);
err_get_fwd:
mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_get_fast:
kfree(dest);
return rule;
}

Expand Down
66 changes: 65 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,20 @@ static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
}

static int
mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
int definer_id)
{
return 0;
}

static int
mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns,
u16 format_id, u32 *match_mask)
{
return 0;
}

static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft, u32 underlay_qpn,
bool disconnect)
Expand Down Expand Up @@ -563,15 +577,21 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
id = dst->dest_attr.ft->id;
break;
case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
id = dst->dest_attr.vport.num;
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id_valid,
!!(dst->dest_attr.vport.flags &
MLX5_FLOW_DEST_VPORT_VHCA_ID));
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
/* destination_id is reserved */
id = 0;
break;
}
id = dst->dest_attr.vport.num;
if (extended_dest &&
dst->dest_attr.vport.pkt_reformat) {
MLX5_SET(dest_format_struct, in_dests,
Expand Down Expand Up @@ -909,6 +929,45 @@ static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
}

static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
int definer_id)
{
u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];

MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
MLX5_OBJ_TYPE_MATCH_DEFINER);
MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);

return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
u16 format_id, u32 *match_mask)
{
u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {};
u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
struct mlx5_core_dev *dev = ns->dev;
void *ptr;
int err;

MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type,
MLX5_OBJ_TYPE_MATCH_DEFINER);

ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
MLX5_SET(match_definer, ptr, format_id, format_id);

ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask);
memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask));

err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out);
return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
}

static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
.destroy_flow_table = mlx5_cmd_destroy_flow_table,
Expand All @@ -923,6 +982,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
.modify_header_alloc = mlx5_cmd_modify_header_alloc,
.modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
.create_match_definer = mlx5_cmd_create_match_definer,
.destroy_match_definer = mlx5_cmd_destroy_match_definer,
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
Expand All @@ -942,6 +1003,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
.create_match_definer = mlx5_cmd_stub_create_match_definer,
.destroy_match_definer = mlx5_cmd_stub_destroy_match_definer,
.set_peer = mlx5_cmd_stub_set_peer,
.create_ns = mlx5_cmd_stub_create_ns,
.destroy_ns = mlx5_cmd_stub_destroy_ns,
Expand Down Expand Up @@ -969,6 +1032,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
case FS_FT_NIC_TX:
case FS_FT_RDMA_RX:
case FS_FT_RDMA_TX:
case FS_FT_PORT_SEL:
return mlx5_fs_cmd_get_fw_cmds();
default:
return mlx5_fs_cmd_get_stub_cmds();
Expand Down
4 changes: 4 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ struct mlx5_flow_cmds {

int (*create_ns)(struct mlx5_flow_root_namespace *ns);
int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
int (*create_match_definer)(struct mlx5_flow_root_namespace *ns,
u16 format_id, u32 *match_mask);
int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
int definer_id);
};

int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
Expand Down
72 changes: 72 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2191,6 +2191,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
if (steering->fdb_root_ns)
return &steering->fdb_root_ns->ns;
return NULL;
case MLX5_FLOW_NAMESPACE_PORT_SEL:
if (steering->port_sel_root_ns)
return &steering->port_sel_root_ns->ns;
return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
if (steering->sniffer_rx_root_ns)
return &steering->sniffer_rx_root_ns->ns;
Expand Down Expand Up @@ -2596,6 +2600,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
steering->fdb_root_ns = NULL;
kfree(steering->fdb_sub_ns);
steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->port_sel_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->rdma_rx_root_ns);
Expand Down Expand Up @@ -2634,6 +2639,21 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
return PTR_ERR_OR_ZERO(prio);
}

#define PORT_SEL_NUM_LEVELS 3
static int init_port_sel_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;

steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL);
if (!steering->port_sel_root_ns)
return -ENOMEM;

/* Create single prio */
prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0,
PORT_SEL_NUM_LEVELS);
return PTR_ERR_OR_ZERO(prio);
}

static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
{
int err;
Expand Down Expand Up @@ -3020,6 +3040,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}

if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) {
err = init_port_sel_root_ns(steering);
if (err)
goto err;
}

if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
err = init_rdma_rx_root_ns(steering);
Expand Down Expand Up @@ -3224,6 +3250,52 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);

int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer)
{
return definer->id;
}

struct mlx5_flow_definer *
mlx5_create_match_definer(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type ns_type, u16 format_id,
u32 *match_mask)
{
struct mlx5_flow_root_namespace *root;
struct mlx5_flow_definer *definer;
int id;

root = get_root_namespace(dev, ns_type);
if (!root)
return ERR_PTR(-EOPNOTSUPP);

definer = kzalloc(sizeof(*definer), GFP_KERNEL);
if (!definer)
return ERR_PTR(-ENOMEM);

definer->ns_type = ns_type;
id = root->cmds->create_match_definer(root, format_id, match_mask);
if (id < 0) {
mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id);
kfree(definer);
return ERR_PTR(id);
}
definer->id = id;
return definer;
}

void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
struct mlx5_flow_definer *definer)
{
struct mlx5_flow_root_namespace *root;

root = get_root_namespace(dev, definer->ns_type);
if (WARN_ON(!root))
return;

root->cmds->destroy_match_definer(root, definer->id);
kfree(definer);
}

int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns)
{
Expand Down
12 changes: 10 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
#define FDB_TC_MAX_PRIO 16
#define FDB_TC_LEVELS_PER_PRIO 2

struct mlx5_flow_definer {
enum mlx5_flow_namespace_type ns_type;
u32 id;
};

struct mlx5_modify_hdr {
enum mlx5_flow_namespace_type ns_type;
union {
Expand Down Expand Up @@ -97,7 +102,8 @@ enum fs_flow_table_type {
FS_FT_SNIFFER_TX = 0X6,
FS_FT_RDMA_RX = 0X7,
FS_FT_RDMA_TX = 0X8,
FS_FT_MAX_TYPE = FS_FT_RDMA_TX,
FS_FT_PORT_SEL = 0X9,
FS_FT_MAX_TYPE = FS_FT_PORT_SEL,
};

enum fs_flow_table_op_mod {
Expand Down Expand Up @@ -129,6 +135,7 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace *rdma_rx_root_ns;
struct mlx5_flow_root_namespace *rdma_tx_root_ns;
struct mlx5_flow_root_namespace *egress_root_ns;
struct mlx5_flow_root_namespace *port_sel_root_ns;
int esw_egress_acl_vports;
int esw_ingress_acl_vports;
};
Expand Down Expand Up @@ -341,7 +348,8 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
(type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) : \
(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) : \
(type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) : \
(BUILD_BUG_ON_ZERO(FS_FT_RDMA_TX != FS_FT_MAX_TYPE))\
(type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \
(BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\
)

#endif
Loading

0 comments on commit aaa5570

Please sign in to comment.