Skip to content

Commit

Permalink
Add VXLAN support for non-VTEP datapath bindings
Browse files Browse the repository at this point in the history
Because the 24-bit VXLAN VNI is too small to carry all three of
datapath id, ingress port and egress port, the implementation drops the
ingress port and splits the 24 bits of the VNI into two chunks of 12
bits each: one for the datapath and one for the egress port.

This also bumps priority for ramp switch flows to 110 to prioritize
them over regular non-ramp VXLAN flows.

Limitations: because ingress port is not passed, ACLs that rely on it
won't work with VXLAN; reduced number of networks and ports per
network (max 4096 for both).

NB consumers may use NB_Global options:max_tunid to determine maximum
capacity for logical switches supported by the setup.

Renamed MLF_RCV_FROM_VXLAN_BIT into MLF_RCV_FROM_RAMP_BIT to reflect
the new use case.

Added test scenarios that ping through a VXLAN tunnel between two
hypervisors. max_tunid is also validated.

Signed-off-by: Ihar Hrachyshka <ihrachys@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
booxter authored and numansiddique committed Sep 17, 2020
1 parent acbbbe1 commit b07f1bc
Show file tree
Hide file tree
Showing 9 changed files with 729 additions and 643 deletions.
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ OVN v20.09.0 - xx xxx xxxx
this mechanism should update their code to use this new table.
- Added support for external ip based NAT. Now, besides the logical ip,
external ips will also decide if a packet will be NATed or not.
- Added support for VXLAN encapsulation (not just for ramp/VTEP switches).

OVN v20.06.0
--------------------------
Expand Down
65 changes: 40 additions & 25 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ static void
put_encapsulation(enum mf_field_id mff_ovn_geneve,
const struct chassis_tunnel *tun,
const struct sbrec_datapath_binding *datapath,
uint16_t outport, struct ofpbuf *ofpacts)
uint16_t outport, bool is_ramp_switch,
struct ofpbuf *ofpacts)
{
if (tun->type == GENEVE) {
put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
Expand All @@ -191,7 +192,12 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve,
MFF_TUN_ID, 0, 64, ofpacts);
put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts);
} else if (tun->type == VXLAN) {
put_load(datapath->tunnel_key, MFF_TUN_ID, 0, 24, ofpacts);
uint64_t vni = datapath->tunnel_key;
if (!is_ramp_switch) {
/* Only some bits are used for regular tunnels. */
vni |= (uint64_t) outport << 12;
}
put_load(vni, MFF_TUN_ID, 0, 24, ofpacts);
} else {
OVS_NOT_REACHED();
}
Expand Down Expand Up @@ -323,8 +329,9 @@ put_remote_port_redirect_overlay(const struct
if (!rem_tun) {
return;
}
put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
port_key, ofpacts_p);
put_encapsulation(mff_ovn_geneve, tun, binding->datapath, port_key,
!strcmp(binding->type, "vtep"),
ofpacts_p);
/* Output to tunnel. */
ofpact_put_OUTPUT(ofpacts_p)->port = rem_tun->ofport;
} else {
Expand Down Expand Up @@ -360,8 +367,9 @@ put_remote_port_redirect_overlay(const struct
return;
}

put_encapsulation(mff_ovn_geneve, tun, binding->datapath,
port_key, ofpacts_p);
put_encapsulation(mff_ovn_geneve, tun, binding->datapath, port_key,
!strcmp(binding->type, "vtep"),
ofpacts_p);

/* Output to tunnels with active/backup */
struct ofpact_bundle *bundle = ofpact_put_BUNDLE(ofpacts_p);
Expand Down Expand Up @@ -1370,7 +1378,7 @@ consider_mc_group(enum mf_field_id mff_ovn_geneve,

if (!prev || tun->type != prev->type) {
put_encapsulation(mff_ovn_geneve, tun, mc->datapath,
mc->tunnel_key, &remote_ofpacts);
mc->tunnel_key, true, &remote_ofpacts);
prev = tun;
}
ofpact_put_OUTPUT(&remote_ofpacts)->port = tun->ofport;
Expand Down Expand Up @@ -1615,11 +1623,12 @@ physical_run(struct physical_ctx *p_ctx,
* Process packets that arrive from a remote hypervisor (by matching
* on tunnel in_port). */

/* Add flows for Geneve and STT encapsulations. These
* encapsulations have metadata about the ingress and egress logical
* ports. We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and
* MFF_LOG_OUTPORT from the tunnel key data, then resubmit to table
* 33 to handle packets to the local hypervisor. */
/* Add flows for Geneve, STT and VXLAN encapsulations. Geneve and STT
* encapsulations have metadata about the ingress and egress logical ports.
* VXLAN encapsulations have metadata about the egress logical port only.
* We set MFF_LOG_DATAPATH, MFF_LOG_INPORT, and MFF_LOG_OUTPORT from the
* tunnel key data where possible, then resubmit to table 33 to handle
* packets to the local hypervisor. */
HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
struct match match = MATCH_CATCHALL_INITIALIZER;
match_set_in_port(&match, tun->ofport);
Expand All @@ -1636,8 +1645,10 @@ physical_run(struct physical_ctx *p_ctx,
put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
} else if (tun->type == VXLAN) {
/* We'll handle VXLAN later. */
continue;
/* Add flows for non-VTEP tunnels. Split VNI into two 12-bit
* sections and use them for datapath and outport IDs. */
put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, &ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts);
} else {
OVS_NOT_REACHED();
}
Expand All @@ -1648,38 +1659,42 @@ physical_run(struct physical_ctx *p_ctx,
&ofpacts, hc_uuid);
}

/* Add flows for VXLAN encapsulations. Due to the limited amount of
* metadata, we only support VXLAN for connections to gateways. The
* VNI is used to populate MFF_LOG_DATAPATH. The gateway's logical
* port is set to MFF_LOG_INPORT. Then the packet is resubmitted to
* table 16 to determine the logical egress port. */
/* Handle ramp switch encapsulations. */
HMAP_FOR_EACH (tun, hmap_node, &tunnels) {
if (tun->type != VXLAN) {
continue;
}

SBREC_PORT_BINDING_TABLE_FOR_EACH (binding,
p_ctx->port_binding_table) {
struct match match = MATCH_CATCHALL_INITIALIZER;
if (strcmp(binding->type, "vtep")) {
continue;
}

if (!binding->chassis ||
!encaps_tunnel_id_match(tun->chassis_id,
binding->chassis->name, NULL)) {
continue;
}

struct match match = MATCH_CATCHALL_INITIALIZER;
match_set_in_port(&match, tun->ofport);
ofpbuf_clear(&ofpacts);

/* Add flows for ramp switches. The VNI is used to populate
* MFF_LOG_DATAPATH. The gateway's logical port is set to
* MFF_LOG_INPORT. Then the packet is resubmitted to table 8
* to determine the logical egress port. */
match_set_tun_id(&match, htonll(binding->datapath->tunnel_key));

ofpbuf_clear(&ofpacts);
put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts);
put_load(binding->tunnel_key, MFF_LOG_INPORT, 0, 15, &ofpacts);
/* For packets received from a vxlan tunnel, set a flag to that
/* For packets received from a ramp tunnel, set a flag to that
* effect. */
put_load(1, MFF_LOG_FLAGS, MLF_RCV_FROM_VXLAN_BIT, 1, &ofpacts);
put_load(1, MFF_LOG_FLAGS, MLF_RCV_FROM_RAMP_BIT, 1, &ofpacts);
put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ofpacts);

ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100,
ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 110,
binding->header_.uuid.parts[0],
&match, &ofpacts, hc_uuid);
}
Expand All @@ -1696,7 +1711,7 @@ physical_run(struct physical_ctx *p_ctx,
struct match match;
match_init_catchall(&match);
match_set_reg_masked(&match, MFF_LOG_FLAGS - MFF_REG0,
MLF_RCV_FROM_VXLAN, MLF_RCV_FROM_VXLAN);
MLF_RCV_FROM_RAMP, MLF_RCV_FROM_RAMP);

/* Resubmit to table 33. */
ofpbuf_clear(&ofpacts);
Expand Down
12 changes: 6 additions & 6 deletions include/ovn/logical-fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void ovn_init_symtab(struct shash *symtab);
/* MFF_LOG_FLAGS_REG bit assignments */
enum mff_log_flags_bits {
MLF_ALLOW_LOOPBACK_BIT = 0,
MLF_RCV_FROM_VXLAN_BIT = 1,
MLF_RCV_FROM_RAMP_BIT = 1,
MLF_FORCE_SNAT_FOR_DNAT_BIT = 2,
MLF_FORCE_SNAT_FOR_LB_BIT = 3,
MLF_LOCAL_ONLY_BIT = 4,
Expand All @@ -64,11 +64,11 @@ enum mff_log_flags {
/* Allow outputting back to inport. */
MLF_ALLOW_LOOPBACK = (1 << MLF_ALLOW_LOOPBACK_BIT),

/* Indicate that a packet was received from a VXLAN tunnel to
* compensate for the lack of egress port information available in
* VXLAN encapsulation. Egress port information is available for
* Geneve and STT tunnel types. */
MLF_RCV_FROM_VXLAN = (1 << MLF_RCV_FROM_VXLAN_BIT),
/* Indicate that a packet was received from a ramp switch to compensate for
* the lack of egress port information available in ramp switch
* encapsulation. Egress port information is available for Geneve, STT and
* regular VXLAN tunnel types. */
MLF_RCV_FROM_RAMP = (1 << MLF_RCV_FROM_RAMP_BIT),

/* Indicate that a packet needs a force SNAT in the gateway router when
* DNAT has taken place. */
Expand Down
4 changes: 4 additions & 0 deletions lib/ovn-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ void ovn_conn_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
#define OVN_MAX_DP_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM)
#define OVN_MIN_DP_KEY_GLOBAL (OVN_MAX_DP_KEY_LOCAL + 1)
#define OVN_MAX_DP_KEY_GLOBAL OVN_MAX_DP_KEY

#define OVN_MAX_DP_VXLAN_KEY ((1u << 12) - 1)
#define OVN_MAX_DP_VXLAN_KEY_LOCAL (OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM)

struct hmap;
void ovn_destroy_tnlids(struct hmap *tnlids);
void ovn_add_tnlid(struct hmap *set, uint32_t tnlid);
Expand Down
80 changes: 56 additions & 24 deletions northd/ovn-northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ struct northd_context {
struct ovsdb_idl *ovnsb_idl;
struct ovsdb_idl_txn *ovnnb_txn;
struct ovsdb_idl_txn *ovnsb_txn;
struct ovsdb_idl_index *sbrec_chassis_by_name;
struct ovsdb_idl_index *sbrec_ha_chassis_grp_by_name;
struct ovsdb_idl_index *sbrec_mcast_group_by_name_dp;
struct ovsdb_idl_index *sbrec_ip_mcast_by_dp;
Expand Down Expand Up @@ -1189,12 +1190,36 @@ join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
}
}

/* Returns true if any chassis record reachable through 'ovnsb_idl' lists at
 * least one encap whose type is "vxlan", and false otherwise (including when
 * there are no chassis records or no encaps at all). */
static bool
is_vxlan_mode(struct ovsdb_idl *ovnsb_idl)
{
    const struct sbrec_chassis *chassis;
    SBREC_CHASSIS_FOR_EACH (chassis, ovnsb_idl) {
        /* Use an unsigned index so the comparison against the 'n_encaps'
         * element count does not mix signed and unsigned operands. */
        for (size_t i = 0; i < chassis->n_encaps; i++) {
            if (!strcmp(chassis->encaps[i]->type, "vxlan")) {
                /* One VXLAN encap anywhere is enough; stop scanning. */
                return true;
            }
        }
    }
    return false;
}

static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
get_ovn_max_dp_key_local(struct northd_context *ctx)
{
if (is_vxlan_mode(ctx->ovnsb_idl)) {
/* OVN_MAX_DP_GLOBAL_NUM doesn't apply for vxlan mode. */
return OVN_MAX_DP_VXLAN_KEY;
}
return OVN_MAX_DP_KEY - OVN_MAX_DP_GLOBAL_NUM;
}

static uint32_t
ovn_datapath_allocate_key(struct northd_context *ctx, struct hmap *dp_tnlids)
{
static uint32_t hint;
return ovn_allocate_tnlid(dp_tnlids, "datapath", OVN_MIN_DP_KEY_LOCAL,
OVN_MAX_DP_KEY_LOCAL, &hint);
get_ovn_max_dp_key_local(ctx), &hint);
}

/* Updates the southbound Datapath_Binding table so that it contains the
Expand Down Expand Up @@ -1237,7 +1262,7 @@ build_datapaths(struct northd_context *ctx, struct hmap *datapaths,
}
}
if (!tunnel_key) {
tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
tunnel_key = ovn_datapath_allocate_key(ctx, &dp_tnlids);
if (!tunnel_key) {
break;
}
Expand Down Expand Up @@ -12110,32 +12135,34 @@ ovnnb_db_run(struct northd_context *ctx,
}
}

if (!mac_addr_prefix || !monitor_mac) {
struct smap options;
smap_clone(&options, &nb->options);
struct smap options;
smap_clone(&options, &nb->options);

if (!mac_addr_prefix) {
eth_addr_random(&mac_prefix);
memset(&mac_prefix.ea[3], 0, 3);
if (!mac_addr_prefix) {
eth_addr_random(&mac_prefix);
memset(&mac_prefix.ea[3], 0, 3);

smap_add_format(&options, "mac_prefix",
"%02"PRIx8":%02"PRIx8":%02"PRIx8,
mac_prefix.ea[0], mac_prefix.ea[1],
mac_prefix.ea[2]);
}
smap_add_format(&options, "mac_prefix",
"%02"PRIx8":%02"PRIx8":%02"PRIx8,
mac_prefix.ea[0], mac_prefix.ea[1],
mac_prefix.ea[2]);
}

if (!monitor_mac) {
eth_addr_random(&svc_monitor_mac_ea);
snprintf(svc_monitor_mac, sizeof svc_monitor_mac,
ETH_ADDR_FMT, ETH_ADDR_ARGS(svc_monitor_mac_ea));
smap_replace(&options, "svc_monitor_mac", svc_monitor_mac);
}
if (!monitor_mac) {
eth_addr_random(&svc_monitor_mac_ea);
snprintf(svc_monitor_mac, sizeof svc_monitor_mac,
ETH_ADDR_FMT, ETH_ADDR_ARGS(svc_monitor_mac_ea));
smap_replace(&options, "svc_monitor_mac", svc_monitor_mac);
}

nbrec_nb_global_verify_options(nb);
nbrec_nb_global_set_options(nb, &options);
char *max_tunid = xasprintf("%d", get_ovn_max_dp_key_local(ctx));
smap_replace(&options, "max_tunid", max_tunid);
free(max_tunid);

smap_destroy(&options);
}
nbrec_nb_global_verify_options(nb);
nbrec_nb_global_set_options(nb, &options);

smap_destroy(&options);

/* Update the probe interval. */
northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb);
Expand Down Expand Up @@ -13009,6 +13036,10 @@ main(int argc, char *argv[])
ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis);
ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_name);
ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_other_config);
ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_chassis_col_encaps);

ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_encap);
ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_encap_col_type);

ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_chassis_private);
ovsdb_idl_add_column(ovnsb_idl_loop.idl,
Expand Down Expand Up @@ -13122,6 +13153,7 @@ main(int argc, char *argv[])
.ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
.ovnsb_idl = ovnsb_idl_loop.idl,
.ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
.sbrec_chassis_by_name = sbrec_chassis_by_name,
.sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name,
.sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp,
.sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
Expand Down
Loading

0 comments on commit b07f1bc

Please sign in to comment.