Skip to content

Commit

Permalink
Merge branch 'net-fib_rules-add-flow-label-selector-support'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
net: fib_rules: Add flow label selector support

In some deployments users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port and the DSCP
and use this information to route packets accordingly.

Redirecting traffic to a routing table based on the flow label is not
currently possible with Linux as FIB rules cannot match on it despite
the flow label being available in the IPv6 flow key.

This patchset extends FIB rules to match on the flow label with a mask.
Future patches will add mask attributes to L4 ports and DSCP matches.

Patches #1-#5 gradually extend FIB rules to match on the flow label.

Patches #6-#7 allow user space to specify a flow label in route get
requests. This is useful for both debugging and testing.

Patch #8 adjusts the fib6_table_lookup tracepoint to print the flow
label to the trace buffer for better observability.

Patch #9 extends the FIB rule selftest with flow label test cases while
utilizing the route get functionality from patch #6.
====================

Link: https://patch.msgid.link/20241216171201.274644-1-idosch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Dec 19, 2024
2 parents 4fefbc6 + 5760711 commit 6b3099e
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 6 deletions.
7 changes: 7 additions & 0 deletions Documentation/netlink/specs/rt_route.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ attribute-sets:
-
name: rta-nh-id
type: u32
-
name: rta-flowlabel
type: u32
byte-order: big-endian
display-hint: hex
-
name: rta-metrics
attributes:
Expand Down Expand Up @@ -260,6 +265,7 @@ operations:
- rta-dport
- rta-mark
- rta-uid
- rta-flowlabel
reply:
value: 24
attributes: &all-route-attrs
Expand Down Expand Up @@ -299,6 +305,7 @@ operations:
- rta-sport
- rta-dport
- rta-nh-id
- rta-flowlabel
dump:
request:
value: 26
Expand Down
12 changes: 12 additions & 0 deletions Documentation/netlink/specs/rt_rule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,16 @@ attribute-sets:
-
name: dscp
type: u8
-
name: flowlabel
type: u32
byte-order: big-endian
display-hint: hex
-
name: flowlabel-mask
type: u32
byte-order: big-endian
display-hint: hex

operations:
enum-model: directional
Expand Down Expand Up @@ -203,6 +213,8 @@ operations:
- sport-range
- dport-range
- dscp
- flowlabel
- flowlabel-mask
-
name: newrule-ntf
doc: Notify a rule creation
Expand Down
8 changes: 5 additions & 3 deletions include/trace/events/fib6.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ TRACE_EVENT(fib6_table_lookup,
__field( int, err )
__field( int, oif )
__field( int, iif )
__field( u32, flowlabel )
__field( __u8, tos )
__field( __u8, scope )
__field( __u8, flags )
Expand All @@ -42,6 +43,7 @@ TRACE_EVENT(fib6_table_lookup,
__entry->err = ip6_rt_type_to_error(res->fib6_type);
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->flowlabel = ntohl(flowi6_get_flowlabel(flp));
__entry->tos = ip6_tclass(flp->flowlabel);
__entry->scope = flp->flowi6_scope;
__entry->flags = flp->flowi6_flags;
Expand Down Expand Up @@ -76,11 +78,11 @@ TRACE_EVENT(fib6_table_lookup,
}
),

TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u flowlabel %#x tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
__entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
__entry->src, __entry->sport, __entry->dst, __entry->dport,
__entry->tos, __entry->scope, __entry->flags,
__entry->name, __entry->gw, __entry->err)
__entry->flowlabel, __entry->tos, __entry->scope,
__entry->flags, __entry->name, __entry->gw, __entry->err)
);

#endif /* _TRACE_FIB6_H */
Expand Down
2 changes: 2 additions & 0 deletions include/uapi/linux/fib_rules.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ enum {
FRA_SPORT_RANGE, /* sport */
FRA_DPORT_RANGE, /* dport */
FRA_DSCP, /* dscp */
FRA_FLOWLABEL, /* flowlabel */
FRA_FLOWLABEL_MASK, /* flowlabel mask */
__FRA_MAX
};

Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@ enum rtattr_type_t {
RTA_SPORT,
RTA_DPORT,
RTA_NH_ID,
RTA_FLOWLABEL,
__RTA_MAX
};

Expand Down
2 changes: 2 additions & 0 deletions net/core/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
[FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
[FRA_FLOWLABEL] = { .type = NLA_BE32 },
[FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 },
};

int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
Expand Down
6 changes: 6 additions & 0 deletions net/ipv4/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
int err = -EINVAL;
struct fib4_rule *rule4 = (struct fib4_rule *) rule;

if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) {
NL_SET_ERR_MSG(extack,
"Flow label cannot be specified for IPv4 FIB rules");
goto errout;
}

if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
"Invalid dsfield (tos): ECN bits must be 0");
Expand Down
57 changes: 55 additions & 2 deletions net/ipv6/fib6_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
__be32 flowlabel;
__be32 flowlabel_mask;
dscp_t dscp;
u8 dscp_full:1; /* DSCP or TOS selector */
};
Expand All @@ -34,7 +36,7 @@ static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);

if (r->dst.plen || r->src.plen || r->dscp)
if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask)
return false;
return fib_rule_matchall(rule);
}
Expand Down Expand Up @@ -332,6 +334,9 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;

if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
return 0;

if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;

Expand Down Expand Up @@ -360,6 +365,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
return 0;
}

/*
 * Fill in the flow label selector of an IPv6 FIB rule from netlink attributes.
 *
 * @tb:     parsed attribute table; FRA_FLOWLABEL and FRA_FLOWLABEL_MASK are
 *          both read from it and both must be present.
 * @rule6:  rule whose flowlabel / flowlabel_mask fields are written on success.
 * @extack: extended ack used to report which check failed.
 *
 * Returns 0 on success. Returns -EINVAL, leaving @rule6 untouched, when either
 * attribute is missing, when the mask has bits set outside
 * IPV6_FLOWLABEL_MASK, or when the label has bits set outside the mask.
 */
static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
				  struct netlink_ext_ack *extack)
{
	__be32 label, mask;

	/* The selector is only accepted as a (label, mask) pair. */
	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL))
		return -EINVAL;
	if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK))
		return -EINVAL;

	label = nla_get_be32(tb[FRA_FLOWLABEL]);
	mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]);

	/* The mask may not select bits outside IPV6_FLOWLABEL_MASK. */
	if (mask & ~IPV6_FLOWLABEL_MASK) {
		NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK],
				    "Invalid flow label mask");
		return -EINVAL;
	}

	/* A label bit the mask does not cover could never be compared. */
	if (label & ~mask) {
		NL_SET_ERR_MSG(extack, "Flow label and mask do not match");
		return -EINVAL;
	}

	rule6->flowlabel = label;
	rule6->flowlabel_mask = mask;

	return 0;
}

static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
Expand All @@ -379,6 +413,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
goto errout;

if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
goto errout;

if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
Expand Down Expand Up @@ -444,6 +482,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
return 0;
}

if (tb[FRA_FLOWLABEL] &&
nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
return 0;

if (tb[FRA_FLOWLABEL_MASK] &&
nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask)
return 0;

if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
Expand Down Expand Up @@ -472,6 +518,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->tos = inet_dscp_to_dsfield(rule6->dscp);
}

if (rule6->flowlabel_mask &&
(nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) ||
nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask)))
goto nla_put_failure;

if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
(rule6->src.plen &&
Expand All @@ -487,7 +538,9 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
+ nla_total_size(16) /* src */
+ nla_total_size(1); /* dscp */
+ nla_total_size(1) /* dscp */
+ nla_total_size(4) /* flowlabel */
+ nla_total_size(4); /* flowlabel mask */
}

static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
Expand Down
20 changes: 19 additions & 1 deletion net/ipv6/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -5005,6 +5005,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
[RTA_FLOWLABEL] = { .type = NLA_BE32 },
};

static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
Expand All @@ -5030,6 +5031,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}

if (tb[RTA_FLOWLABEL]) {
NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
"Flow label cannot be specified for this operation");
goto errout;
}

*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
Expand Down Expand Up @@ -6013,6 +6020,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}

if (tb[RTA_FLOWLABEL] &&
(nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
"Invalid flow label");
return -EINVAL;
}

for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
Expand All @@ -6027,6 +6041,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
Expand All @@ -6049,6 +6064,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
__be32 flowlabel;
bool fibmatch;

err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
Expand All @@ -6057,7 +6073,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,

err = -EINVAL;
rtm = nlmsg_data(nlh);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);

if (tb[RTA_SRC]) {
Expand Down Expand Up @@ -6103,6 +6118,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}

flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);

if (iif) {
struct net_device *dev;
int flags = 0;
Expand Down
31 changes: 31 additions & 0 deletions tools/testing/selftests/net/fib_rule_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,37 @@ fib_rule6_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi

fib_check_iproute_support "flowlabel" "flowlabel"
if [ $? -eq 0 ]; then
match="flowlabel 0xfffff"
getmatch="flowlabel 0xfffff"
getnomatch="flowlabel 0xf"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "flowlabel redirect to table" \
"flowlabel no redirect to table"

match="flowlabel 0xfffff"
getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif flowlabel redirect to table" \
"iif flowlabel no redirect to table"

match="flowlabel 0x08000/0x08000"
getmatch="flowlabel 0xfffff"
getnomatch="flowlabel 0xf7fff"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "flowlabel masked redirect to table" \
"flowlabel masked no redirect to table"

match="flowlabel 0x08000/0x08000"
getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
"$getnomatch" "iif flowlabel masked redirect to table" \
"iif flowlabel masked no redirect to table"
fi
}

fib_rule6_vrf_test()
Expand Down

0 comments on commit 6b3099e

Please sign in to comment.