Skip to content

Commit ee75aef

Browse files
Björn Töpel authored and
borkmann committed
bpf, xdp: Restructure redirect actions
The XDP_REDIRECT implementations for maps and non-maps are fairly similar, but obviously need to take different code paths depending on whether the target is using a map or not. Today, the redirect targets for XDP either use a map or are based on ifindex. Here, the map type and id are added to bpf_redirect_info, instead of the actual map. Map type, map item/ifindex, and the map_id (if any) are passed to xdp_do_redirect(). For ifindex-based redirect, used by the bpf_redirect() XDP BPF helper, a special map type/id pair is used. Map type of UNSPEC together with map id equal to INT_MAX has the special meaning of an ifindex-based redirect. Note that valid map ids are 1 inclusive, INT_MAX exclusive ([1,INT_MAX[). In addition to making the code easier to follow, using explicit type and id in bpf_redirect_info has a slight positive performance impact by avoiding a pointer indirection for the map type lookup, and instead using the cacheline for bpf_redirect_info. Since the actual map is not passed via bpf_redirect_info anymore, the map lookup is only done in the BPF helper. This means that the bpf_clear_redirect_map() function can be removed. The actual map item is RCU protected. The bpf_redirect_info flags member is not used by XDP, and is not read/written any more. The map member is only written to when required/used, and not unconditionally. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/bpf/20210308112907.559576-3-bjorn.topel@gmail.com
1 parent e6a4750 commit ee75aef

File tree

6 files changed

+116
-129
lines changed

6 files changed

+116
-129
lines changed

include/linux/filter.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,8 @@ struct bpf_redirect_info {
646646
u32 flags;
647647
u32 tgt_index;
648648
void *tgt_value;
649-
struct bpf_map *map;
649+
u32 map_id;
650+
enum bpf_map_type map_type;
650651
u32 kern_flags;
651652
struct bpf_nh_params nh;
652653
};
@@ -1488,13 +1489,14 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
14881489
* performs multiple lookups, the last one always takes
14891490
* precedence.
14901491
*/
1491-
WRITE_ONCE(ri->map, NULL);
1492+
ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
1493+
ri->map_type = BPF_MAP_TYPE_UNSPEC;
14921494
return flags;
14931495
}
14941496

1495-
ri->flags = flags;
14961497
ri->tgt_index = ifindex;
1497-
WRITE_ONCE(ri->map, map);
1498+
ri->map_id = map->id;
1499+
ri->map_type = map->map_type;
14981500

14991501
return XDP_REDIRECT;
15001502
}

include/trace/events/xdp.h

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -86,19 +86,15 @@ struct _bpf_dtab_netdev {
8686
};
8787
#endif /* __DEVMAP_OBJ_TYPE */
8888

89-
#define devmap_ifindex(tgt, map) \
90-
(((map->map_type == BPF_MAP_TYPE_DEVMAP || \
91-
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)) ? \
92-
((struct _bpf_dtab_netdev *)tgt)->dev->ifindex : 0)
93-
9489
DECLARE_EVENT_CLASS(xdp_redirect_template,
9590

9691
TP_PROTO(const struct net_device *dev,
9792
const struct bpf_prog *xdp,
9893
const void *tgt, int err,
99-
const struct bpf_map *map, u32 index),
94+
enum bpf_map_type map_type,
95+
u32 map_id, u32 index),
10096

101-
TP_ARGS(dev, xdp, tgt, err, map, index),
97+
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index),
10298

10399
TP_STRUCT__entry(
104100
__field(int, prog_id)
@@ -111,14 +107,22 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
111107
),
112108

113109
TP_fast_assign(
110+
u32 ifindex = 0, map_index = index;
111+
112+
if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
113+
ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
114+
} else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
115+
ifindex = index;
116+
map_index = 0;
117+
}
118+
114119
__entry->prog_id = xdp->aux->id;
115120
__entry->act = XDP_REDIRECT;
116121
__entry->ifindex = dev->ifindex;
117122
__entry->err = err;
118-
__entry->to_ifindex = map ? devmap_ifindex(tgt, map) :
119-
index;
120-
__entry->map_id = map ? map->id : 0;
121-
__entry->map_index = map ? index : 0;
123+
__entry->to_ifindex = ifindex;
124+
__entry->map_id = map_id;
125+
__entry->map_index = map_index;
122126
),
123127

124128
TP_printk("prog_id=%d action=%s ifindex=%d to_ifindex=%d err=%d"
@@ -133,45 +137,49 @@ DEFINE_EVENT(xdp_redirect_template, xdp_redirect,
133137
TP_PROTO(const struct net_device *dev,
134138
const struct bpf_prog *xdp,
135139
const void *tgt, int err,
136-
const struct bpf_map *map, u32 index),
137-
TP_ARGS(dev, xdp, tgt, err, map, index)
140+
enum bpf_map_type map_type,
141+
u32 map_id, u32 index),
142+
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
138143
);
139144

140145
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_err,
141146
TP_PROTO(const struct net_device *dev,
142147
const struct bpf_prog *xdp,
143148
const void *tgt, int err,
144-
const struct bpf_map *map, u32 index),
145-
TP_ARGS(dev, xdp, tgt, err, map, index)
149+
enum bpf_map_type map_type,
150+
u32 map_id, u32 index),
151+
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
146152
);
147153

148-
#define _trace_xdp_redirect(dev, xdp, to) \
149-
trace_xdp_redirect(dev, xdp, NULL, 0, NULL, to)
154+
#define _trace_xdp_redirect(dev, xdp, to) \
155+
trace_xdp_redirect(dev, xdp, NULL, 0, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
150156

151-
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
152-
trace_xdp_redirect_err(dev, xdp, NULL, err, NULL, to)
157+
#define _trace_xdp_redirect_err(dev, xdp, to, err) \
158+
trace_xdp_redirect_err(dev, xdp, NULL, err, BPF_MAP_TYPE_UNSPEC, INT_MAX, to)
153159

154-
#define _trace_xdp_redirect_map(dev, xdp, to, map, index) \
155-
trace_xdp_redirect(dev, xdp, to, 0, map, index)
160+
#define _trace_xdp_redirect_map(dev, xdp, to, map_type, map_id, index) \
161+
trace_xdp_redirect(dev, xdp, to, 0, map_type, map_id, index)
156162

157-
#define _trace_xdp_redirect_map_err(dev, xdp, to, map, index, err) \
158-
trace_xdp_redirect_err(dev, xdp, to, err, map, index)
163+
#define _trace_xdp_redirect_map_err(dev, xdp, to, map_type, map_id, index, err) \
164+
trace_xdp_redirect_err(dev, xdp, to, err, map_type, map_id, index)
159165

160166
/* not used anymore, but kept around so as not to break old programs */
161167
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map,
162168
TP_PROTO(const struct net_device *dev,
163169
const struct bpf_prog *xdp,
164170
const void *tgt, int err,
165-
const struct bpf_map *map, u32 index),
166-
TP_ARGS(dev, xdp, tgt, err, map, index)
171+
enum bpf_map_type map_type,
172+
u32 map_id, u32 index),
173+
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
167174
);
168175

169176
DEFINE_EVENT(xdp_redirect_template, xdp_redirect_map_err,
170177
TP_PROTO(const struct net_device *dev,
171178
const struct bpf_prog *xdp,
172179
const void *tgt, int err,
173-
const struct bpf_map *map, u32 index),
174-
TP_ARGS(dev, xdp, tgt, err, map, index)
180+
enum bpf_map_type map_type,
181+
u32 map_id, u32 index),
182+
TP_ARGS(dev, xdp, tgt, err, map_type, map_id, index)
175183
);
176184

177185
TRACE_EVENT(xdp_cpumap_kthread,

kernel/bpf/cpumap.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
543543
* complete.
544544
*/
545545

546-
bpf_clear_redirect_map(map);
547546
synchronize_rcu();
548547

549548
/* For cpu_map the remote CPUs can still be using the entries

kernel/bpf/devmap.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
197197
list_del_rcu(&dtab->list);
198198
spin_unlock(&dev_map_lock);
199199

200-
bpf_clear_redirect_map(map);
201200
synchronize_rcu();
202201

203202
/* Make sure prior __dev_map_entry_free() have completed. */

net/core/filter.c

Lines changed: 75 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -3918,23 +3918,6 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
39183918
.arg2_type = ARG_ANYTHING,
39193919
};
39203920

3921-
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
3922-
struct bpf_map *map, struct xdp_buff *xdp)
3923-
{
3924-
switch (map->map_type) {
3925-
case BPF_MAP_TYPE_DEVMAP:
3926-
case BPF_MAP_TYPE_DEVMAP_HASH:
3927-
return dev_map_enqueue(fwd, xdp, dev_rx);
3928-
case BPF_MAP_TYPE_CPUMAP:
3929-
return cpu_map_enqueue(fwd, xdp, dev_rx);
3930-
case BPF_MAP_TYPE_XSKMAP:
3931-
return __xsk_map_redirect(fwd, xdp);
3932-
default:
3933-
return -EBADRQC;
3934-
}
3935-
return 0;
3936-
}
3937-
39383921
void xdp_do_flush(void)
39393922
{
39403923
__dev_flush();
@@ -3943,55 +3926,52 @@ void xdp_do_flush(void)
39433926
}
39443927
EXPORT_SYMBOL_GPL(xdp_do_flush);
39453928

3946-
void bpf_clear_redirect_map(struct bpf_map *map)
3947-
{
3948-
struct bpf_redirect_info *ri;
3949-
int cpu;
3950-
3951-
for_each_possible_cpu(cpu) {
3952-
ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3953-
/* Avoid polluting remote cacheline due to writes if
3954-
* not needed. Once we pass this test, we need the
3955-
* cmpxchg() to make sure it hasn't been changed in
3956-
* the meantime by remote CPU.
3957-
*/
3958-
if (unlikely(READ_ONCE(ri->map) == map))
3959-
cmpxchg(&ri->map, map, NULL);
3960-
}
3961-
}
3962-
39633929
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
39643930
struct bpf_prog *xdp_prog)
39653931
{
39663932
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3967-
struct bpf_map *map = READ_ONCE(ri->map);
3968-
u32 index = ri->tgt_index;
3933+
enum bpf_map_type map_type = ri->map_type;
39693934
void *fwd = ri->tgt_value;
3935+
u32 map_id = ri->map_id;
39703936
int err;
39713937

3972-
ri->tgt_index = 0;
3973-
ri->tgt_value = NULL;
3974-
WRITE_ONCE(ri->map, NULL);
3938+
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
3939+
ri->map_type = BPF_MAP_TYPE_UNSPEC;
39753940

3976-
if (unlikely(!map)) {
3977-
fwd = dev_get_by_index_rcu(dev_net(dev), index);
3978-
if (unlikely(!fwd)) {
3979-
err = -EINVAL;
3980-
goto err;
3941+
switch (map_type) {
3942+
case BPF_MAP_TYPE_DEVMAP:
3943+
fallthrough;
3944+
case BPF_MAP_TYPE_DEVMAP_HASH:
3945+
err = dev_map_enqueue(fwd, xdp, dev);
3946+
break;
3947+
case BPF_MAP_TYPE_CPUMAP:
3948+
err = cpu_map_enqueue(fwd, xdp, dev);
3949+
break;
3950+
case BPF_MAP_TYPE_XSKMAP:
3951+
err = __xsk_map_redirect(fwd, xdp);
3952+
break;
3953+
case BPF_MAP_TYPE_UNSPEC:
3954+
if (map_id == INT_MAX) {
3955+
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
3956+
if (unlikely(!fwd)) {
3957+
err = -EINVAL;
3958+
break;
3959+
}
3960+
err = dev_xdp_enqueue(fwd, xdp, dev);
3961+
break;
39813962
}
3982-
3983-
err = dev_xdp_enqueue(fwd, xdp, dev);
3984-
} else {
3985-
err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
3963+
fallthrough;
3964+
default:
3965+
err = -EBADRQC;
39863966
}
39873967

39883968
if (unlikely(err))
39893969
goto err;
39903970

3991-
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3971+
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
39923972
return 0;
39933973
err:
3994-
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3974+
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
39953975
return err;
39963976
}
39973977
EXPORT_SYMBOL_GPL(xdp_do_redirect);
@@ -4000,73 +3980,71 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
40003980
struct sk_buff *skb,
40013981
struct xdp_buff *xdp,
40023982
struct bpf_prog *xdp_prog,
4003-
struct bpf_map *map)
3983+
void *fwd,
3984+
enum bpf_map_type map_type, u32 map_id)
40043985
{
40053986
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4006-
u32 index = ri->tgt_index;
4007-
void *fwd = ri->tgt_value;
4008-
int err = 0;
4009-
4010-
ri->tgt_index = 0;
4011-
ri->tgt_value = NULL;
4012-
WRITE_ONCE(ri->map, NULL);
4013-
4014-
if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
4015-
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
4016-
struct bpf_dtab_netdev *dst = fwd;
3987+
int err;
40173988

4018-
err = dev_map_generic_redirect(dst, skb, xdp_prog);
3989+
switch (map_type) {
3990+
case BPF_MAP_TYPE_DEVMAP:
3991+
fallthrough;
3992+
case BPF_MAP_TYPE_DEVMAP_HASH:
3993+
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
40193994
if (unlikely(err))
40203995
goto err;
4021-
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
4022-
struct xdp_sock *xs = fwd;
4023-
4024-
err = xsk_generic_rcv(xs, xdp);
3996+
break;
3997+
case BPF_MAP_TYPE_XSKMAP:
3998+
err = xsk_generic_rcv(fwd, xdp);
40253999
if (err)
40264000
goto err;
40274001
consume_skb(skb);
4028-
} else {
4002+
break;
4003+
default:
40294004
/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
40304005
err = -EBADRQC;
40314006
goto err;
40324007
}
40334008

4034-
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
4009+
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
40354010
return 0;
40364011
err:
4037-
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
4012+
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
40384013
return err;
40394014
}
40404015

40414016
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
40424017
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
40434018
{
40444019
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4045-
struct bpf_map *map = READ_ONCE(ri->map);
4046-
u32 index = ri->tgt_index;
4047-
struct net_device *fwd;
4048-
int err = 0;
4049-
4050-
if (map)
4051-
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
4052-
map);
4053-
ri->tgt_index = 0;
4054-
fwd = dev_get_by_index_rcu(dev_net(dev), index);
4055-
if (unlikely(!fwd)) {
4056-
err = -EINVAL;
4057-
goto err;
4058-
}
4020+
enum bpf_map_type map_type = ri->map_type;
4021+
void *fwd = ri->tgt_value;
4022+
u32 map_id = ri->map_id;
4023+
int err;
40594024

4060-
err = xdp_ok_fwd_dev(fwd, skb->len);
4061-
if (unlikely(err))
4062-
goto err;
4025+
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4026+
ri->map_type = BPF_MAP_TYPE_UNSPEC;
40634027

4064-
skb->dev = fwd;
4065-
_trace_xdp_redirect(dev, xdp_prog, index);
4066-
generic_xdp_tx(skb, xdp_prog);
4067-
return 0;
4028+
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4029+
fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4030+
if (unlikely(!fwd)) {
4031+
err = -EINVAL;
4032+
goto err;
4033+
}
4034+
4035+
err = xdp_ok_fwd_dev(fwd, skb->len);
4036+
if (unlikely(err))
4037+
goto err;
4038+
4039+
skb->dev = fwd;
4040+
_trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4041+
generic_xdp_tx(skb, xdp_prog);
4042+
return 0;
4043+
}
4044+
4045+
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
40684046
err:
4069-
_trace_xdp_redirect_err(dev, xdp_prog, index, err);
4047+
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
40704048
return err;
40714049
}
40724050

@@ -4077,10 +4055,12 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
40774055
if (unlikely(flags))
40784056
return XDP_ABORTED;
40794057

4080-
ri->flags = flags;
4058+
/* NB! Map type UNSPEC and map_id == INT_MAX (never generated
4059+
* by map_idr) is used for ifindex based XDP redirect.
4060+
*/
40814061
ri->tgt_index = ifindex;
4082-
ri->tgt_value = NULL;
4083-
WRITE_ONCE(ri->map, NULL);
4062+
ri->map_id = INT_MAX;
4063+
ri->map_type = BPF_MAP_TYPE_UNSPEC;
40844064

40854065
return XDP_REDIRECT;
40864066
}

0 commit comments

Comments
 (0)