Skip to content

Commit 583d094

Browse files
jrfastab authored and kernel-patches-bot committed
bpf, sockmap: Allow skipping sk_skb parser program
Currently, we often run with a nop parser, namely one that just does 'return skb->len'. This happens when either our verdict program can handle streaming data or it is only looking at socket data such as IP addresses and other metadata associated with the flow. The second case is common for an L3/L4 proxy, for instance. So let's allow loading programs without the parser; then we can skip the stream parser logic and avoid having to add a BPF program that is effectively a nop. Signed-off-by: John Fastabend <john.fastabend@gmail.com>
1 parent 453d93f commit 583d094

File tree

3 files changed

+95
-7
lines changed

3 files changed

+95
-7
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,8 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node);
308308
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
309309
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
310310
void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
311+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
312+
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
311313

312314
int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
313315
struct sk_msg *msg);

net/core/skmsg.c

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
625625
rcu_assign_sk_user_data(sk, NULL);
626626
if (psock->progs.skb_parser)
627627
sk_psock_stop_strp(sk, psock);
628+
else if (psock->progs.skb_verdict)
629+
sk_psock_stop_verdict(sk, psock);
628630
write_unlock_bh(&sk->sk_callback_lock);
629631
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
630632

@@ -864,6 +866,57 @@ static void sk_psock_strp_data_ready(struct sock *sk)
864866
rcu_read_unlock();
865867
}
866868

869+
/*
 * read_sock actor used when a verdict program is attached without a parser.
 *
 * @desc:     read descriptor; desc->arg.data carries the struct sock, and
 *            desc->error is set to -ENOMEM if the skb cannot be cloned.
 * @skb:      buffer handed in by the read_sock implementation.
 * @offset:   unused here.
 * @orig_len: unused here.
 *
 * Returns skb->len once the clone has been handed to
 * sk_psock_verdict_apply(), or 0 if the clone failed or the socket no
 * longer has a psock.
 */
static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
				 unsigned int offset, size_t orig_len)
{
	struct sock *sk = (struct sock *)desc->arg.data;
	int verdict = __SK_DROP;	/* stays a drop if no program is attached */
	int consumed = skb->len;
	struct bpf_prog *verdict_prog;
	struct sk_psock *psock;
	struct sk_buff *clone;

	/* Work on a clone so sk_eat_skb() in tcp_read_sock does not drop our data. */
	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone) {
		desc->error = -ENOMEM;
		return 0;
	}

	rcu_read_lock();
	psock = sk_psock(sk);
	if (likely(psock)) {
		skb_set_owner_r(clone, sk);
		verdict_prog = READ_ONCE(psock->progs.skb_verdict);
		if (likely(verdict_prog)) {
			tcp_skb_bpf_redirect_clear(clone);
			verdict = sk_psock_bpf_run(psock, verdict_prog, clone);
			verdict = sk_psock_map_verd(verdict,
						    tcp_skb_bpf_redirect_fetch(clone));
		}
		sk_psock_verdict_apply(psock, clone, verdict);
	} else {
		/* No psock: release the clone and report nothing consumed. */
		consumed = 0;
		kfree_skb(clone);
	}
	rcu_read_unlock();

	return consumed;
}
904+
905+
static void sk_psock_verdict_data_ready(struct sock *sk)
906+
{
907+
struct socket *sock = sk->sk_socket;
908+
read_descriptor_t desc;
909+
910+
if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
911+
return;
912+
913+
desc.arg.data = sk;
914+
desc.error = 0;
915+
desc.count = 1;
916+
917+
sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
918+
}
919+
867920
static void sk_psock_write_space(struct sock *sk)
868921
{
869922
struct sk_psock *psock;
@@ -893,6 +946,19 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
893946
return strp_init(&psock->parser.strp, sk, &cb);
894947
}
895948

949+
void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
950+
{
951+
struct sk_psock_parser *parser = &psock->parser;
952+
953+
if (parser->enabled)
954+
return;
955+
956+
parser->saved_data_ready = sk->sk_data_ready;
957+
sk->sk_data_ready = sk_psock_verdict_data_ready;
958+
sk->sk_write_space = sk_psock_write_space;
959+
parser->enabled = true;
960+
}
961+
896962
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
897963
{
898964
struct sk_psock_parser *parser = &psock->parser;
@@ -918,3 +984,15 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
918984
strp_stop(&parser->strp);
919985
parser->enabled = false;
920986
}
987+
988+
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
989+
{
990+
struct sk_psock_parser *parser = &psock->parser;
991+
992+
if (!parser->enabled)
993+
return;
994+
995+
sk->sk_data_ready = parser->saved_data_ready;
996+
parser->saved_data_ready = NULL;
997+
parser->enabled = false;
998+
}

net/core/sock_map.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ static void sock_map_add_link(struct sk_psock *psock,
148148
static void sock_map_del_link(struct sock *sk,
149149
struct sk_psock *psock, void *link_raw)
150150
{
151+
bool strp_stop = false, verdict_stop = false;
151152
struct sk_psock_link *link, *tmp;
152-
bool strp_stop = false;
153153

154154
spin_lock_bh(&psock->link_lock);
155155
list_for_each_entry_safe(link, tmp, &psock->link, list) {
@@ -159,14 +159,19 @@ static void sock_map_del_link(struct sock *sk,
159159
map);
160160
if (psock->parser.enabled && stab->progs.skb_parser)
161161
strp_stop = true;
162+
if (psock->parser.enabled && stab->progs.skb_verdict)
163+
verdict_stop = true;
162164
list_del(&link->list);
163165
sk_psock_free_link(link);
164166
}
165167
}
166168
spin_unlock_bh(&psock->link_lock);
167-
if (strp_stop) {
169+
if (strp_stop || verdict_stop) {
168170
write_lock_bh(&sk->sk_callback_lock);
169-
sk_psock_stop_strp(sk, psock);
171+
if (strp_stop)
172+
sk_psock_stop_strp(sk, psock);
173+
else
174+
sk_psock_stop_verdict(sk, psock);
170175
write_unlock_bh(&sk->sk_callback_lock);
171176
}
172177
}
@@ -288,16 +293,19 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
288293
write_lock_bh(&sk->sk_callback_lock);
289294
if (skb_parser && skb_verdict && !psock->parser.enabled) {
290295
ret = sk_psock_init_strp(sk, psock);
291-
if (ret) {
292-
write_unlock_bh(&sk->sk_callback_lock);
293-
goto out_drop;
294-
}
296+
if (ret)
297+
goto out_unlock_drop;
295298
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
296299
psock_set_prog(&psock->progs.skb_parser, skb_parser);
297300
sk_psock_start_strp(sk, psock);
301+
} else if (!skb_parser && skb_verdict && !psock->parser.enabled) {
302+
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
303+
sk_psock_start_verdict(sk,psock);
298304
}
299305
write_unlock_bh(&sk->sk_callback_lock);
300306
return 0;
307+
out_unlock_drop:
308+
write_unlock_bh(&sk->sk_callback_lock);
301309
out_drop:
302310
sk_psock_put(sk, psock);
303311
out_progs:

0 commit comments

Comments
 (0)