Skip to content

Commit 4a1f69b

Browse files
author
Paolo Abeni
committed
Merge branch 'mitigate-double-allocations-in-ioam6_iptunnel'
Justin Iurman says: ==================== Mitigate double allocations in ioam6_iptunnel Commit dce5251 ("net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue") fixed the double allocation issue in ioam6_iptunnel. However, since commit 92191dd ("net: ipv6: fix dst ref loops in rpl, seg6 and ioam6 lwtunnels"), the fix was left incomplete. Because the cache is now empty when the dst_entry is the same post transformation in order to avoid a reference loop, the double reallocation is back for such cases (e.g., inline mode) which are valid for IOAM. This patch provides a way to detect such cases without having a reference loop in the cache, and so to avoid the double reallocation issue for all cases again. v1: https://lore.kernel.org/netdev/20250410152432.30246-1-justin.iurman@uliege.be/T/#t ==================== Link: https://patch.msgid.link/20250415112554.23823-1-justin.iurman@uliege.be Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2 parents 9a0b084 + 47ce7c8 commit 4a1f69b

File tree

1 file changed

+54
-22
lines changed

1 file changed

+54
-22
lines changed

net/ipv6/ioam6_iptunnel.c

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
3838
};
3939

4040
struct ioam6_lwt {
41+
struct dst_entry null_dst;
4142
struct dst_cache cache;
4243
struct ioam6_lwt_freq freq;
4344
atomic_t pkt_cnt;
@@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
177178
if (err)
178179
goto free_lwt;
179180

181+
/* This "fake" dst_entry will be stored in a dst_cache, which will call
182+
* dst_hold() and dst_release() on it. We must ensure that dst_destroy()
183+
* will never be called. For that, its initial refcount is 1 and +1 when
184+
* it is stored in the cache. Then, +1/-1 each time we read the cache
185+
* and release it. Long story short, we're fine.
186+
*/
187+
dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT);
188+
180189
atomic_set(&ilwt->pkt_cnt, 0);
181190
ilwt->freq.k = freq_k;
182191
ilwt->freq.n = freq_n;
@@ -336,33 +345,45 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
336345

337346
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
338347
{
339-
struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
348+
struct dst_entry *orig_dst = skb_dst(skb);
349+
struct dst_entry *dst = NULL;
340350
struct ioam6_lwt *ilwt;
341351
int err = -EINVAL;
342352
u32 pkt_cnt;
343353

344354
if (skb->protocol != htons(ETH_P_IPV6))
345355
goto drop;
346356

347-
ilwt = ioam6_lwt_state(dst->lwtstate);
357+
ilwt = ioam6_lwt_state(orig_dst->lwtstate);
348358

349359
/* Check for insertion frequency (i.e., "k over n" insertions) */
350360
pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
351361
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
352362
goto out;
353363

354364
local_bh_disable();
355-
cache_dst = dst_cache_get(&ilwt->cache);
365+
dst = dst_cache_get(&ilwt->cache);
356366
local_bh_enable();
357367

368+
/* This is how we notify that the destination does not change after
369+
* transformation and that we need to use orig_dst instead of the cache
370+
*/
371+
if (dst == &ilwt->null_dst) {
372+
dst_release(dst);
373+
374+
dst = orig_dst;
375+
/* keep refcount balance: dst_release() is called at the end */
376+
dst_hold(dst);
377+
}
378+
358379
switch (ilwt->mode) {
359380
case IOAM6_IPTUNNEL_MODE_INLINE:
360381
do_inline:
361382
/* Direct insertion - if there is no Hop-by-Hop yet */
362383
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
363384
goto out;
364385

365-
err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
386+
err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
366387
if (unlikely(err))
367388
goto drop;
368389

@@ -372,7 +393,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
372393
/* Encapsulation (ip6ip6) */
373394
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
374395
ilwt->has_tunsrc, &ilwt->tunsrc,
375-
&ilwt->tundst, cache_dst);
396+
&ilwt->tundst, dst);
376397
if (unlikely(err))
377398
goto drop;
378399

@@ -390,7 +411,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
390411
goto drop;
391412
}
392413

393-
if (unlikely(!cache_dst)) {
414+
if (unlikely(!dst)) {
394415
struct ipv6hdr *hdr = ipv6_hdr(skb);
395416
struct flowi6 fl6;
396417

@@ -401,43 +422,54 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
401422
fl6.flowi6_mark = skb->mark;
402423
fl6.flowi6_proto = hdr->nexthdr;
403424

404-
cache_dst = ip6_route_output(net, NULL, &fl6);
405-
if (cache_dst->error) {
406-
err = cache_dst->error;
425+
dst = ip6_route_output(net, NULL, &fl6);
426+
if (dst->error) {
427+
err = dst->error;
407428
goto drop;
408429
}
409430

410-
/* cache only if we don't create a dst reference loop */
411-
if (dst->lwtstate != cache_dst->lwtstate) {
412-
local_bh_disable();
413-
dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
414-
local_bh_enable();
415-
}
416-
417-
err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
431+
/* If the destination is the same after transformation (which is
432+
* a valid use case for IOAM), then we don't want to add it to
433+
* the cache in order to avoid a reference loop. Instead, we add
434+
* our fake dst_entry to the cache as a way to detect this case.
435+
* Otherwise, we add the resolved destination to the cache.
436+
*/
437+
local_bh_disable();
438+
if (orig_dst->lwtstate == dst->lwtstate)
439+
dst_cache_set_ip6(&ilwt->cache,
440+
&ilwt->null_dst, &fl6.saddr);
441+
else
442+
dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
443+
local_bh_enable();
444+
445+
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
418446
if (unlikely(err))
419447
goto drop;
420448
}
421449

422450
/* avoid lwtunnel_output() reentry loop when destination is the same
423451
* after transformation (e.g., with the inline mode)
424452
*/
425-
if (dst->lwtstate != cache_dst->lwtstate) {
453+
if (orig_dst->lwtstate != dst->lwtstate) {
426454
skb_dst_drop(skb);
427-
skb_dst_set(skb, cache_dst);
455+
skb_dst_set(skb, dst);
428456
return dst_output(net, sk, skb);
429457
}
430458
out:
431-
dst_release(cache_dst);
432-
return dst->lwtstate->orig_output(net, sk, skb);
459+
dst_release(dst);
460+
return orig_dst->lwtstate->orig_output(net, sk, skb);
433461
drop:
434-
dst_release(cache_dst);
462+
dst_release(dst);
435463
kfree_skb(skb);
436464
return err;
437465
}
438466

439467
static void ioam6_destroy_state(struct lwtunnel_state *lwt)
440468
{
469+
/* Since the refcount of per-cpu dst_entry caches will never be 0 (see
470+
* why above) when our "fake" dst_entry is used, it is not necessary to
471+
* remove them before calling dst_cache_destroy()
472+
*/
441473
dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
442474
}
443475

0 commit comments

Comments
 (0)