@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
3838};
3939
4040struct ioam6_lwt {
41+ struct dst_entry null_dst ;
4142 struct dst_cache cache ;
4243 struct ioam6_lwt_freq freq ;
4344 atomic_t pkt_cnt ;
@@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
177178 if (err )
178179 goto free_lwt ;
179180
181+ /* This "fake" dst_entry will be stored in a dst_cache, which will call
182+ * dst_hold() and dst_release() on it. We must ensure that dst_destroy()
183+ * will never be called. For that, its initial refcount is 1 and +1 when
184+ * it is stored in the cache. Then, +1/-1 each time we read the cache
185+ * and release it. Long story short, we're fine.
186+ */
187+ dst_init (& ilwt -> null_dst , NULL , NULL , DST_OBSOLETE_NONE , DST_NOCOUNT );
188+
180189 atomic_set (& ilwt -> pkt_cnt , 0 );
181190 ilwt -> freq .k = freq_k ;
182191 ilwt -> freq .n = freq_n ;
@@ -356,6 +365,17 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
356365 dst = dst_cache_get (& ilwt -> cache );
357366 local_bh_enable ();
358367
368+ /* This is how we notify that the destination does not change after
369+ * transformation and that we need to use orig_dst instead of the cache
370+ */
371+ if (dst == & ilwt -> null_dst ) {
372+ dst_release (dst );
373+
374+ dst = orig_dst ;
375+ /* keep refcount balance: dst_release() is called at the end */
376+ dst_hold (dst );
377+ }
378+
359379 switch (ilwt -> mode ) {
360380 case IOAM6_IPTUNNEL_MODE_INLINE :
361381do_inline :
@@ -408,12 +428,19 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
408428 goto drop ;
409429 }
410430
411- /* cache only if we don't create a dst reference loop */
412- if (orig_dst -> lwtstate != dst -> lwtstate ) {
413- local_bh_disable ();
431+ /* If the destination is the same after transformation (which is
432+ * a valid use case for IOAM), then we don't want to add it to
433+ * the cache in order to avoid a reference loop. Instead, we add
434+ * our fake dst_entry to the cache as a way to detect this case.
435+ * Otherwise, we add the resolved destination to the cache.
436+ */
437+ local_bh_disable ();
438+ if (orig_dst -> lwtstate == dst -> lwtstate )
439+ dst_cache_set_ip6 (& ilwt -> cache ,
440+ & ilwt -> null_dst , & fl6 .saddr );
441+ else
414442 dst_cache_set_ip6 (& ilwt -> cache , dst , & fl6 .saddr );
415- local_bh_enable ();
416- }
443+ local_bh_enable ();
417444
418445 err = skb_cow_head (skb , LL_RESERVED_SPACE (dst -> dev ));
419446 if (unlikely (err ))
@@ -439,6 +466,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
439466
440467static void ioam6_destroy_state (struct lwtunnel_state * lwt )
441468{
469+ /* Since the refcount of per-cpu dst_entry caches will never be 0 (see
470+ * why above) when our "fake" dst_entry is used, it is not necessary to
471+ * remove them before calling dst_cache_destroy()
472+ */
442473 dst_cache_destroy (& ioam6_lwt_state (lwt )-> cache );
443474}
444475
0 commit comments