Skip to content

Commit

Permalink
ofproto-dpif-upcall: Avoid stale ukeys leaks.
Browse files Browse the repository at this point in the history
It is observed in some environments that there are much more ukeys than
actual DP flows. For example:

$ ovs-appctl upcall/show
system@ovs-system:
flows : (current 7) (avg 6) (max 117) (limit 2125)
offloaded flows : 525
dump duration : 1063ms
ufid enabled : true

23: (keys 3612)
24: (keys 3625)
25: (keys 3485)

The revalidator threads are busy revalidating the stale ukeys leading to
high CPU and long dump duration.

This patch tracks the number of consecutive missed dumps. If four dumps
are missed in a row, it is assumed that the datapath flow no longer
exists, and the ukey can be deleted.

Reported-by: Roi Dayan <roid@nvidia.com>
Co-authored-by: Han Zhou <hzhou@ovn.org>
Co-authored-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Aaron Conole <aconole@redhat.com>
  • Loading branch information
3 people authored and apconole committed Aug 30, 2024
1 parent c38ff60 commit 180ab2f
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
18 changes: 18 additions & 0 deletions ofproto/ofproto-dpif-upcall.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ COVERAGE_DEFINE(dumped_inconsistent_flow);
COVERAGE_DEFINE(dumped_new_flow);
COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(revalidate_missed_dp_flow);
COVERAGE_DEFINE(revalidate_missing_dp_flow);
COVERAGE_DEFINE(ukey_dp_change);
COVERAGE_DEFINE(ukey_invalid_stat_reset);
COVERAGE_DEFINE(ukey_replace_contention);
Expand Down Expand Up @@ -284,6 +285,7 @@ enum flow_del_reason {
FDR_TOO_EXPENSIVE, /* Too expensive to revalidate. */
FDR_UPDATE_FAIL, /* Datapath update failed. */
FDR_XLATION_ERROR, /* Flow translation error. */
FDR_FLOW_MISSING_DP, /* Flow is missing from the datapath. */
};

/* 'udpif_key's are responsible for tracking the little bit of state udpif
Expand Down Expand Up @@ -318,6 +320,7 @@ struct udpif_key {
uint64_t dump_seq OVS_GUARDED; /* Tracks udpif->dump_seq. */
uint64_t reval_seq OVS_GUARDED; /* Tracks udpif->reval_seq. */
enum ukey_state state OVS_GUARDED; /* Tracks ukey lifetime. */
uint32_t missed_dumps OVS_GUARDED; /* Missed consecutive dumps. */

/* 'state' debug information. */
unsigned int state_thread OVS_GUARDED; /* Thread that transitions. */
Expand Down Expand Up @@ -3040,6 +3043,21 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge)
result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
reval_seq, &recircs, &del_reason);
}

if (ukey->dump_seq != dump_seq) {
ukey->missed_dumps++;
if (ukey->missed_dumps >= 4) {
/* If the flow was not dumped for 4 revalidator rounds,
* we can assume the datapath flow no longer exists
* and the ukey should be deleted. */
COVERAGE_INC(revalidate_missing_dp_flow);
del_reason = FDR_FLOW_MISSING_DP;
result = UKEY_DELETE;
}
} else {
ukey->missed_dumps = 0;
}

if (result != UKEY_KEEP) {
/* Clears 'recircs' if filled by revalidate_ukey(). */
reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs,
Expand Down
45 changes: 45 additions & 0 deletions tests/ofproto-dpif.at
Original file line number Diff line number Diff line change
Expand Up @@ -12661,3 +12661,48 @@ AT_CHECK([ovs-appctl revalidator/resume])

OVS_VSWITCHD_STOP
AT_CLEANUP

AT_SETUP([ofproto-dpif - Cleanup missing datapath flows])

OVS_VSWITCHD_START
add_of_ports br0 1 2

m4_define([ICMP_PKT], [m4_join([,],
[eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800)],
[ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no)],
[icmp(type=8,code=0)])])

AT_CHECK([ovs-ofctl del-flows br0])
AT_CHECK([ovs-ofctl add-flow br0 'actions=normal' ])

AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ICMP_PKT'])

AT_CHECK([ovs-appctl dpctl/dump-flows --names | strip_used | strip_stats | dnl
strip_duration | strip_dp_hash | sort], [0], [dnl
flow-dump from the main thread:
recirc_id(0),in_port(p1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:br0,p2
])

dnl Make sure the ukey exists.
AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
grep -q '1)'], [0])

dnl Delete all datapath flows, and make sure they are gone.
AT_CHECK([ovs-appctl dpctl/del-flows])
AT_CHECK([ovs-appctl dpctl/dump-flows --names ], [0], [])

dnl Move forward in time and make sure we have at least 4 * 500ms.
AT_CHECK([ovs-appctl time/warp 3000 300], [0], [ignore])

dnl Make sure no more ukeys exists.
AT_CHECK([ovs-appctl upcall/show | grep '(keys' | awk '{print $3}' | \
grep -qv '0)'], [1])

dnl Verify coverage counter was hit.
AT_CHECK([ovs-appctl coverage/read-counter revalidate_missing_dp_flow], [0],
[dnl
1
])

OVS_VSWITCHD_STOP(["/failed to flow_del (No such file or directory)/d"])
AT_CLEANUP
4 changes: 3 additions & 1 deletion utilities/usdt-scripts/flow_reval_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@
"FDR_TOO_EXPENSIVE",
"FDR_UPDATE_FAIL",
"FDR_XLATION_ERROR",
"FDR_FLOW_MISSING_DP"
],
start=0,
)
Expand All @@ -270,7 +271,8 @@
FdrReasons.FDR_PURGE: "User requested flow deletion",
FdrReasons.FDR_TOO_EXPENSIVE: "Too expensive to revalidate",
FdrReasons.FDR_UPDATE_FAIL: "Datapath update failed",
FdrReasons.FDR_XLATION_ERROR: "Flow translation error"
FdrReasons.FDR_XLATION_ERROR: "Flow translation error",
FdrReasons.FDR_FLOW_MISSING_DP: "Flow is missing from the datapath"
}


Expand Down

0 comments on commit 180ab2f

Please sign in to comment.