From 0988340713f1336f9b6a1462c95a82bf93a35d1e Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 20 Dec 2024 19:14:11 +0000 Subject: [PATCH] Simplify revFlowThrough Observations: * revFlowThrough can be much larger than the other reverse-flow predicates, presumably when there are many different innerReturnAps. * It is only ever used in conjunction with flowThroughIntoCall, which can therefore be pushed in, and several of its parameters can thereby be dropped in exchange for exposing `arg`. * `revFlowThroughArg` can then be trivially inlined. Result: on repository `go-gitea/gitea` with PR https://github.com/github/codeql/pull/17701 producing a wider selection of access paths than are seen on `main`, `revFlowThrough` drops in size from ~120m tuples to ~4m, and the runtime of the reverse-flow computation for dataflow stage 4 goes from dominating the forward-flow cost to relatively insignificant. Overall runtime falls from 3 minutes to 2 with substantial ram available, and presumably falls much more under GHA-style memory pressure. --- .../codeql/dataflow/internal/DataFlowImpl.qll | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 1373345423f7..c6a16a0e15d3 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -2261,10 +2261,7 @@ module MakeImpl Lang> { returnAp = apNone() or // flow through a callable - exists(DataFlowCall call, ParamNodeEx p, Ap innerReturnAp | - revFlowThrough(call, returnCtx, p, state, _, returnAp, ap, innerReturnAp) and - flowThroughIntoCall(call, node, p, ap, innerReturnAp) - ) + revFlowThrough(_, returnCtx, state, returnAp, ap, node) or // flow out of a callable exists(ReturnPosition pos | @@ -2413,11 +2410,14 @@ module MakeImpl Lang> { pragma[nomagic] private predicate revFlowThrough( - DataFlowCall call, ReturnCtx returnCtx, ParamNodeEx p, FlowState state, - ReturnPosition pos, ApOption returnAp, Ap ap, Ap innerReturnAp + DataFlowCall call, ReturnCtx returnCtx, FlowState state, ApOption returnAp, Ap ap, + ArgNodeEx arg ) { - revFlowParamToReturn(p, state, pos, innerReturnAp, ap) and - revFlowIsReturned(call, returnCtx, returnAp, pos, innerReturnAp) + exists(ParamNodeEx p, ReturnPosition pos, Ap innerReturnAp | + flowThroughIntoCall(call, arg, p, ap, innerReturnAp) and + revFlowParamToReturn(p, state, pos, innerReturnAp, ap) and + revFlowIsReturned(call, returnCtx, returnAp, pos, innerReturnAp) + ) } /** @@ -2543,22 +2543,11 @@ module MakeImpl Lang> { ) } - pragma[nomagic] - private predicate revFlowThroughArg( - DataFlowCall call, ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, - Ap ap - ) { - exists(ParamNodeEx p, Ap innerReturnAp | - revFlowThrough(call, returnCtx, p, state, _, returnAp, ap, innerReturnAp) and - flowThroughIntoCall(call, arg, p, ap, innerReturnAp) - ) - } - pragma[nomagic] predicate callMayFlowThroughRev(DataFlowCall call) { exists(ArgNodeEx arg, FlowState state, ReturnCtx returnCtx, ApOption returnAp, Ap ap | revFlow(arg, state, returnCtx, returnAp, ap) and - revFlowThroughArg(call, arg, state, returnCtx, returnAp, ap) + revFlowThrough(call, returnCtx, state, returnAp, ap, arg) ) }